1 | /**************************************************************************** |
2 | * Copyright (C) 2013-2016 Woboq GmbH |
3 | * Olivier Goffart <contact at woboq.com> |
4 | * https://woboq.com/ |
5 | * |
6 | * This program is free software: you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by |
8 | * the Free Software Foundation, either version 3 of the License, or |
9 | * (at your option) any later version. |
10 | * |
11 | * This program is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | * GNU General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU General Public License |
17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | */ |
19 | |
20 | #pragma once |
21 | |
#include <cstddef>
#include <string>
#include <utility>
#include <vector>
25 | |
26 | |
/**
 * One entry of the EmbeddedFiles table: a file name paired with the file's
 * contents, both held as pointers to caller-owned character data.
 *
 * The struct never copies or frees what it points to; both pointers are
 * stored exactly as passed to the constructor.
 */
struct EmbeddedFile {
    /// Name under which the file is exposed (e.g. "/builtins/foo.h").
    const char *filename;
    /// First byte of the file contents.
    const char *content;
    /// Number of content bytes, not counting the final array element.
    std::size_t size;

    /**
     * Builds an entry from a file name and a character array.
     *
     * The array extent N is deduced from the argument, so `size` becomes
     * N - 1: for a string literal that is its length without the
     * terminating NUL, and embedded NUL bytes are counted like any other
     * character.  The extent is deduced as std::size_t so that no
     * signed-to-unsigned conversion happens when it is stored.
     *
     * @param filename name to record for this entry
     * @param data     character array holding the contents
     */
    template <std::size_t N>
    constexpr EmbeddedFile(const char *filename, const char (&data)[N])
        : filename(filename), content(data), size(N - 1) {}

    /// Builds an empty entry: both pointers are null and the size is zero.
    constexpr EmbeddedFile() : filename(nullptr), content(nullptr), size(0) {}
};
36 | |
37 | static constexpr EmbeddedFile EmbeddedFiles[] = { |
38 | { "/builtins/__clang_cuda_builtin_vars.h" , "/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------===\n" |
39 | " *\n" |
40 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
41 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
42 | " * in the Software without restriction, including without limitation the rights\n" |
43 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
44 | " * copies of the Software, and to permit persons to whom the Software is\n" |
45 | " * furnished to do so, subject to the following conditions:\n" |
46 | " *\n" |
47 | " * The above copyright notice and this permission notice shall be included in\n" |
48 | " * all copies or substantial portions of the Software.\n" |
49 | " *\n" |
50 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
51 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
52 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
53 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
54 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
55 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
56 | " * THE SOFTWARE.\n" |
57 | " *\n" |
58 | " *===-----------------------------------------------------------------------===\n" |
59 | " */\n" |
60 | "\n" |
61 | "#ifndef __CUDA_BUILTIN_VARS_H\n" |
62 | "#define __CUDA_BUILTIN_VARS_H\n" |
63 | "\n" |
64 | "// Forward declares from vector_types.h.\n" |
65 | "struct uint3;\n" |
66 | "struct dim3;\n" |
67 | "\n" |
68 | "// The file implements built-in CUDA variables using __declspec(property).\n" |
69 | "// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx\n" |
70 | "// All read accesses of built-in variable fields get converted into calls to a\n" |
71 | "// getter function which in turn calls the appropriate builtin to fetch the\n" |
72 | "// value.\n" |
73 | "//\n" |
74 | "// Example:\n" |
75 | "// int x = threadIdx.x;\n" |
76 | "// IR output:\n" |
77 | "// %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3\n" |
78 | "// PTX output:\n" |
79 | "// mov.u32 %r2, %tid.x;\n" |
80 | "\n" |
81 | "#define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \\\n" |
82 | " __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \\\n" |
83 | " static inline __attribute__((always_inline)) \\\n" |
84 | " __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \\\n" |
85 | " return INTRINSIC; \\\n" |
86 | " }\n" |
87 | "\n" |
88 | "#if __cplusplus >= 201103L\n" |
89 | "#define __DELETE =delete\n" |
90 | "#else\n" |
91 | "#define __DELETE\n" |
92 | "#endif\n" |
93 | "\n" |
94 | "// Make sure nobody can create instances of the special variable types. nvcc\n" |
95 | "// also disallows taking address of special variables, so we disable address-of\n" |
96 | "// operator as well.\n" |
97 | "#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \\\n" |
98 | " __attribute__((device)) TypeName() __DELETE; \\\n" |
99 | " __attribute__((device)) TypeName(const TypeName &) __DELETE; \\\n" |
100 | " __attribute__((device)) void operator=(const TypeName &) const __DELETE; \\\n" |
101 | " __attribute__((device)) TypeName *operator&() const __DELETE\n" |
102 | "\n" |
103 | "struct __cuda_builtin_threadIdx_t {\n" |
104 | " __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());\n" |
105 | " __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());\n" |
106 | " __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());\n" |
107 | " // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a\n" |
108 | " // uint3). This function is defined after we pull in vector_types.h.\n" |
109 | " __attribute__((device)) operator uint3() const;\n" |
110 | "private:\n" |
111 | " __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);\n" |
112 | "};\n" |
113 | "\n" |
114 | "struct __cuda_builtin_blockIdx_t {\n" |
115 | " __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());\n" |
116 | " __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());\n" |
117 | " __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());\n" |
118 | " // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a\n" |
119 | " // uint3). This function is defined after we pull in vector_types.h.\n" |
120 | " __attribute__((device)) operator uint3() const;\n" |
121 | "private:\n" |
122 | " __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);\n" |
123 | "};\n" |
124 | "\n" |
125 | "struct __cuda_builtin_blockDim_t {\n" |
126 | " __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());\n" |
127 | " __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());\n" |
128 | " __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());\n" |
129 | " // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a\n" |
130 | " // dim3). This function is defined after we pull in vector_types.h.\n" |
131 | " __attribute__((device)) operator dim3() const;\n" |
132 | "private:\n" |
133 | " __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);\n" |
134 | "};\n" |
135 | "\n" |
136 | "struct __cuda_builtin_gridDim_t {\n" |
137 | " __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());\n" |
138 | " __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());\n" |
139 | " __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());\n" |
140 | " // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a\n" |
141 | " // dim3). This function is defined after we pull in vector_types.h.\n" |
142 | " __attribute__((device)) operator dim3() const;\n" |
143 | "private:\n" |
144 | " __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);\n" |
145 | "};\n" |
146 | "\n" |
147 | "#define __CUDA_BUILTIN_VAR \\\n" |
148 | " extern const __attribute__((device)) __attribute__((weak))\n" |
149 | "__CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;\n" |
150 | "__CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx;\n" |
151 | "__CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim;\n" |
152 | "__CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim;\n" |
153 | "\n" |
154 | "// warpSize should translate to read of %WARP_SZ but there's currently no\n" |
155 | "// builtin to do so. According to PTX v4.2 docs 'to date, all target\n" |
156 | "// architectures have a WARP_SZ value of 32'.\n" |
157 | "__attribute__((device)) const int warpSize = 32;\n" |
158 | "\n" |
159 | "#undef __CUDA_DEVICE_BUILTIN\n" |
160 | "#undef __CUDA_BUILTIN_VAR\n" |
161 | "#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS\n" |
162 | "\n" |
163 | "#endif /* __CUDA_BUILTIN_VARS_H */\n" |
164 | "" } , |
165 | { "/builtins/__clang_cuda_cmath.h" , "/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===\n" |
166 | " *\n" |
167 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
168 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
169 | " * in the Software without restriction, including without limitation the rights\n" |
170 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
171 | " * copies of the Software, and to permit persons to whom the Software is\n" |
172 | " * furnished to do so, subject to the following conditions:\n" |
173 | " *\n" |
174 | " * The above copyright notice and this permission notice shall be included in\n" |
175 | " * all copies or substantial portions of the Software.\n" |
176 | " *\n" |
177 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
178 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
179 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
180 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
181 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
182 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
183 | " * THE SOFTWARE.\n" |
184 | " *\n" |
185 | " *===-----------------------------------------------------------------------===\n" |
186 | " */\n" |
187 | "#ifndef __CLANG_CUDA_CMATH_H__\n" |
188 | "#define __CLANG_CUDA_CMATH_H__\n" |
189 | "#ifndef __CUDA__\n" |
190 | "#error \"This file is for CUDA compilation only.\"\n" |
191 | "#endif\n" |
192 | "\n" |
193 | "#include <limits>\n" |
194 | "\n" |
195 | "// CUDA lets us use various std math functions on the device side. This file\n" |
196 | "// works in concert with __clang_cuda_math_forward_declares.h to make this work.\n" |
197 | "//\n" |
198 | "// Specifically, the forward-declares header declares __device__ overloads for\n" |
199 | "// these functions in the global namespace, then pulls them into namespace std\n" |
200 | "// with 'using' statements. Then this file implements those functions, after\n" |
201 | "// their implementations have been pulled in.\n" |
202 | "//\n" |
203 | "// It's important that we declare the functions in the global namespace and pull\n" |
204 | "// them into namespace std with using statements, as opposed to simply declaring\n" |
205 | "// these functions in namespace std, because our device functions need to\n" |
206 | "// overload the standard library functions, which may be declared in the global\n" |
207 | "// namespace or in std, depending on the degree of conformance of the stdlib\n" |
208 | "// implementation. Declaring in the global namespace and pulling into namespace\n" |
209 | "// std covers all of the known knowns.\n" |
210 | "\n" |
211 | "#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))\n" |
212 | "\n" |
213 | "__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }\n" |
214 | "__DEVICE__ long abs(long __n) { return ::labs(__n); }\n" |
215 | "__DEVICE__ float abs(float __x) { return ::fabsf(__x); }\n" |
216 | "__DEVICE__ double abs(double __x) { return ::fabs(__x); }\n" |
217 | "__DEVICE__ float acos(float __x) { return ::acosf(__x); }\n" |
218 | "__DEVICE__ float asin(float __x) { return ::asinf(__x); }\n" |
219 | "__DEVICE__ float atan(float __x) { return ::atanf(__x); }\n" |
220 | "__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }\n" |
221 | "__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }\n" |
222 | "__DEVICE__ float cos(float __x) { return ::cosf(__x); }\n" |
223 | "__DEVICE__ float cosh(float __x) { return ::coshf(__x); }\n" |
224 | "__DEVICE__ float exp(float __x) { return ::expf(__x); }\n" |
225 | "__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }\n" |
226 | "__DEVICE__ float floor(float __x) { return ::floorf(__x); }\n" |
227 | "__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }\n" |
228 | "__DEVICE__ int fpclassify(float __x) {\n" |
229 | " return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,\n" |
230 | " FP_ZERO, __x);\n" |
231 | "}\n" |
232 | "__DEVICE__ int fpclassify(double __x) {\n" |
233 | " return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,\n" |
234 | " FP_ZERO, __x);\n" |
235 | "}\n" |
236 | "__DEVICE__ float frexp(float __arg, int *__exp) {\n" |
237 | " return ::frexpf(__arg, __exp);\n" |
238 | "}\n" |
239 | "\n" |
240 | "// For inscrutable reasons, the CUDA headers define these functions for us on\n" |
241 | "// Windows.\n" |
242 | "#ifndef _MSC_VER\n" |
243 | "__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }\n" |
244 | "__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }\n" |
245 | "__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }\n" |
246 | "// For inscrutable reasons, __finite(), the double-precision version of\n" |
247 | "// __finitef, does not exist when compiling for MacOS. __isfinited is available\n" |
248 | "// everywhere and is just as good.\n" |
249 | "__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }\n" |
250 | "__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }\n" |
251 | "__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }\n" |
252 | "#endif\n" |
253 | "\n" |
254 | "__DEVICE__ bool isgreater(float __x, float __y) {\n" |
255 | " return __builtin_isgreater(__x, __y);\n" |
256 | "}\n" |
257 | "__DEVICE__ bool isgreater(double __x, double __y) {\n" |
258 | " return __builtin_isgreater(__x, __y);\n" |
259 | "}\n" |
260 | "__DEVICE__ bool isgreaterequal(float __x, float __y) {\n" |
261 | " return __builtin_isgreaterequal(__x, __y);\n" |
262 | "}\n" |
263 | "__DEVICE__ bool isgreaterequal(double __x, double __y) {\n" |
264 | " return __builtin_isgreaterequal(__x, __y);\n" |
265 | "}\n" |
266 | "__DEVICE__ bool isless(float __x, float __y) {\n" |
267 | " return __builtin_isless(__x, __y);\n" |
268 | "}\n" |
269 | "__DEVICE__ bool isless(double __x, double __y) {\n" |
270 | " return __builtin_isless(__x, __y);\n" |
271 | "}\n" |
272 | "__DEVICE__ bool islessequal(float __x, float __y) {\n" |
273 | " return __builtin_islessequal(__x, __y);\n" |
274 | "}\n" |
275 | "__DEVICE__ bool islessequal(double __x, double __y) {\n" |
276 | " return __builtin_islessequal(__x, __y);\n" |
277 | "}\n" |
278 | "__DEVICE__ bool islessgreater(float __x, float __y) {\n" |
279 | " return __builtin_islessgreater(__x, __y);\n" |
280 | "}\n" |
281 | "__DEVICE__ bool islessgreater(double __x, double __y) {\n" |
282 | " return __builtin_islessgreater(__x, __y);\n" |
283 | "}\n" |
284 | "__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }\n" |
285 | "__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }\n" |
286 | "__DEVICE__ bool isunordered(float __x, float __y) {\n" |
287 | " return __builtin_isunordered(__x, __y);\n" |
288 | "}\n" |
289 | "__DEVICE__ bool isunordered(double __x, double __y) {\n" |
290 | " return __builtin_isunordered(__x, __y);\n" |
291 | "}\n" |
292 | "__DEVICE__ float ldexp(float __arg, int __exp) {\n" |
293 | " return ::ldexpf(__arg, __exp);\n" |
294 | "}\n" |
295 | "__DEVICE__ float log(float __x) { return ::logf(__x); }\n" |
296 | "__DEVICE__ float log10(float __x) { return ::log10f(__x); }\n" |
297 | "__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }\n" |
298 | "__DEVICE__ float pow(float __base, float __exp) {\n" |
299 | " return ::powf(__base, __exp);\n" |
300 | "}\n" |
301 | "__DEVICE__ float pow(float __base, int __iexp) {\n" |
302 | " return ::powif(__base, __iexp);\n" |
303 | "}\n" |
304 | "__DEVICE__ double pow(double __base, int __iexp) {\n" |
305 | " return ::powi(__base, __iexp);\n" |
306 | "}\n" |
307 | "__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }\n" |
308 | "__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }\n" |
309 | "__DEVICE__ float sin(float __x) { return ::sinf(__x); }\n" |
310 | "__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }\n" |
311 | "__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }\n" |
312 | "__DEVICE__ float tan(float __x) { return ::tanf(__x); }\n" |
313 | "__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }\n" |
314 | "\n" |
315 | "// Notably missing above is nexttoward. We omit it because\n" |
316 | "// libdevice doesn't provide an implementation, and we don't want to be in the\n" |
317 | "// business of implementing tricky libm functions in this header.\n" |
318 | "\n" |
319 | "// Now we've defined everything we promised we'd define in\n" |
320 | "// __clang_cuda_math_forward_declares.h. We need to do two additional things to\n" |
321 | "// fix up our math functions.\n" |
322 | "//\n" |
323 | "// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define\n" |
324 | "// only sin(float) and sin(double), which means that e.g. sin(0) is\n" |
325 | "// ambiguous.\n" |
326 | "//\n" |
327 | "// 2) Pull the __device__ overloads of \"foobarf\" math functions into namespace\n" |
328 | "// std. These are defined in the CUDA headers in the global namespace,\n" |
329 | "// independent of everything else we've done here.\n" |
330 | "\n" |
331 | "// We can't use std::enable_if, because we want to be pre-C++11 compatible. But\n" |
332 | "// we go ahead and unconditionally define functions that are only available when\n" |
333 | "// compiling for C++11 to match the behavior of the CUDA headers.\n" |
334 | "template<bool __B, class __T = void>\n" |
335 | "struct __clang_cuda_enable_if {};\n" |
336 | "\n" |
337 | "template <class __T> struct __clang_cuda_enable_if<true, __T> {\n" |
338 | " typedef __T type;\n" |
339 | "};\n" |
340 | "\n" |
341 | "// Defines an overload of __fn that accepts one integral argument, calls\n" |
342 | "// __fn((double)x), and returns __retty.\n" |
343 | "#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \\\n" |
344 | " template <typename __T> \\\n" |
345 | " __DEVICE__ \\\n" |
346 | " typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \\\n" |
347 | " __retty>::type \\\n" |
348 | " __fn(__T __x) { \\\n" |
349 | " return ::__fn((double)__x); \\\n" |
350 | " }\n" |
351 | "\n" |
352 | "// Defines an overload of __fn that accepts one two arithmetic arguments, calls\n" |
353 | "// __fn((double)x, (double)y), and returns a double.\n" |
354 | "//\n" |
355 | "// Note this is different from OVERLOAD_1, which generates an overload that\n" |
356 | "// accepts only *integral* arguments.\n" |
357 | "#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \\\n" |
358 | " template <typename __T1, typename __T2> \\\n" |
359 | " __DEVICE__ typename __clang_cuda_enable_if< \\\n" |
360 | " std::numeric_limits<__T1>::is_specialized && \\\n" |
361 | " std::numeric_limits<__T2>::is_specialized, \\\n" |
362 | " __retty>::type \\\n" |
363 | " __fn(__T1 __x, __T2 __y) { \\\n" |
364 | " return __fn((double)__x, (double)__y); \\\n" |
365 | " }\n" |
366 | "\n" |
367 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)\n" |
368 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)\n" |
369 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)\n" |
370 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)\n" |
371 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)\n" |
372 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);\n" |
373 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)\n" |
374 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)\n" |
375 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)\n" |
376 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);\n" |
377 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)\n" |
378 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)\n" |
379 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)\n" |
380 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)\n" |
381 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)\n" |
382 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)\n" |
383 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)\n" |
384 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)\n" |
385 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);\n" |
386 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)\n" |
387 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);\n" |
388 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);\n" |
389 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);\n" |
390 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)\n" |
391 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);\n" |
392 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)\n" |
393 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)\n" |
394 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);\n" |
395 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);\n" |
396 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);\n" |
397 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);\n" |
398 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);\n" |
399 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);\n" |
400 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);\n" |
401 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)\n" |
402 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);\n" |
403 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)\n" |
404 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)\n" |
405 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)\n" |
406 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)\n" |
407 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)\n" |
408 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)\n" |
409 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)\n" |
410 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)\n" |
411 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)\n" |
412 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)\n" |
413 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);\n" |
414 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);\n" |
415 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);\n" |
416 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);\n" |
417 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);\n" |
418 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);\n" |
419 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)\n" |
420 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)\n" |
421 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)\n" |
422 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)\n" |
423 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)\n" |
424 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)\n" |
425 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)\n" |
426 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);\n" |
427 | "\n" |
428 | "#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1\n" |
429 | "#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2\n" |
430 | "\n" |
431 | "// Overloads for functions that don't match the patterns expected by\n" |
432 | "// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.\n" |
433 | "template <typename __T1, typename __T2, typename __T3>\n" |
434 | "__DEVICE__ typename __clang_cuda_enable_if<\n" |
435 | " std::numeric_limits<__T1>::is_specialized &&\n" |
436 | " std::numeric_limits<__T2>::is_specialized &&\n" |
437 | " std::numeric_limits<__T3>::is_specialized,\n" |
438 | " double>::type\n" |
439 | "fma(__T1 __x, __T2 __y, __T3 __z) {\n" |
440 | " return std::fma((double)__x, (double)__y, (double)__z);\n" |
441 | "}\n" |
442 | "\n" |
443 | "template <typename __T>\n" |
444 | "__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n" |
445 | " double>::type\n" |
446 | "frexp(__T __x, int *__exp) {\n" |
447 | " return std::frexp((double)__x, __exp);\n" |
448 | "}\n" |
449 | "\n" |
450 | "template <typename __T>\n" |
451 | "__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n" |
452 | " double>::type\n" |
453 | "ldexp(__T __x, int __exp) {\n" |
454 | " return std::ldexp((double)__x, __exp);\n" |
455 | "}\n" |
456 | "\n" |
457 | "template <typename __T1, typename __T2>\n" |
458 | "__DEVICE__ typename __clang_cuda_enable_if<\n" |
459 | " std::numeric_limits<__T1>::is_specialized &&\n" |
460 | " std::numeric_limits<__T2>::is_specialized,\n" |
461 | " double>::type\n" |
462 | "remquo(__T1 __x, __T2 __y, int *__quo) {\n" |
463 | " return std::remquo((double)__x, (double)__y, __quo);\n" |
464 | "}\n" |
465 | "\n" |
466 | "template <typename __T>\n" |
467 | "__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n" |
468 | " double>::type\n" |
469 | "scalbln(__T __x, long __exp) {\n" |
470 | " return std::scalbln((double)__x, __exp);\n" |
471 | "}\n" |
472 | "\n" |
473 | "template <typename __T>\n" |
474 | "__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n" |
475 | " double>::type\n" |
476 | "scalbn(__T __x, int __exp) {\n" |
477 | " return std::scalbn((double)__x, __exp);\n" |
478 | "}\n" |
479 | "\n" |
480 | "// We need to define these overloads in exactly the namespace our standard\n" |
481 | "// library uses (including the right inline namespace), otherwise they won't be\n" |
482 | "// picked up by other functions in the standard library (e.g. functions in\n" |
483 | "// <complex>). Thus the ugliness below.\n" |
484 | "#ifdef _LIBCPP_BEGIN_NAMESPACE_STD\n" |
485 | "_LIBCPP_BEGIN_NAMESPACE_STD\n" |
486 | "#else\n" |
487 | "namespace std {\n" |
488 | "#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n" |
489 | "_GLIBCXX_BEGIN_NAMESPACE_VERSION\n" |
490 | "#endif\n" |
491 | "#endif\n" |
492 | "\n" |
493 | "// Pull the new overloads we defined above into namespace std.\n" |
494 | "using ::acos;\n" |
495 | "using ::acosh;\n" |
496 | "using ::asin;\n" |
497 | "using ::asinh;\n" |
498 | "using ::atan;\n" |
499 | "using ::atan2;\n" |
500 | "using ::atanh;\n" |
501 | "using ::cbrt;\n" |
502 | "using ::ceil;\n" |
503 | "using ::copysign;\n" |
504 | "using ::cos;\n" |
505 | "using ::cosh;\n" |
506 | "using ::erf;\n" |
507 | "using ::erfc;\n" |
508 | "using ::exp;\n" |
509 | "using ::exp2;\n" |
510 | "using ::expm1;\n" |
511 | "using ::fabs;\n" |
512 | "using ::fdim;\n" |
513 | "using ::floor;\n" |
514 | "using ::fma;\n" |
515 | "using ::fmax;\n" |
516 | "using ::fmin;\n" |
517 | "using ::fmod;\n" |
518 | "using ::fpclassify;\n" |
519 | "using ::frexp;\n" |
520 | "using ::hypot;\n" |
521 | "using ::ilogb;\n" |
522 | "using ::isfinite;\n" |
523 | "using ::isgreater;\n" |
524 | "using ::isgreaterequal;\n" |
525 | "using ::isless;\n" |
526 | "using ::islessequal;\n" |
527 | "using ::islessgreater;\n" |
528 | "using ::isnormal;\n" |
529 | "using ::isunordered;\n" |
530 | "using ::ldexp;\n" |
531 | "using ::lgamma;\n" |
532 | "using ::llrint;\n" |
533 | "using ::llround;\n" |
534 | "using ::log;\n" |
535 | "using ::log10;\n" |
536 | "using ::log1p;\n" |
537 | "using ::log2;\n" |
538 | "using ::logb;\n" |
539 | "using ::lrint;\n" |
540 | "using ::lround;\n" |
541 | "using ::nearbyint;\n" |
542 | "using ::nextafter;\n" |
543 | "using ::pow;\n" |
544 | "using ::remainder;\n" |
545 | "using ::remquo;\n" |
546 | "using ::rint;\n" |
547 | "using ::round;\n" |
548 | "using ::scalbln;\n" |
549 | "using ::scalbn;\n" |
550 | "using ::signbit;\n" |
551 | "using ::sin;\n" |
552 | "using ::sinh;\n" |
553 | "using ::sqrt;\n" |
554 | "using ::tan;\n" |
555 | "using ::tanh;\n" |
556 | "using ::tgamma;\n" |
557 | "using ::trunc;\n" |
558 | "\n" |
559 | "// Well this is fun: We need to pull these symbols in for libc++, but we can't\n" |
560 | "// pull them in with libstdc++, because its ::isinf and ::isnan are different\n" |
561 | "// than its std::isinf and std::isnan.\n" |
562 | "#ifndef __GLIBCXX__\n" |
563 | "using ::isinf;\n" |
564 | "using ::isnan;\n" |
565 | "#endif\n" |
566 | "\n" |
567 | "// Finally, pull the \"foobarf\" functions that CUDA defines in its headers into\n" |
568 | "// namespace std.\n" |
569 | "using ::acosf;\n" |
570 | "using ::acoshf;\n" |
571 | "using ::asinf;\n" |
572 | "using ::asinhf;\n" |
573 | "using ::atan2f;\n" |
574 | "using ::atanf;\n" |
575 | "using ::atanhf;\n" |
576 | "using ::cbrtf;\n" |
577 | "using ::ceilf;\n" |
578 | "using ::copysignf;\n" |
579 | "using ::cosf;\n" |
580 | "using ::coshf;\n" |
581 | "using ::erfcf;\n" |
582 | "using ::erff;\n" |
583 | "using ::exp2f;\n" |
584 | "using ::expf;\n" |
585 | "using ::expm1f;\n" |
586 | "using ::fabsf;\n" |
587 | "using ::fdimf;\n" |
588 | "using ::floorf;\n" |
589 | "using ::fmaf;\n" |
590 | "using ::fmaxf;\n" |
591 | "using ::fminf;\n" |
592 | "using ::fmodf;\n" |
593 | "using ::frexpf;\n" |
594 | "using ::hypotf;\n" |
595 | "using ::ilogbf;\n" |
596 | "using ::ldexpf;\n" |
597 | "using ::lgammaf;\n" |
598 | "using ::llrintf;\n" |
599 | "using ::llroundf;\n" |
600 | "using ::log10f;\n" |
601 | "using ::log1pf;\n" |
602 | "using ::log2f;\n" |
603 | "using ::logbf;\n" |
604 | "using ::logf;\n" |
605 | "using ::lrintf;\n" |
606 | "using ::lroundf;\n" |
607 | "using ::modff;\n" |
608 | "using ::nearbyintf;\n" |
609 | "using ::nextafterf;\n" |
610 | "using ::powf;\n" |
611 | "using ::remainderf;\n" |
612 | "using ::remquof;\n" |
613 | "using ::rintf;\n" |
614 | "using ::roundf;\n" |
615 | "using ::scalblnf;\n" |
616 | "using ::scalbnf;\n" |
617 | "using ::sinf;\n" |
618 | "using ::sinhf;\n" |
619 | "using ::sqrtf;\n" |
620 | "using ::tanf;\n" |
621 | "using ::tanhf;\n" |
622 | "using ::tgammaf;\n" |
623 | "using ::truncf;\n" |
624 | "\n" |
625 | "#ifdef _LIBCPP_END_NAMESPACE_STD\n" |
626 | "_LIBCPP_END_NAMESPACE_STD\n" |
627 | "#else\n" |
628 | "#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n" |
629 | "_GLIBCXX_END_NAMESPACE_VERSION\n" |
630 | "#endif\n" |
631 | "} // namespace std\n" |
632 | "#endif\n" |
633 | "\n" |
634 | "#undef __DEVICE__\n" |
635 | "\n" |
636 | "#endif\n" |
637 | "" } , |
638 | { "/builtins/__clang_cuda_complex_builtins.h" , "/*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---===\n" |
639 | " *\n" |
640 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
641 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
642 | " * in the Software without restriction, including without limitation the rights\n" |
643 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
644 | " * copies of the Software, and to permit persons to whom the Software is\n" |
645 | " * furnished to do so, subject to the following conditions:\n" |
646 | " *\n" |
647 | " * The above copyright notice and this permission notice shall be included in\n" |
648 | " * all copies or substantial portions of the Software.\n" |
649 | " *\n" |
650 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
651 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
652 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
653 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
654 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
655 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
656 | " * THE SOFTWARE.\n" |
657 | " *\n" |
658 | " *===-----------------------------------------------------------------------===\n" |
659 | " */\n" |
660 | "\n" |
661 | "#ifndef __CLANG_CUDA_COMPLEX_BUILTINS\n" |
662 | "#define __CLANG_CUDA_COMPLEX_BUILTINS\n" |
663 | "\n" |
664 | "// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are\n" |
665 | "// libgcc functions that clang assumes are available when compiling c99 complex\n" |
666 | "// operations. (These implementations come from libc++, and have been modified\n" |
667 | "// to work with CUDA.)\n" |
668 | "\n" |
669 | "extern \"C\" inline __device__ double _Complex __muldc3(double __a, double __b,\n" |
670 | " double __c, double __d) {\n" |
671 | " double __ac = __a * __c;\n" |
672 | " double __bd = __b * __d;\n" |
673 | " double __ad = __a * __d;\n" |
674 | " double __bc = __b * __c;\n" |
675 | " double _Complex z;\n" |
676 | " __real__(z) = __ac - __bd;\n" |
677 | " __imag__(z) = __ad + __bc;\n" |
678 | " if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n" |
679 | " int __recalc = 0;\n" |
680 | " if (std::isinf(__a) || std::isinf(__b)) {\n" |
681 | " __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);\n" |
682 | " __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);\n" |
683 | " if (std::isnan(__c))\n" |
684 | " __c = std::copysign(0, __c);\n" |
685 | " if (std::isnan(__d))\n" |
686 | " __d = std::copysign(0, __d);\n" |
687 | " __recalc = 1;\n" |
688 | " }\n" |
689 | " if (std::isinf(__c) || std::isinf(__d)) {\n" |
690 | " __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);\n" |
691 | " __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);\n" |
692 | " if (std::isnan(__a))\n" |
693 | " __a = std::copysign(0, __a);\n" |
694 | " if (std::isnan(__b))\n" |
695 | " __b = std::copysign(0, __b);\n" |
696 | " __recalc = 1;\n" |
697 | " }\n" |
698 | " if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||\n" |
699 | " std::isinf(__ad) || std::isinf(__bc))) {\n" |
700 | " if (std::isnan(__a))\n" |
701 | " __a = std::copysign(0, __a);\n" |
702 | " if (std::isnan(__b))\n" |
703 | " __b = std::copysign(0, __b);\n" |
704 | " if (std::isnan(__c))\n" |
705 | " __c = std::copysign(0, __c);\n" |
706 | " if (std::isnan(__d))\n" |
707 | " __d = std::copysign(0, __d);\n" |
708 | " __recalc = 1;\n" |
709 | " }\n" |
710 | " if (__recalc) {\n" |
711 | " // Can't use std::numeric_limits<double>::infinity() -- that doesn't have\n" |
712 | " // a device overload (and isn't constexpr before C++11, naturally).\n" |
713 | " __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);\n" |
714 | " __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);\n" |
715 | " }\n" |
716 | " }\n" |
717 | " return z;\n" |
718 | "}\n" |
719 | "\n" |
720 | "extern \"C\" inline __device__ float _Complex __mulsc3(float __a, float __b,\n" |
721 | " float __c, float __d) {\n" |
722 | " float __ac = __a * __c;\n" |
723 | " float __bd = __b * __d;\n" |
724 | " float __ad = __a * __d;\n" |
725 | " float __bc = __b * __c;\n" |
726 | " float _Complex z;\n" |
727 | " __real__(z) = __ac - __bd;\n" |
728 | " __imag__(z) = __ad + __bc;\n" |
729 | " if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n" |
730 | " int __recalc = 0;\n" |
731 | " if (std::isinf(__a) || std::isinf(__b)) {\n" |
732 | " __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);\n" |
733 | " __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);\n" |
734 | " if (std::isnan(__c))\n" |
735 | " __c = std::copysign(0, __c);\n" |
736 | " if (std::isnan(__d))\n" |
737 | " __d = std::copysign(0, __d);\n" |
738 | " __recalc = 1;\n" |
739 | " }\n" |
740 | " if (std::isinf(__c) || std::isinf(__d)) {\n" |
741 | " __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);\n" |
742 | " __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);\n" |
743 | " if (std::isnan(__a))\n" |
744 | " __a = std::copysign(0, __a);\n" |
745 | " if (std::isnan(__b))\n" |
746 | " __b = std::copysign(0, __b);\n" |
747 | " __recalc = 1;\n" |
748 | " }\n" |
749 | " if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||\n" |
750 | " std::isinf(__ad) || std::isinf(__bc))) {\n" |
751 | " if (std::isnan(__a))\n" |
752 | " __a = std::copysign(0, __a);\n" |
753 | " if (std::isnan(__b))\n" |
754 | " __b = std::copysign(0, __b);\n" |
755 | " if (std::isnan(__c))\n" |
756 | " __c = std::copysign(0, __c);\n" |
757 | " if (std::isnan(__d))\n" |
758 | " __d = std::copysign(0, __d);\n" |
759 | " __recalc = 1;\n" |
760 | " }\n" |
761 | " if (__recalc) {\n" |
762 | " __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);\n" |
763 | " __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);\n" |
764 | " }\n" |
765 | " }\n" |
766 | " return z;\n" |
767 | "}\n" |
768 | "\n" |
769 | "extern \"C\" inline __device__ double _Complex __divdc3(double __a, double __b,\n" |
770 | " double __c, double __d) {\n" |
771 | " int __ilogbw = 0;\n" |
772 | " // Can't use std::max, because that's defined in <algorithm>, and we don't\n" |
773 | " // want to pull that in for every compile. The CUDA headers define\n" |
774 | " // ::max(float, float) and ::max(double, double), which is sufficient for us.\n" |
775 | " double __logbw = std::logb(max(std::abs(__c), std::abs(__d)));\n" |
776 | " if (std::isfinite(__logbw)) {\n" |
777 | " __ilogbw = (int)__logbw;\n" |
778 | " __c = std::scalbn(__c, -__ilogbw);\n" |
779 | " __d = std::scalbn(__d, -__ilogbw);\n" |
780 | " }\n" |
781 | " double __denom = __c * __c + __d * __d;\n" |
782 | " double _Complex z;\n" |
783 | " __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);\n" |
784 | " __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);\n" |
785 | " if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n" |
786 | " if ((__denom == 0.0) && (!std::isnan(__a) || !std::isnan(__b))) {\n" |
787 | " __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;\n" |
788 | " __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;\n" |
789 | " } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&\n" |
790 | " std::isfinite(__d)) {\n" |
791 | " __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a);\n" |
792 | " __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b);\n" |
793 | " __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);\n" |
794 | " __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);\n" |
795 | " } else if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) &&\n" |
796 | " std::isfinite(__b)) {\n" |
797 | " __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c);\n" |
798 | " __d = std::copysign(std::isinf(__d) ? 1.0 : 0.0, __d);\n" |
799 | " __real__(z) = 0.0 * (__a * __c + __b * __d);\n" |
800 | " __imag__(z) = 0.0 * (__b * __c - __a * __d);\n" |
801 | " }\n" |
802 | " }\n" |
803 | " return z;\n" |
804 | "}\n" |
805 | "\n" |
806 | "extern \"C\" inline __device__ float _Complex __divsc3(float __a, float __b,\n" |
807 | " float __c, float __d) {\n" |
808 | " int __ilogbw = 0;\n" |
809 | " float __logbw = std::logb(max(std::abs(__c), std::abs(__d)));\n" |
810 | " if (std::isfinite(__logbw)) {\n" |
811 | " __ilogbw = (int)__logbw;\n" |
812 | " __c = std::scalbn(__c, -__ilogbw);\n" |
813 | " __d = std::scalbn(__d, -__ilogbw);\n" |
814 | " }\n" |
815 | " float __denom = __c * __c + __d * __d;\n" |
816 | " float _Complex z;\n" |
817 | " __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);\n" |
818 | " __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);\n" |
819 | " if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n" |
820 | " if ((__denom == 0) && (!std::isnan(__a) || !std::isnan(__b))) {\n" |
821 | " __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;\n" |
822 | " __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;\n" |
823 | " } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&\n" |
824 | " std::isfinite(__d)) {\n" |
825 | " __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);\n" |
826 | " __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);\n" |
827 | " __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);\n" |
828 | " __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);\n" |
829 | " } else if (std::isinf(__logbw) && __logbw > 0 && std::isfinite(__a) &&\n" |
830 | " std::isfinite(__b)) {\n" |
831 | " __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);\n" |
832 | " __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);\n" |
833 | " __real__(z) = 0 * (__a * __c + __b * __d);\n" |
834 | " __imag__(z) = 0 * (__b * __c - __a * __d);\n" |
835 | " }\n" |
836 | " }\n" |
837 | " return z;\n" |
838 | "}\n" |
839 | "\n" |
840 | "#endif // __CLANG_CUDA_COMPLEX_BUILTINS\n" |
841 | "" } , |
842 | { "/builtins/__clang_cuda_device_functions.h" , "/*===---- __clang_cuda_device_functions.h - CUDA runtime support -----------===\n" |
843 | " *\n" |
844 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
845 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
846 | " * in the Software without restriction, including without limitation the rights\n" |
847 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
848 | " * copies of the Software, and to permit persons to whom the Software is\n" |
849 | " * furnished to do so, subject to the following conditions:\n" |
850 | " *\n" |
851 | " * The above copyright notice and this permission notice shall be included in\n" |
852 | " * all copies or substantial portions of the Software.\n" |
853 | " *\n" |
854 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
855 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
856 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
857 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
858 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
859 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
860 | " * THE SOFTWARE.\n" |
861 | " *\n" |
862 | " *===-----------------------------------------------------------------------===\n" |
863 | " */\n" |
864 | "\n" |
865 | "#ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__\n" |
866 | "#define __CLANG_CUDA_DEVICE_FUNCTIONS_H__\n" |
867 | "\n" |
868 | "#if CUDA_VERSION < 9000\n" |
869 | "#error This file is intended to be used with CUDA-9+ only.\n" |
870 | "#endif\n" |
871 | "\n" |
872 | "// __DEVICE__ is a helper macro with common set of attributes for the wrappers\n" |
873 | "// we implement in this file. We need static in order to avoid emitting unused\n" |
874 | "// functions and __forceinline__ helps inlining these wrappers at -O1.\n" |
875 | "#pragma push_macro(\"__DEVICE__\")\n" |
876 | "#define __DEVICE__ static __device__ __forceinline__\n" |
877 | "\n" |
878 | "// libdevice provides fast low precision and slow full-precision implementations\n" |
879 | "// for some functions. Which one gets selected depends on\n" |
880 | "// __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if\n" |
881 | "// -ffast-math or -fcuda-approx-transcendentals are in effect.\n" |
882 | "#pragma push_macro(\"__FAST_OR_SLOW\")\n" |
883 | "#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)\n" |
884 | "#define __FAST_OR_SLOW(fast, slow) fast\n" |
885 | "#else\n" |
886 | "#define __FAST_OR_SLOW(fast, slow) slow\n" |
887 | "#endif\n" |
888 | "\n" |
889 | "__DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); }\n" |
890 | "__DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); }\n" |
891 | "__DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); }\n" |
892 | "__DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); }\n" |
893 | "__DEVICE__ unsigned long long __brevll(unsigned long long __a) {\n" |
894 | " return __nv_brevll(__a);\n" |
895 | "}\n" |
896 | "__DEVICE__ void __brkpt() { asm volatile(\"brkpt;\"); }\n" |
897 | "__DEVICE__ void __brkpt(int __a) { __brkpt(); }\n" |
898 | "__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,\n" |
899 | " unsigned int __c) {\n" |
900 | " return __nv_byte_perm(__a, __b, __c);\n" |
901 | "}\n" |
902 | "__DEVICE__ int __clz(int __a) { return __nv_clz(__a); }\n" |
903 | "__DEVICE__ int __clzll(long long __a) { return __nv_clzll(__a); }\n" |
904 | "__DEVICE__ float __cosf(float __a) { return __nv_fast_cosf(__a); }\n" |
905 | "__DEVICE__ double __dAtomicAdd(double *__p, double __v) {\n" |
906 | " return __nvvm_atom_add_gen_d(__p, __v);\n" |
907 | "}\n" |
908 | "__DEVICE__ double __dAtomicAdd_block(double *__p, double __v) {\n" |
909 | " return __nvvm_atom_cta_add_gen_d(__p, __v);\n" |
910 | "}\n" |
911 | "__DEVICE__ double __dAtomicAdd_system(double *__p, double __v) {\n" |
912 | " return __nvvm_atom_sys_add_gen_d(__p, __v);\n" |
913 | "}\n" |
914 | "__DEVICE__ double __dadd_rd(double __a, double __b) {\n" |
915 | " return __nv_dadd_rd(__a, __b);\n" |
916 | "}\n" |
917 | "__DEVICE__ double __dadd_rn(double __a, double __b) {\n" |
918 | " return __nv_dadd_rn(__a, __b);\n" |
919 | "}\n" |
920 | "__DEVICE__ double __dadd_ru(double __a, double __b) {\n" |
921 | " return __nv_dadd_ru(__a, __b);\n" |
922 | "}\n" |
923 | "__DEVICE__ double __dadd_rz(double __a, double __b) {\n" |
924 | " return __nv_dadd_rz(__a, __b);\n" |
925 | "}\n" |
926 | "__DEVICE__ double __ddiv_rd(double __a, double __b) {\n" |
927 | " return __nv_ddiv_rd(__a, __b);\n" |
928 | "}\n" |
929 | "__DEVICE__ double __ddiv_rn(double __a, double __b) {\n" |
930 | " return __nv_ddiv_rn(__a, __b);\n" |
931 | "}\n" |
932 | "__DEVICE__ double __ddiv_ru(double __a, double __b) {\n" |
933 | " return __nv_ddiv_ru(__a, __b);\n" |
934 | "}\n" |
935 | "__DEVICE__ double __ddiv_rz(double __a, double __b) {\n" |
936 | " return __nv_ddiv_rz(__a, __b);\n" |
937 | "}\n" |
938 | "__DEVICE__ double __dmul_rd(double __a, double __b) {\n" |
939 | " return __nv_dmul_rd(__a, __b);\n" |
940 | "}\n" |
941 | "__DEVICE__ double __dmul_rn(double __a, double __b) {\n" |
942 | " return __nv_dmul_rn(__a, __b);\n" |
943 | "}\n" |
944 | "__DEVICE__ double __dmul_ru(double __a, double __b) {\n" |
945 | " return __nv_dmul_ru(__a, __b);\n" |
946 | "}\n" |
947 | "__DEVICE__ double __dmul_rz(double __a, double __b) {\n" |
948 | " return __nv_dmul_rz(__a, __b);\n" |
949 | "}\n" |
950 | "__DEVICE__ float __double2float_rd(double __a) {\n" |
951 | " return __nv_double2float_rd(__a);\n" |
952 | "}\n" |
953 | "__DEVICE__ float __double2float_rn(double __a) {\n" |
954 | " return __nv_double2float_rn(__a);\n" |
955 | "}\n" |
956 | "__DEVICE__ float __double2float_ru(double __a) {\n" |
957 | " return __nv_double2float_ru(__a);\n" |
958 | "}\n" |
959 | "__DEVICE__ float __double2float_rz(double __a) {\n" |
960 | " return __nv_double2float_rz(__a);\n" |
961 | "}\n" |
962 | "__DEVICE__ int __double2hiint(double __a) { return __nv_double2hiint(__a); }\n" |
963 | "__DEVICE__ int __double2int_rd(double __a) { return __nv_double2int_rd(__a); }\n" |
964 | "__DEVICE__ int __double2int_rn(double __a) { return __nv_double2int_rn(__a); }\n" |
965 | "__DEVICE__ int __double2int_ru(double __a) { return __nv_double2int_ru(__a); }\n" |
966 | "__DEVICE__ int __double2int_rz(double __a) { return __nv_double2int_rz(__a); }\n" |
967 | "__DEVICE__ long long __double2ll_rd(double __a) {\n" |
968 | " return __nv_double2ll_rd(__a);\n" |
969 | "}\n" |
970 | "__DEVICE__ long long __double2ll_rn(double __a) {\n" |
971 | " return __nv_double2ll_rn(__a);\n" |
972 | "}\n" |
973 | "__DEVICE__ long long __double2ll_ru(double __a) {\n" |
974 | " return __nv_double2ll_ru(__a);\n" |
975 | "}\n" |
976 | "__DEVICE__ long long __double2ll_rz(double __a) {\n" |
977 | " return __nv_double2ll_rz(__a);\n" |
978 | "}\n" |
979 | "__DEVICE__ int __double2loint(double __a) { return __nv_double2loint(__a); }\n" |
980 | "__DEVICE__ unsigned int __double2uint_rd(double __a) {\n" |
981 | " return __nv_double2uint_rd(__a);\n" |
982 | "}\n" |
983 | "__DEVICE__ unsigned int __double2uint_rn(double __a) {\n" |
984 | " return __nv_double2uint_rn(__a);\n" |
985 | "}\n" |
986 | "__DEVICE__ unsigned int __double2uint_ru(double __a) {\n" |
987 | " return __nv_double2uint_ru(__a);\n" |
988 | "}\n" |
989 | "__DEVICE__ unsigned int __double2uint_rz(double __a) {\n" |
990 | " return __nv_double2uint_rz(__a);\n" |
991 | "}\n" |
992 | "__DEVICE__ unsigned long long __double2ull_rd(double __a) {\n" |
993 | " return __nv_double2ull_rd(__a);\n" |
994 | "}\n" |
995 | "__DEVICE__ unsigned long long __double2ull_rn(double __a) {\n" |
996 | " return __nv_double2ull_rn(__a);\n" |
997 | "}\n" |
998 | "__DEVICE__ unsigned long long __double2ull_ru(double __a) {\n" |
999 | " return __nv_double2ull_ru(__a);\n" |
1000 | "}\n" |
1001 | "__DEVICE__ unsigned long long __double2ull_rz(double __a) {\n" |
1002 | " return __nv_double2ull_rz(__a);\n" |
1003 | "}\n" |
1004 | "__DEVICE__ long long __double_as_longlong(double __a) {\n" |
1005 | " return __nv_double_as_longlong(__a);\n" |
1006 | "}\n" |
1007 | "__DEVICE__ double __drcp_rd(double __a) { return __nv_drcp_rd(__a); }\n" |
1008 | "__DEVICE__ double __drcp_rn(double __a) { return __nv_drcp_rn(__a); }\n" |
1009 | "__DEVICE__ double __drcp_ru(double __a) { return __nv_drcp_ru(__a); }\n" |
1010 | "__DEVICE__ double __drcp_rz(double __a) { return __nv_drcp_rz(__a); }\n" |
1011 | "__DEVICE__ double __dsqrt_rd(double __a) { return __nv_dsqrt_rd(__a); }\n" |
1012 | "__DEVICE__ double __dsqrt_rn(double __a) { return __nv_dsqrt_rn(__a); }\n" |
1013 | "__DEVICE__ double __dsqrt_ru(double __a) { return __nv_dsqrt_ru(__a); }\n" |
1014 | "__DEVICE__ double __dsqrt_rz(double __a) { return __nv_dsqrt_rz(__a); }\n" |
1015 | "__DEVICE__ double __dsub_rd(double __a, double __b) {\n" |
1016 | " return __nv_dsub_rd(__a, __b);\n" |
1017 | "}\n" |
1018 | "__DEVICE__ double __dsub_rn(double __a, double __b) {\n" |
1019 | " return __nv_dsub_rn(__a, __b);\n" |
1020 | "}\n" |
1021 | "__DEVICE__ double __dsub_ru(double __a, double __b) {\n" |
1022 | " return __nv_dsub_ru(__a, __b);\n" |
1023 | "}\n" |
1024 | "__DEVICE__ double __dsub_rz(double __a, double __b) {\n" |
1025 | " return __nv_dsub_rz(__a, __b);\n" |
1026 | "}\n" |
1027 | "__DEVICE__ float __exp10f(float __a) { return __nv_fast_exp10f(__a); }\n" |
1028 | "__DEVICE__ float __expf(float __a) { return __nv_fast_expf(__a); }\n" |
1029 | "__DEVICE__ float __fAtomicAdd(float *__p, float __v) {\n" |
1030 | " return __nvvm_atom_add_gen_f(__p, __v);\n" |
1031 | "}\n" |
1032 | "__DEVICE__ float __fAtomicAdd_block(float *__p, float __v) {\n" |
1033 | " return __nvvm_atom_cta_add_gen_f(__p, __v);\n" |
1034 | "}\n" |
1035 | "__DEVICE__ float __fAtomicAdd_system(float *__p, float __v) {\n" |
1036 | " return __nvvm_atom_sys_add_gen_f(__p, __v);\n" |
1037 | "}\n" |
1038 | "__DEVICE__ float __fAtomicExch(float *__p, float __v) {\n" |
1039 | " return __nv_int_as_float(\n" |
1040 | " __nvvm_atom_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));\n" |
1041 | "}\n" |
1042 | "__DEVICE__ float __fAtomicExch_block(float *__p, float __v) {\n" |
1043 | " return __nv_int_as_float(\n" |
1044 | " __nvvm_atom_cta_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));\n" |
1045 | "}\n" |
1046 | "__DEVICE__ float __fAtomicExch_system(float *__p, float __v) {\n" |
1047 | " return __nv_int_as_float(\n" |
1048 | " __nvvm_atom_sys_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));\n" |
1049 | "}\n" |
1050 | "__DEVICE__ float __fadd_rd(float __a, float __b) {\n" |
1051 | " return __nv_fadd_rd(__a, __b);\n" |
1052 | "}\n" |
1053 | "__DEVICE__ float __fadd_rn(float __a, float __b) {\n" |
1054 | " return __nv_fadd_rn(__a, __b);\n" |
1055 | "}\n" |
1056 | "__DEVICE__ float __fadd_ru(float __a, float __b) {\n" |
1057 | " return __nv_fadd_ru(__a, __b);\n" |
1058 | "}\n" |
1059 | "__DEVICE__ float __fadd_rz(float __a, float __b) {\n" |
1060 | " return __nv_fadd_rz(__a, __b);\n" |
1061 | "}\n" |
1062 | "__DEVICE__ float __fdiv_rd(float __a, float __b) {\n" |
1063 | " return __nv_fdiv_rd(__a, __b);\n" |
1064 | "}\n" |
1065 | "__DEVICE__ float __fdiv_rn(float __a, float __b) {\n" |
1066 | " return __nv_fdiv_rn(__a, __b);\n" |
1067 | "}\n" |
1068 | "__DEVICE__ float __fdiv_ru(float __a, float __b) {\n" |
1069 | " return __nv_fdiv_ru(__a, __b);\n" |
1070 | "}\n" |
1071 | "__DEVICE__ float __fdiv_rz(float __a, float __b) {\n" |
1072 | " return __nv_fdiv_rz(__a, __b);\n" |
1073 | "}\n" |
1074 | "__DEVICE__ float __fdividef(float __a, float __b) {\n" |
1075 | " return __nv_fast_fdividef(__a, __b);\n" |
1076 | "}\n" |
1077 | "__DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); }\n" |
1078 | "__DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }\n" |
1079 | "__DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }\n" |
1080 | "__DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }\n" |
1081 | "__DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }\n" |
1082 | "__DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }\n" |
1083 | "__DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); }\n" |
1084 | "__DEVICE__ int __float2int_rz(float __a) { return __nv_float2int_rz(__a); }\n" |
1085 | "__DEVICE__ long long __float2ll_rd(float __a) { return __nv_float2ll_rd(__a); }\n" |
1086 | "__DEVICE__ long long __float2ll_rn(float __a) { return __nv_float2ll_rn(__a); }\n" |
1087 | "__DEVICE__ long long __float2ll_ru(float __a) { return __nv_float2ll_ru(__a); }\n" |
1088 | "__DEVICE__ long long __float2ll_rz(float __a) { return __nv_float2ll_rz(__a); }\n" |
1089 | "__DEVICE__ unsigned int __float2uint_rd(float __a) {\n" |
1090 | " return __nv_float2uint_rd(__a);\n" |
1091 | "}\n" |
1092 | "__DEVICE__ unsigned int __float2uint_rn(float __a) {\n" |
1093 | " return __nv_float2uint_rn(__a);\n" |
1094 | "}\n" |
1095 | "__DEVICE__ unsigned int __float2uint_ru(float __a) {\n" |
1096 | " return __nv_float2uint_ru(__a);\n" |
1097 | "}\n" |
1098 | "__DEVICE__ unsigned int __float2uint_rz(float __a) {\n" |
1099 | " return __nv_float2uint_rz(__a);\n" |
1100 | "}\n" |
1101 | "__DEVICE__ unsigned long long __float2ull_rd(float __a) {\n" |
1102 | " return __nv_float2ull_rd(__a);\n" |
1103 | "}\n" |
1104 | "__DEVICE__ unsigned long long __float2ull_rn(float __a) {\n" |
1105 | " return __nv_float2ull_rn(__a);\n" |
1106 | "}\n" |
1107 | "__DEVICE__ unsigned long long __float2ull_ru(float __a) {\n" |
1108 | " return __nv_float2ull_ru(__a);\n" |
1109 | "}\n" |
1110 | "__DEVICE__ unsigned long long __float2ull_rz(float __a) {\n" |
1111 | " return __nv_float2ull_rz(__a);\n" |
1112 | "}\n" |
1113 | "__DEVICE__ int __float_as_int(float __a) { return __nv_float_as_int(__a); }\n" |
1114 | "__DEVICE__ unsigned int __float_as_uint(float __a) {\n" |
1115 | " return __nv_float_as_uint(__a);\n" |
1116 | "}\n" |
1117 | "__DEVICE__ double __fma_rd(double __a, double __b, double __c) {\n" |
1118 | " return __nv_fma_rd(__a, __b, __c);\n" |
1119 | "}\n" |
1120 | "__DEVICE__ double __fma_rn(double __a, double __b, double __c) {\n" |
1121 | " return __nv_fma_rn(__a, __b, __c);\n" |
1122 | "}\n" |
1123 | "__DEVICE__ double __fma_ru(double __a, double __b, double __c) {\n" |
1124 | " return __nv_fma_ru(__a, __b, __c);\n" |
1125 | "}\n" |
1126 | "__DEVICE__ double __fma_rz(double __a, double __b, double __c) {\n" |
1127 | " return __nv_fma_rz(__a, __b, __c);\n" |
1128 | "}\n" |
1129 | "__DEVICE__ float __fmaf_ieee_rd(float __a, float __b, float __c) {\n" |
1130 | " return __nv_fmaf_ieee_rd(__a, __b, __c);\n" |
1131 | "}\n" |
1132 | "__DEVICE__ float __fmaf_ieee_rn(float __a, float __b, float __c) {\n" |
1133 | " return __nv_fmaf_ieee_rn(__a, __b, __c);\n" |
1134 | "}\n" |
1135 | "__DEVICE__ float __fmaf_ieee_ru(float __a, float __b, float __c) {\n" |
1136 | " return __nv_fmaf_ieee_ru(__a, __b, __c);\n" |
1137 | "}\n" |
1138 | "__DEVICE__ float __fmaf_ieee_rz(float __a, float __b, float __c) {\n" |
1139 | " return __nv_fmaf_ieee_rz(__a, __b, __c);\n" |
1140 | "}\n" |
1141 | "__DEVICE__ float __fmaf_rd(float __a, float __b, float __c) {\n" |
1142 | " return __nv_fmaf_rd(__a, __b, __c);\n" |
1143 | "}\n" |
1144 | "__DEVICE__ float __fmaf_rn(float __a, float __b, float __c) {\n" |
1145 | " return __nv_fmaf_rn(__a, __b, __c);\n" |
1146 | "}\n" |
1147 | "__DEVICE__ float __fmaf_ru(float __a, float __b, float __c) {\n" |
1148 | " return __nv_fmaf_ru(__a, __b, __c);\n" |
1149 | "}\n" |
1150 | "__DEVICE__ float __fmaf_rz(float __a, float __b, float __c) {\n" |
1151 | " return __nv_fmaf_rz(__a, __b, __c);\n" |
1152 | "}\n" |
1153 | "__DEVICE__ float __fmul_rd(float __a, float __b) {\n" |
1154 | " return __nv_fmul_rd(__a, __b);\n" |
1155 | "}\n" |
1156 | "__DEVICE__ float __fmul_rn(float __a, float __b) {\n" |
1157 | " return __nv_fmul_rn(__a, __b);\n" |
1158 | "}\n" |
1159 | "__DEVICE__ float __fmul_ru(float __a, float __b) {\n" |
1160 | " return __nv_fmul_ru(__a, __b);\n" |
1161 | "}\n" |
1162 | "__DEVICE__ float __fmul_rz(float __a, float __b) {\n" |
1163 | " return __nv_fmul_rz(__a, __b);\n" |
1164 | "}\n" |
1165 | "__DEVICE__ float __frcp_rd(float __a) { return __nv_frcp_rd(__a); }\n" |
1166 | "__DEVICE__ float __frcp_rn(float __a) { return __nv_frcp_rn(__a); }\n" |
1167 | "__DEVICE__ float __frcp_ru(float __a) { return __nv_frcp_ru(__a); }\n" |
1168 | "__DEVICE__ float __frcp_rz(float __a) { return __nv_frcp_rz(__a); }\n" |
1169 | "__DEVICE__ float __frsqrt_rn(float __a) { return __nv_frsqrt_rn(__a); }\n" |
1170 | "__DEVICE__ float __fsqrt_rd(float __a) { return __nv_fsqrt_rd(__a); }\n" |
1171 | "__DEVICE__ float __fsqrt_rn(float __a) { return __nv_fsqrt_rn(__a); }\n" |
1172 | "__DEVICE__ float __fsqrt_ru(float __a) { return __nv_fsqrt_ru(__a); }\n" |
1173 | "__DEVICE__ float __fsqrt_rz(float __a) { return __nv_fsqrt_rz(__a); }\n" |
1174 | "__DEVICE__ float __fsub_rd(float __a, float __b) {\n" |
1175 | " return __nv_fsub_rd(__a, __b);\n" |
1176 | "}\n" |
1177 | "__DEVICE__ float __fsub_rn(float __a, float __b) {\n" |
1178 | " return __nv_fsub_rn(__a, __b);\n" |
1179 | "}\n" |
1180 | "__DEVICE__ float __fsub_ru(float __a, float __b) {\n" |
1181 | " return __nv_fsub_ru(__a, __b);\n" |
1182 | "}\n" |
1183 | "__DEVICE__ float __fsub_rz(float __a, float __b) {\n" |
1184 | " return __nv_fsub_rz(__a, __b);\n" |
1185 | "}\n" |
1186 | "__DEVICE__ int __hadd(int __a, int __b) { return __nv_hadd(__a, __b); }\n" |
1187 | "__DEVICE__ double __hiloint2double(int __a, int __b) {\n" |
1188 | " return __nv_hiloint2double(__a, __b);\n" |
1189 | "}\n" |
1190 | "__DEVICE__ int __iAtomicAdd(int *__p, int __v) {\n" |
1191 | " return __nvvm_atom_add_gen_i(__p, __v);\n" |
1192 | "}\n" |
1193 | "__DEVICE__ int __iAtomicAdd_block(int *__p, int __v) {\n" /* embedded header text: the wrapper is declared int, so it must return the builtin's result (as __iAtomicAdd does) instead of flowing off the end */ |
1194 | " return __nvvm_atom_cta_add_gen_i(__p, __v);\n" |
1195 | "}\n" |
1196 | "__DEVICE__ int __iAtomicAdd_system(int *__p, int __v) {\n" /* embedded header text: the wrapper is declared int, so it must return the builtin's result (as __iAtomicAdd does) instead of flowing off the end */ |
1197 | " return __nvvm_atom_sys_add_gen_i(__p, __v);\n" |
1198 | "}\n" |
1199 | "__DEVICE__ int __iAtomicAnd(int *__p, int __v) {\n" |
1200 | " return __nvvm_atom_and_gen_i(__p, __v);\n" |
1201 | "}\n" |
1202 | "__DEVICE__ int __iAtomicAnd_block(int *__p, int __v) {\n" |
1203 | " return __nvvm_atom_cta_and_gen_i(__p, __v);\n" |
1204 | "}\n" |
1205 | "__DEVICE__ int __iAtomicAnd_system(int *__p, int __v) {\n" |
1206 | " return __nvvm_atom_sys_and_gen_i(__p, __v);\n" |
1207 | "}\n" |
1208 | "__DEVICE__ int __iAtomicCAS(int *__p, int __cmp, int __v) {\n" |
1209 | " return __nvvm_atom_cas_gen_i(__p, __cmp, __v);\n" |
1210 | "}\n" |
1211 | "__DEVICE__ int __iAtomicCAS_block(int *__p, int __cmp, int __v) {\n" |
1212 | " return __nvvm_atom_cta_cas_gen_i(__p, __cmp, __v);\n" |
1213 | "}\n" |
1214 | "__DEVICE__ int __iAtomicCAS_system(int *__p, int __cmp, int __v) {\n" |
1215 | " return __nvvm_atom_sys_cas_gen_i(__p, __cmp, __v);\n" |
1216 | "}\n" |
1217 | "__DEVICE__ int __iAtomicExch(int *__p, int __v) {\n" |
1218 | " return __nvvm_atom_xchg_gen_i(__p, __v);\n" |
1219 | "}\n" |
1220 | "__DEVICE__ int __iAtomicExch_block(int *__p, int __v) {\n" |
1221 | " return __nvvm_atom_cta_xchg_gen_i(__p, __v);\n" |
1222 | "}\n" |
1223 | "__DEVICE__ int __iAtomicExch_system(int *__p, int __v) {\n" |
1224 | " return __nvvm_atom_sys_xchg_gen_i(__p, __v);\n" |
1225 | "}\n" |
1226 | "__DEVICE__ int __iAtomicMax(int *__p, int __v) {\n" |
1227 | " return __nvvm_atom_max_gen_i(__p, __v);\n" |
1228 | "}\n" |
1229 | "__DEVICE__ int __iAtomicMax_block(int *__p, int __v) {\n" |
1230 | " return __nvvm_atom_cta_max_gen_i(__p, __v);\n" |
1231 | "}\n" |
1232 | "__DEVICE__ int __iAtomicMax_system(int *__p, int __v) {\n" |
1233 | " return __nvvm_atom_sys_max_gen_i(__p, __v);\n" |
1234 | "}\n" |
1235 | "__DEVICE__ int __iAtomicMin(int *__p, int __v) {\n" |
1236 | " return __nvvm_atom_min_gen_i(__p, __v);\n" |
1237 | "}\n" |
1238 | "__DEVICE__ int __iAtomicMin_block(int *__p, int __v) {\n" |
1239 | " return __nvvm_atom_cta_min_gen_i(__p, __v);\n" |
1240 | "}\n" |
1241 | "__DEVICE__ int __iAtomicMin_system(int *__p, int __v) {\n" |
1242 | " return __nvvm_atom_sys_min_gen_i(__p, __v);\n" |
1243 | "}\n" |
1244 | "__DEVICE__ int __iAtomicOr(int *__p, int __v) {\n" |
1245 | " return __nvvm_atom_or_gen_i(__p, __v);\n" |
1246 | "}\n" |
1247 | "__DEVICE__ int __iAtomicOr_block(int *__p, int __v) {\n" |
1248 | " return __nvvm_atom_cta_or_gen_i(__p, __v);\n" |
1249 | "}\n" |
1250 | "__DEVICE__ int __iAtomicOr_system(int *__p, int __v) {\n" |
1251 | " return __nvvm_atom_sys_or_gen_i(__p, __v);\n" |
1252 | "}\n" |
1253 | "__DEVICE__ int __iAtomicXor(int *__p, int __v) {\n" |
1254 | " return __nvvm_atom_xor_gen_i(__p, __v);\n" |
1255 | "}\n" |
1256 | "__DEVICE__ int __iAtomicXor_block(int *__p, int __v) {\n" |
1257 | " return __nvvm_atom_cta_xor_gen_i(__p, __v);\n" |
1258 | "}\n" |
1259 | "__DEVICE__ int __iAtomicXor_system(int *__p, int __v) {\n" |
1260 | " return __nvvm_atom_sys_xor_gen_i(__p, __v);\n" |
1261 | "}\n" |
1262 | "__DEVICE__ long long __illAtomicMax(long long *__p, long long __v) {\n" |
1263 | " return __nvvm_atom_max_gen_ll(__p, __v);\n" |
1264 | "}\n" |
1265 | "__DEVICE__ long long __illAtomicMax_block(long long *__p, long long __v) {\n" |
1266 | " return __nvvm_atom_cta_max_gen_ll(__p, __v);\n" |
1267 | "}\n" |
1268 | "__DEVICE__ long long __illAtomicMax_system(long long *__p, long long __v) {\n" |
1269 | " return __nvvm_atom_sys_max_gen_ll(__p, __v);\n" |
1270 | "}\n" |
1271 | "__DEVICE__ long long __illAtomicMin(long long *__p, long long __v) {\n" |
1272 | " return __nvvm_atom_min_gen_ll(__p, __v);\n" |
1273 | "}\n" |
1274 | "__DEVICE__ long long __illAtomicMin_block(long long *__p, long long __v) {\n" |
1275 | " return __nvvm_atom_cta_min_gen_ll(__p, __v);\n" |
1276 | "}\n" |
1277 | "__DEVICE__ long long __illAtomicMin_system(long long *__p, long long __v) {\n" |
1278 | " return __nvvm_atom_sys_min_gen_ll(__p, __v);\n" |
1279 | "}\n" |
1280 | "__DEVICE__ double __int2double_rn(int __a) { return __nv_int2double_rn(__a); }\n" |
1281 | "__DEVICE__ float __int2float_rd(int __a) { return __nv_int2float_rd(__a); }\n" |
1282 | "__DEVICE__ float __int2float_rn(int __a) { return __nv_int2float_rn(__a); }\n" |
1283 | "__DEVICE__ float __int2float_ru(int __a) { return __nv_int2float_ru(__a); }\n" |
1284 | "__DEVICE__ float __int2float_rz(int __a) { return __nv_int2float_rz(__a); }\n" |
1285 | "__DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); }\n" |
1286 | "__DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); }\n" |
1287 | "__DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); }\n" |
1288 | "__DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); }\n" |
1289 | "__DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); }\n" |
1290 | "__DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); }\n" |
1291 | "__DEVICE__ double __ll2double_rd(long long __a) {\n" |
1292 | " return __nv_ll2double_rd(__a);\n" |
1293 | "}\n" |
1294 | "__DEVICE__ double __ll2double_rn(long long __a) {\n" |
1295 | " return __nv_ll2double_rn(__a);\n" |
1296 | "}\n" |
1297 | "__DEVICE__ double __ll2double_ru(long long __a) {\n" |
1298 | " return __nv_ll2double_ru(__a);\n" |
1299 | "}\n" |
1300 | "__DEVICE__ double __ll2double_rz(long long __a) {\n" |
1301 | " return __nv_ll2double_rz(__a);\n" |
1302 | "}\n" |
1303 | "__DEVICE__ float __ll2float_rd(long long __a) { return __nv_ll2float_rd(__a); }\n" |
1304 | "__DEVICE__ float __ll2float_rn(long long __a) { return __nv_ll2float_rn(__a); }\n" |
1305 | "__DEVICE__ float __ll2float_ru(long long __a) { return __nv_ll2float_ru(__a); }\n" |
1306 | "__DEVICE__ float __ll2float_rz(long long __a) { return __nv_ll2float_rz(__a); }\n" |
1307 | "__DEVICE__ long long __llAtomicAnd(long long *__p, long long __v) {\n" |
1308 | " return __nvvm_atom_and_gen_ll(__p, __v);\n" |
1309 | "}\n" |
1310 | "__DEVICE__ long long __llAtomicAnd_block(long long *__p, long long __v) {\n" |
1311 | " return __nvvm_atom_cta_and_gen_ll(__p, __v);\n" |
1312 | "}\n" |
1313 | "__DEVICE__ long long __llAtomicAnd_system(long long *__p, long long __v) {\n" |
1314 | " return __nvvm_atom_sys_and_gen_ll(__p, __v);\n" |
1315 | "}\n" |
1316 | "__DEVICE__ long long __llAtomicOr(long long *__p, long long __v) {\n" |
1317 | " return __nvvm_atom_or_gen_ll(__p, __v);\n" |
1318 | "}\n" |
1319 | "__DEVICE__ long long __llAtomicOr_block(long long *__p, long long __v) {\n" |
1320 | " return __nvvm_atom_cta_or_gen_ll(__p, __v);\n" |
1321 | "}\n" |
1322 | "__DEVICE__ long long __llAtomicOr_system(long long *__p, long long __v) {\n" |
1323 | " return __nvvm_atom_sys_or_gen_ll(__p, __v);\n" |
1324 | "}\n" |
1325 | "__DEVICE__ long long __llAtomicXor(long long *__p, long long __v) {\n" |
1326 | " return __nvvm_atom_xor_gen_ll(__p, __v);\n" |
1327 | "}\n" |
1328 | "__DEVICE__ long long __llAtomicXor_block(long long *__p, long long __v) {\n" |
1329 | " return __nvvm_atom_cta_xor_gen_ll(__p, __v);\n" |
1330 | "}\n" |
1331 | "__DEVICE__ long long __llAtomicXor_system(long long *__p, long long __v) {\n" |
1332 | " return __nvvm_atom_sys_xor_gen_ll(__p, __v);\n" |
1333 | "}\n" |
1334 | "__DEVICE__ float __log10f(float __a) { return __nv_fast_log10f(__a); }\n" |
1335 | "__DEVICE__ float __log2f(float __a) { return __nv_fast_log2f(__a); }\n" |
1336 | "__DEVICE__ float __logf(float __a) { return __nv_fast_logf(__a); }\n" |
1337 | "__DEVICE__ double __longlong_as_double(long long __a) {\n" |
1338 | " return __nv_longlong_as_double(__a);\n" |
1339 | "}\n" |
1340 | "__DEVICE__ int __mul24(int __a, int __b) { return __nv_mul24(__a, __b); }\n" |
1341 | "__DEVICE__ long long __mul64hi(long long __a, long long __b) {\n" |
1342 | " return __nv_mul64hi(__a, __b);\n" |
1343 | "}\n" |
1344 | "__DEVICE__ int __mulhi(int __a, int __b) { return __nv_mulhi(__a, __b); }\n" |
1345 | "__DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); }\n" |
1346 | "__DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); }\n" |
1347 | "__DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); }\n" |
1348 | "__DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); }\n" |
1349 | "__DEVICE__ int __popc(int __a) { return __nv_popc(__a); }\n" |
1350 | "__DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); }\n" |
1351 | "__DEVICE__ float __powf(float __a, float __b) {\n" |
1352 | " return __nv_fast_powf(__a, __b);\n" |
1353 | "}\n" |
1354 | "\n" |
1355 | "// Parameter must have a known integer value.\n" |
1356 | "#define __prof_trigger(__a) asm __volatile__(\"pmevent \\t%0;\" ::\"i\"(__a))\n" |
1357 | "__DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); }\n" |
1358 | "__DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {\n" |
1359 | " return __nv_sad(__a, __b, __c);\n" |
1360 | "}\n" |
1361 | "__DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); }\n" |
1362 | "__DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); }\n" |
1363 | "__DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); }\n" |
1364 | "__DEVICE__ void __sincosf(float __a, float *__sptr, float *__cptr) {\n" |
1365 | " return __nv_fast_sincosf(__a, __sptr, __cptr);\n" |
1366 | "}\n" |
1367 | "__DEVICE__ float __sinf(float __a) { return __nv_fast_sinf(__a); }\n" |
1368 | "__DEVICE__ int __syncthreads_and(int __a) { return __nvvm_bar0_and(__a); }\n" |
1369 | "__DEVICE__ int __syncthreads_count(int __a) { return __nvvm_bar0_popc(__a); }\n" |
1370 | "__DEVICE__ int __syncthreads_or(int __a) { return __nvvm_bar0_or(__a); }\n" |
1371 | "__DEVICE__ float __tanf(float __a) { return __nv_fast_tanf(__a); }\n" |
1372 | "__DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); }\n" |
1373 | "__DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); };\n" |
1374 | "__DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); };\n" |
1375 | "__DEVICE__ void __trap(void) { asm volatile(\"trap;\"); }\n" |
1376 | "__DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) {\n" |
1377 | " return __nvvm_atom_add_gen_i((int *)__p, __v);\n" |
1378 | "}\n" |
1379 | "__DEVICE__ unsigned int __uAtomicAdd_block(unsigned int *__p,\n" |
1380 | " unsigned int __v) {\n" |
1381 | " return __nvvm_atom_cta_add_gen_i((int *)__p, __v);\n" |
1382 | "}\n" |
1383 | "__DEVICE__ unsigned int __uAtomicAdd_system(unsigned int *__p,\n" |
1384 | " unsigned int __v) {\n" |
1385 | " return __nvvm_atom_sys_add_gen_i((int *)__p, __v);\n" |
1386 | "}\n" |
1387 | "__DEVICE__ unsigned int __uAtomicAnd(unsigned int *__p, unsigned int __v) {\n" |
1388 | " return __nvvm_atom_and_gen_i((int *)__p, __v);\n" |
1389 | "}\n" |
1390 | "__DEVICE__ unsigned int __uAtomicAnd_block(unsigned int *__p,\n" |
1391 | " unsigned int __v) {\n" |
1392 | " return __nvvm_atom_cta_and_gen_i((int *)__p, __v);\n" |
1393 | "}\n" |
1394 | "__DEVICE__ unsigned int __uAtomicAnd_system(unsigned int *__p,\n" |
1395 | " unsigned int __v) {\n" |
1396 | " return __nvvm_atom_sys_and_gen_i((int *)__p, __v);\n" |
1397 | "}\n" |
1398 | "__DEVICE__ unsigned int __uAtomicCAS(unsigned int *__p, unsigned int __cmp,\n" |
1399 | " unsigned int __v) {\n" |
1400 | " return __nvvm_atom_cas_gen_i((int *)__p, __cmp, __v);\n" |
1401 | "}\n" |
1402 | "__DEVICE__ unsigned int\n" |
1403 | "__uAtomicCAS_block(unsigned int *__p, unsigned int __cmp, unsigned int __v) {\n" |
1404 | " return __nvvm_atom_cta_cas_gen_i((int *)__p, __cmp, __v);\n" |
1405 | "}\n" |
1406 | "__DEVICE__ unsigned int\n" |
1407 | "__uAtomicCAS_system(unsigned int *__p, unsigned int __cmp, unsigned int __v) {\n" |
1408 | " return __nvvm_atom_sys_cas_gen_i((int *)__p, __cmp, __v);\n" |
1409 | "}\n" |
1410 | "__DEVICE__ unsigned int __uAtomicDec(unsigned int *__p, unsigned int __v) {\n" |
1411 | " return __nvvm_atom_dec_gen_ui(__p, __v);\n" |
1412 | "}\n" |
1413 | "__DEVICE__ unsigned int __uAtomicDec_block(unsigned int *__p,\n" |
1414 | " unsigned int __v) {\n" |
1415 | " return __nvvm_atom_cta_dec_gen_ui(__p, __v);\n" |
1416 | "}\n" |
1417 | "__DEVICE__ unsigned int __uAtomicDec_system(unsigned int *__p,\n" |
1418 | " unsigned int __v) {\n" |
1419 | " return __nvvm_atom_sys_dec_gen_ui(__p, __v);\n" |
1420 | "}\n" |
1421 | "__DEVICE__ unsigned int __uAtomicExch(unsigned int *__p, unsigned int __v) {\n" |
1422 | " return __nvvm_atom_xchg_gen_i((int *)__p, __v);\n" |
1423 | "}\n" |
1424 | "__DEVICE__ unsigned int __uAtomicExch_block(unsigned int *__p,\n" |
1425 | " unsigned int __v) {\n" |
1426 | " return __nvvm_atom_cta_xchg_gen_i((int *)__p, __v);\n" |
1427 | "}\n" |
1428 | "__DEVICE__ unsigned int __uAtomicExch_system(unsigned int *__p,\n" |
1429 | " unsigned int __v) {\n" |
1430 | " return __nvvm_atom_sys_xchg_gen_i((int *)__p, __v);\n" |
1431 | "}\n" |
1432 | "__DEVICE__ unsigned int __uAtomicInc(unsigned int *__p, unsigned int __v) {\n" |
1433 | " return __nvvm_atom_inc_gen_ui(__p, __v);\n" |
1434 | "}\n" |
1435 | "__DEVICE__ unsigned int __uAtomicInc_block(unsigned int *__p,\n" |
1436 | " unsigned int __v) {\n" |
1437 | " return __nvvm_atom_cta_inc_gen_ui(__p, __v);\n" |
1438 | "}\n" |
1439 | "__DEVICE__ unsigned int __uAtomicInc_system(unsigned int *__p,\n" |
1440 | " unsigned int __v) {\n" |
1441 | " return __nvvm_atom_sys_inc_gen_ui(__p, __v);\n" |
1442 | "}\n" |
1443 | "__DEVICE__ unsigned int __uAtomicMax(unsigned int *__p, unsigned int __v) {\n" |
1444 | " return __nvvm_atom_max_gen_ui(__p, __v);\n" |
1445 | "}\n" |
1446 | "__DEVICE__ unsigned int __uAtomicMax_block(unsigned int *__p,\n" |
1447 | " unsigned int __v) {\n" |
1448 | " return __nvvm_atom_cta_max_gen_ui(__p, __v);\n" |
1449 | "}\n" |
1450 | "__DEVICE__ unsigned int __uAtomicMax_system(unsigned int *__p,\n" |
1451 | " unsigned int __v) {\n" |
1452 | " return __nvvm_atom_sys_max_gen_ui(__p, __v);\n" |
1453 | "}\n" |
1454 | "__DEVICE__ unsigned int __uAtomicMin(unsigned int *__p, unsigned int __v) {\n" |
1455 | " return __nvvm_atom_min_gen_ui(__p, __v);\n" |
1456 | "}\n" |
1457 | "__DEVICE__ unsigned int __uAtomicMin_block(unsigned int *__p,\n" |
1458 | " unsigned int __v) {\n" |
1459 | " return __nvvm_atom_cta_min_gen_ui(__p, __v);\n" |
1460 | "}\n" |
1461 | "__DEVICE__ unsigned int __uAtomicMin_system(unsigned int *__p,\n" |
1462 | " unsigned int __v) {\n" |
1463 | " return __nvvm_atom_sys_min_gen_ui(__p, __v);\n" |
1464 | "}\n" |
1465 | "__DEVICE__ unsigned int __uAtomicOr(unsigned int *__p, unsigned int __v) {\n" |
1466 | " return __nvvm_atom_or_gen_i((int *)__p, __v);\n" |
1467 | "}\n" |
1468 | "__DEVICE__ unsigned int __uAtomicOr_block(unsigned int *__p, unsigned int __v) {\n" |
1469 | " return __nvvm_atom_cta_or_gen_i((int *)__p, __v);\n" |
1470 | "}\n" |
1471 | "__DEVICE__ unsigned int __uAtomicOr_system(unsigned int *__p,\n" |
1472 | " unsigned int __v) {\n" |
1473 | " return __nvvm_atom_sys_or_gen_i((int *)__p, __v);\n" |
1474 | "}\n" |
1475 | "__DEVICE__ unsigned int __uAtomicXor(unsigned int *__p, unsigned int __v) {\n" |
1476 | " return __nvvm_atom_xor_gen_i((int *)__p, __v);\n" |
1477 | "}\n" |
1478 | "__DEVICE__ unsigned int __uAtomicXor_block(unsigned int *__p,\n" |
1479 | " unsigned int __v) {\n" |
1480 | " return __nvvm_atom_cta_xor_gen_i((int *)__p, __v);\n" |
1481 | "}\n" |
1482 | "__DEVICE__ unsigned int __uAtomicXor_system(unsigned int *__p,\n" |
1483 | " unsigned int __v) {\n" |
1484 | " return __nvvm_atom_sys_xor_gen_i((int *)__p, __v);\n" |
1485 | "}\n" |
1486 | "__DEVICE__ unsigned int __uhadd(unsigned int __a, unsigned int __b) {\n" |
1487 | " return __nv_uhadd(__a, __b);\n" |
1488 | "}\n" |
1489 | "__DEVICE__ double __uint2double_rn(unsigned int __a) {\n" |
1490 | " return __nv_uint2double_rn(__a);\n" |
1491 | "}\n" |
1492 | "__DEVICE__ float __uint2float_rd(unsigned int __a) {\n" |
1493 | " return __nv_uint2float_rd(__a);\n" |
1494 | "}\n" |
1495 | "__DEVICE__ float __uint2float_rn(unsigned int __a) {\n" |
1496 | " return __nv_uint2float_rn(__a);\n" |
1497 | "}\n" |
1498 | "__DEVICE__ float __uint2float_ru(unsigned int __a) {\n" |
1499 | " return __nv_uint2float_ru(__a);\n" |
1500 | "}\n" |
1501 | "__DEVICE__ float __uint2float_rz(unsigned int __a) {\n" |
1502 | " return __nv_uint2float_rz(__a);\n" |
1503 | "}\n" |
1504 | "__DEVICE__ float __uint_as_float(unsigned int __a) {\n" |
1505 | " return __nv_uint_as_float(__a);\n" |
1506 | "} //\n" |
1507 | "__DEVICE__ double __ull2double_rd(unsigned long long __a) {\n" |
1508 | " return __nv_ull2double_rd(__a);\n" |
1509 | "}\n" |
1510 | "__DEVICE__ double __ull2double_rn(unsigned long long __a) {\n" |
1511 | " return __nv_ull2double_rn(__a);\n" |
1512 | "}\n" |
1513 | "__DEVICE__ double __ull2double_ru(unsigned long long __a) {\n" |
1514 | " return __nv_ull2double_ru(__a);\n" |
1515 | "}\n" |
1516 | "__DEVICE__ double __ull2double_rz(unsigned long long __a) {\n" |
1517 | " return __nv_ull2double_rz(__a);\n" |
1518 | "}\n" |
1519 | "__DEVICE__ float __ull2float_rd(unsigned long long __a) {\n" |
1520 | " return __nv_ull2float_rd(__a);\n" |
1521 | "}\n" |
1522 | "__DEVICE__ float __ull2float_rn(unsigned long long __a) {\n" |
1523 | " return __nv_ull2float_rn(__a);\n" |
1524 | "}\n" |
1525 | "__DEVICE__ float __ull2float_ru(unsigned long long __a) {\n" |
1526 | " return __nv_ull2float_ru(__a);\n" |
1527 | "}\n" |
1528 | "__DEVICE__ float __ull2float_rz(unsigned long long __a) {\n" |
1529 | " return __nv_ull2float_rz(__a);\n" |
1530 | "}\n" |
1531 | "__DEVICE__ unsigned long long __ullAtomicAdd(unsigned long long *__p,\n" |
1532 | " unsigned long long __v) {\n" |
1533 | " return __nvvm_atom_add_gen_ll((long long *)__p, __v);\n" |
1534 | "}\n" |
1535 | "__DEVICE__ unsigned long long __ullAtomicAdd_block(unsigned long long *__p,\n" |
1536 | " unsigned long long __v) {\n" |
1537 | " return __nvvm_atom_cta_add_gen_ll((long long *)__p, __v);\n" |
1538 | "}\n" |
1539 | "__DEVICE__ unsigned long long __ullAtomicAdd_system(unsigned long long *__p,\n" |
1540 | " unsigned long long __v) {\n" |
1541 | " return __nvvm_atom_sys_add_gen_ll((long long *)__p, __v);\n" |
1542 | "}\n" |
1543 | "__DEVICE__ unsigned long long __ullAtomicAnd(unsigned long long *__p,\n" |
1544 | " unsigned long long __v) {\n" |
1545 | " return __nvvm_atom_and_gen_ll((long long *)__p, __v);\n" |
1546 | "}\n" |
1547 | "__DEVICE__ unsigned long long __ullAtomicAnd_block(unsigned long long *__p,\n" |
1548 | " unsigned long long __v) {\n" |
1549 | " return __nvvm_atom_cta_and_gen_ll((long long *)__p, __v);\n" |
1550 | "}\n" |
1551 | "__DEVICE__ unsigned long long __ullAtomicAnd_system(unsigned long long *__p,\n" |
1552 | " unsigned long long __v) {\n" |
1553 | " return __nvvm_atom_sys_and_gen_ll((long long *)__p, __v);\n" |
1554 | "}\n" |
1555 | "__DEVICE__ unsigned long long __ullAtomicCAS(unsigned long long *__p,\n" |
1556 | " unsigned long long __cmp,\n" |
1557 | " unsigned long long __v) {\n" |
1558 | " return __nvvm_atom_cas_gen_ll((long long *)__p, __cmp, __v);\n" |
1559 | "}\n" |
1560 | "__DEVICE__ unsigned long long __ullAtomicCAS_block(unsigned long long *__p,\n" |
1561 | " unsigned long long __cmp,\n" |
1562 | " unsigned long long __v) {\n" |
1563 | " return __nvvm_atom_cta_cas_gen_ll((long long *)__p, __cmp, __v);\n" |
1564 | "}\n" |
1565 | "__DEVICE__ unsigned long long __ullAtomicCAS_system(unsigned long long *__p,\n" |
1566 | " unsigned long long __cmp,\n" |
1567 | " unsigned long long __v) {\n" |
1568 | " return __nvvm_atom_sys_cas_gen_ll((long long *)__p, __cmp, __v);\n" |
1569 | "}\n" |
1570 | "__DEVICE__ unsigned long long __ullAtomicExch(unsigned long long *__p,\n" |
1571 | " unsigned long long __v) {\n" |
1572 | " return __nvvm_atom_xchg_gen_ll((long long *)__p, __v);\n" |
1573 | "}\n" |
1574 | "__DEVICE__ unsigned long long __ullAtomicExch_block(unsigned long long *__p,\n" |
1575 | " unsigned long long __v) {\n" |
1576 | " return __nvvm_atom_cta_xchg_gen_ll((long long *)__p, __v);\n" |
1577 | "}\n" |
1578 | "__DEVICE__ unsigned long long __ullAtomicExch_system(unsigned long long *__p,\n" |
1579 | " unsigned long long __v) {\n" |
1580 | " return __nvvm_atom_sys_xchg_gen_ll((long long *)__p, __v);\n" |
1581 | "}\n" |
1582 | "__DEVICE__ unsigned long long __ullAtomicMax(unsigned long long *__p,\n" |
1583 | " unsigned long long __v) {\n" |
1584 | " return __nvvm_atom_max_gen_ull(__p, __v);\n" |
1585 | "}\n" |
1586 | "__DEVICE__ unsigned long long __ullAtomicMax_block(unsigned long long *__p,\n" |
1587 | " unsigned long long __v) {\n" |
1588 | " return __nvvm_atom_cta_max_gen_ull(__p, __v);\n" |
1589 | "}\n" |
1590 | "__DEVICE__ unsigned long long __ullAtomicMax_system(unsigned long long *__p,\n" |
1591 | " unsigned long long __v) {\n" |
1592 | " return __nvvm_atom_sys_max_gen_ull(__p, __v);\n" |
1593 | "}\n" |
1594 | "__DEVICE__ unsigned long long __ullAtomicMin(unsigned long long *__p,\n" |
1595 | " unsigned long long __v) {\n" |
1596 | " return __nvvm_atom_min_gen_ull(__p, __v);\n" |
1597 | "}\n" |
1598 | "__DEVICE__ unsigned long long __ullAtomicMin_block(unsigned long long *__p,\n" |
1599 | " unsigned long long __v) {\n" |
1600 | " return __nvvm_atom_cta_min_gen_ull(__p, __v);\n" |
1601 | "}\n" |
1602 | "__DEVICE__ unsigned long long __ullAtomicMin_system(unsigned long long *__p,\n" |
1603 | " unsigned long long __v) {\n" |
1604 | " return __nvvm_atom_sys_min_gen_ull(__p, __v);\n" |
1605 | "}\n" |
1606 | "__DEVICE__ unsigned long long __ullAtomicOr(unsigned long long *__p,\n" |
1607 | " unsigned long long __v) {\n" |
1608 | " return __nvvm_atom_or_gen_ll((long long *)__p, __v);\n" |
1609 | "}\n" |
1610 | "__DEVICE__ unsigned long long __ullAtomicOr_block(unsigned long long *__p,\n" |
1611 | " unsigned long long __v) {\n" |
1612 | " return __nvvm_atom_cta_or_gen_ll((long long *)__p, __v);\n" |
1613 | "}\n" |
1614 | "__DEVICE__ unsigned long long __ullAtomicOr_system(unsigned long long *__p,\n" |
1615 | " unsigned long long __v) {\n" |
1616 | " return __nvvm_atom_sys_or_gen_ll((long long *)__p, __v);\n" |
1617 | "}\n" |
1618 | "__DEVICE__ unsigned long long __ullAtomicXor(unsigned long long *__p,\n" |
1619 | " unsigned long long __v) {\n" |
1620 | " return __nvvm_atom_xor_gen_ll((long long *)__p, __v);\n" |
1621 | "}\n" |
1622 | "__DEVICE__ unsigned long long __ullAtomicXor_block(unsigned long long *__p,\n" |
1623 | " unsigned long long __v) {\n" |
1624 | " return __nvvm_atom_cta_xor_gen_ll((long long *)__p, __v);\n" |
1625 | "}\n" |
1626 | "__DEVICE__ unsigned long long __ullAtomicXor_system(unsigned long long *__p,\n" |
1627 | " unsigned long long __v) {\n" |
1628 | " return __nvvm_atom_sys_xor_gen_ll((long long *)__p, __v);\n" |
1629 | "}\n" |
1630 | "__DEVICE__ unsigned int __umul24(unsigned int __a, unsigned int __b) {\n" |
1631 | " return __nv_umul24(__a, __b);\n" |
1632 | "}\n" |
1633 | "__DEVICE__ unsigned long long __umul64hi(unsigned long long __a,\n" |
1634 | " unsigned long long __b) {\n" |
1635 | " return __nv_umul64hi(__a, __b);\n" |
1636 | "}\n" |
1637 | "__DEVICE__ unsigned int __umulhi(unsigned int __a, unsigned int __b) {\n" |
1638 | " return __nv_umulhi(__a, __b);\n" |
1639 | "}\n" |
1640 | "__DEVICE__ unsigned int __urhadd(unsigned int __a, unsigned int __b) {\n" |
1641 | " return __nv_urhadd(__a, __b);\n" |
1642 | "}\n" |
1643 | "__DEVICE__ unsigned int __usad(unsigned int __a, unsigned int __b,\n" |
1644 | " unsigned int __c) {\n" |
1645 | " return __nv_usad(__a, __b, __c);\n" |
1646 | "}\n" |
1647 | "\n" |
1648 | "#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020\n" |
1649 | "__DEVICE__ unsigned int __vabs2(unsigned int __a) { return __nv_vabs2(__a); }\n" |
1650 | "__DEVICE__ unsigned int __vabs4(unsigned int __a) { return __nv_vabs4(__a); }\n" |
1651 | "__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {\n" |
1652 | " return __nv_vabsdiffs2(__a, __b);\n" |
1653 | "}\n" |
1654 | "__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {\n" |
1655 | " return __nv_vabsdiffs4(__a, __b);\n" |
1656 | "}\n" |
1657 | "__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {\n" |
1658 | " return __nv_vabsdiffu2(__a, __b);\n" |
1659 | "}\n" |
1660 | "__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {\n" |
1661 | " return __nv_vabsdiffu4(__a, __b);\n" |
1662 | "}\n" |
1663 | "__DEVICE__ unsigned int __vabsss2(unsigned int __a) {\n" |
1664 | " return __nv_vabsss2(__a);\n" |
1665 | "}\n" |
1666 | "__DEVICE__ unsigned int __vabsss4(unsigned int __a) {\n" |
1667 | " return __nv_vabsss4(__a);\n" |
1668 | "}\n" |
1669 | "__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {\n" |
1670 | " return __nv_vadd2(__a, __b);\n" |
1671 | "}\n" |
1672 | "__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {\n" |
1673 | " return __nv_vadd4(__a, __b);\n" |
1674 | "}\n" |
1675 | "__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {\n" |
1676 | " return __nv_vaddss2(__a, __b);\n" |
1677 | "}\n" |
1678 | "__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {\n" |
1679 | " return __nv_vaddss4(__a, __b);\n" |
1680 | "}\n" |
1681 | "__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {\n" |
1682 | " return __nv_vaddus2(__a, __b);\n" |
1683 | "}\n" |
1684 | "__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {\n" |
1685 | " return __nv_vaddus4(__a, __b);\n" |
1686 | "}\n" |
1687 | "__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {\n" |
1688 | " return __nv_vavgs2(__a, __b);\n" |
1689 | "}\n" |
1690 | "__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {\n" |
1691 | " return __nv_vavgs4(__a, __b);\n" |
1692 | "}\n" |
1693 | "__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {\n" |
1694 | " return __nv_vavgu2(__a, __b);\n" |
1695 | "}\n" |
1696 | "__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {\n" |
1697 | " return __nv_vavgu4(__a, __b);\n" |
1698 | "}\n" |
1699 | "__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {\n" |
1700 | " return __nv_vcmpeq2(__a, __b);\n" |
1701 | "}\n" |
1702 | "__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {\n" |
1703 | " return __nv_vcmpeq4(__a, __b);\n" |
1704 | "}\n" |
1705 | "__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {\n" |
1706 | " return __nv_vcmpges2(__a, __b);\n" |
1707 | "}\n" |
1708 | "__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {\n" |
1709 | " return __nv_vcmpges4(__a, __b);\n" |
1710 | "}\n" |
1711 | "__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {\n" |
1712 | " return __nv_vcmpgeu2(__a, __b);\n" |
1713 | "}\n" |
1714 | "__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {\n" |
1715 | " return __nv_vcmpgeu4(__a, __b);\n" |
1716 | "}\n" |
1717 | "__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {\n" |
1718 | " return __nv_vcmpgts2(__a, __b);\n" |
1719 | "}\n" |
1720 | "__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {\n" |
1721 | " return __nv_vcmpgts4(__a, __b);\n" |
1722 | "}\n" |
1723 | "__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {\n" |
1724 | " return __nv_vcmpgtu2(__a, __b);\n" |
1725 | "}\n" |
1726 | "__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {\n" |
1727 | " return __nv_vcmpgtu4(__a, __b);\n" |
1728 | "}\n" |
1729 | "__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {\n" |
1730 | " return __nv_vcmples2(__a, __b);\n" |
1731 | "}\n" |
1732 | "__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {\n" |
1733 | " return __nv_vcmples4(__a, __b);\n" |
1734 | "}\n" |
1735 | "__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {\n" |
1736 | " return __nv_vcmpleu2(__a, __b);\n" |
1737 | "}\n" |
1738 | "__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {\n" |
1739 | " return __nv_vcmpleu4(__a, __b);\n" |
1740 | "}\n" |
1741 | "__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {\n" |
1742 | " return __nv_vcmplts2(__a, __b);\n" |
1743 | "}\n" |
1744 | "__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {\n" |
1745 | " return __nv_vcmplts4(__a, __b);\n" |
1746 | "}\n" |
1747 | "__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {\n" |
1748 | " return __nv_vcmpltu2(__a, __b);\n" |
1749 | "}\n" |
1750 | "__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {\n" |
1751 | " return __nv_vcmpltu4(__a, __b);\n" |
1752 | "}\n" |
1753 | "__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {\n" |
1754 | " return __nv_vcmpne2(__a, __b);\n" |
1755 | "}\n" |
1756 | "__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {\n" |
1757 | " return __nv_vcmpne4(__a, __b);\n" |
1758 | "}\n" |
1759 | "__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {\n" |
1760 | " return __nv_vhaddu2(__a, __b);\n" |
1761 | "}\n" |
1762 | "__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {\n" |
1763 | " return __nv_vhaddu4(__a, __b);\n" |
1764 | "}\n" |
1765 | "__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {\n" |
1766 | " return __nv_vmaxs2(__a, __b);\n" |
1767 | "}\n" |
1768 | "__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {\n" |
1769 | " return __nv_vmaxs4(__a, __b);\n" |
1770 | "}\n" |
1771 | "__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {\n" |
1772 | " return __nv_vmaxu2(__a, __b);\n" |
1773 | "}\n" |
1774 | "__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {\n" |
1775 | " return __nv_vmaxu4(__a, __b);\n" |
1776 | "}\n" |
1777 | "__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {\n" |
1778 | " return __nv_vmins2(__a, __b);\n" |
1779 | "}\n" |
1780 | "__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {\n" |
1781 | " return __nv_vmins4(__a, __b);\n" |
1782 | "}\n" |
1783 | "__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {\n" |
1784 | " return __nv_vminu2(__a, __b);\n" |
1785 | "}\n" |
1786 | "__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {\n" |
1787 | " return __nv_vminu4(__a, __b);\n" |
1788 | "}\n" |
1789 | "__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __nv_vneg2(__a); }\n" |
1790 | "__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __nv_vneg4(__a); }\n" |
1791 | "__DEVICE__ unsigned int __vnegss2(unsigned int __a) {\n" |
1792 | " return __nv_vnegss2(__a);\n" |
1793 | "}\n" |
1794 | "__DEVICE__ unsigned int __vnegss4(unsigned int __a) {\n" |
1795 | " return __nv_vnegss4(__a);\n" |
1796 | "}\n" |
1797 | "__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {\n" |
1798 | " return __nv_vsads2(__a, __b);\n" |
1799 | "}\n" |
1800 | "__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {\n" |
1801 | " return __nv_vsads4(__a, __b);\n" |
1802 | "}\n" |
1803 | "__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {\n" |
1804 | " return __nv_vsadu2(__a, __b);\n" |
1805 | "}\n" |
1806 | "__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {\n" |
1807 | " return __nv_vsadu4(__a, __b);\n" |
1808 | "}\n" |
1809 | "__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {\n" |
1810 | " return __nv_vseteq2(__a, __b);\n" |
1811 | "}\n" |
1812 | "__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {\n" |
1813 | " return __nv_vseteq4(__a, __b);\n" |
1814 | "}\n" |
1815 | "__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {\n" |
1816 | " return __nv_vsetges2(__a, __b);\n" |
1817 | "}\n" |
1818 | "__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {\n" |
1819 | " return __nv_vsetges4(__a, __b);\n" |
1820 | "}\n" |
1821 | "__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {\n" |
1822 | " return __nv_vsetgeu2(__a, __b);\n" |
1823 | "}\n" |
1824 | "__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {\n" |
1825 | " return __nv_vsetgeu4(__a, __b);\n" |
1826 | "}\n" |
1827 | "__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {\n" |
1828 | " return __nv_vsetgts2(__a, __b);\n" |
1829 | "}\n" |
1830 | "__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {\n" |
1831 | " return __nv_vsetgts4(__a, __b);\n" |
1832 | "}\n" |
1833 | "__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {\n" |
1834 | " return __nv_vsetgtu2(__a, __b);\n" |
1835 | "}\n" |
1836 | "__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {\n" |
1837 | " return __nv_vsetgtu4(__a, __b);\n" |
1838 | "}\n" |
1839 | "__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {\n" |
1840 | " return __nv_vsetles2(__a, __b);\n" |
1841 | "}\n" |
1842 | "__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {\n" |
1843 | " return __nv_vsetles4(__a, __b);\n" |
1844 | "}\n" |
1845 | "__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {\n" |
1846 | " return __nv_vsetleu2(__a, __b);\n" |
1847 | "}\n" |
1848 | "__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {\n" |
1849 | " return __nv_vsetleu4(__a, __b);\n" |
1850 | "}\n" |
1851 | "__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {\n" |
1852 | " return __nv_vsetlts2(__a, __b);\n" |
1853 | "}\n" |
1854 | "__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {\n" |
1855 | " return __nv_vsetlts4(__a, __b);\n" |
1856 | "}\n" |
1857 | "__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {\n" |
1858 | " return __nv_vsetltu2(__a, __b);\n" |
1859 | "}\n" |
1860 | "__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {\n" |
1861 | " return __nv_vsetltu4(__a, __b);\n" |
1862 | "}\n" |
1863 | "__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {\n" |
1864 | " return __nv_vsetne2(__a, __b);\n" |
1865 | "}\n" |
1866 | "__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {\n" |
1867 | " return __nv_vsetne4(__a, __b);\n" |
1868 | "}\n" |
1869 | "__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {\n" |
1870 | " return __nv_vsub2(__a, __b);\n" |
1871 | "}\n" |
1872 | "__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {\n" |
1873 | " return __nv_vsub4(__a, __b);\n" |
1874 | "}\n" |
1875 | "__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {\n" |
1876 | " return __nv_vsubss2(__a, __b);\n" |
1877 | "}\n" |
1878 | "__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {\n" |
1879 | " return __nv_vsubss4(__a, __b);\n" |
1880 | "}\n" |
1881 | "__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {\n" |
1882 | " return __nv_vsubus2(__a, __b);\n" |
1883 | "}\n" |
1884 | "__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {\n" |
1885 | " return __nv_vsubus4(__a, __b);\n" |
1886 | "}\n" |
1887 | "#else // CUDA_VERSION >= 9020\n" |
1888 | "// CUDA no longer provides inline assembly (or bitcode) implementation of these\n" |
1889 | "// functions, so we have to reimplment them. The implementation is naive and is\n" |
1890 | "// not optimized for performance.\n" |
1891 | "\n" |
1892 | "// Helper function to convert N-bit boolean subfields into all-0 or all-1.\n" |
1893 | "// E.g. __bool2mask(0x01000100,8) -> 0xff00ff00\n" |
1894 | "// __bool2mask(0x00010000,16) -> 0xffff0000\n" |
1895 | "__DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift) {\n" |
1896 | " return (__a << shift) - __a;\n" |
1897 | "}\n" |
1898 | "__DEVICE__ unsigned int __vabs2(unsigned int __a) {\n" |
1899 | " unsigned int r;\n" |
1900 | " asm(\"vabsdiff2.s32.s32.s32 %0,%1,%2,%3;\"\n" |
1901 | " : \"=r\"(r)\n" |
1902 | " : \"r\"(__a), \"r\"(0), \"r\"(0));\n" |
1903 | " return r;\n" |
1904 | "}\n" |
1905 | "__DEVICE__ unsigned int __vabs4(unsigned int __a) {\n" |
1906 | " unsigned int r;\n" |
1907 | " asm(\"vabsdiff4.s32.s32.s32 %0,%1,%2,%3;\"\n" |
1908 | " : \"=r\"(r)\n" |
1909 | " : \"r\"(__a), \"r\"(0), \"r\"(0));\n" |
1910 | " return r;\n" |
1911 | "}\n" |
1912 | "__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {\n" |
1913 | " unsigned int r;\n" |
1914 | " asm(\"vabsdiff2.s32.s32.s32 %0,%1,%2,%3;\"\n" |
1915 | " : \"=r\"(r)\n" |
1916 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
1917 | " return r;\n" |
1918 | "}\n" |
1919 | "\n" |
1920 | "__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {\n" |
1921 | " unsigned int r;\n" |
1922 | " asm(\"vabsdiff4.s32.s32.s32 %0,%1,%2,%3;\"\n" |
1923 | " : \"=r\"(r)\n" |
1924 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
1925 | " return r;\n" |
1926 | "}\n" |
1927 | "__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {\n" |
1928 | " unsigned int r;\n" |
1929 | " asm(\"vabsdiff2.u32.u32.u32 %0,%1,%2,%3;\"\n" |
1930 | " : \"=r\"(r)\n" |
1931 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
1932 | " return r;\n" |
1933 | "}\n" |
1934 | "__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {\n" |
1935 | " unsigned int r;\n" |
1936 | " asm(\"vabsdiff4.u32.u32.u32 %0,%1,%2,%3;\"\n" |
1937 | " : \"=r\"(r)\n" |
1938 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
1939 | " return r;\n" |
1940 | "}\n" |
1941 | "__DEVICE__ unsigned int __vabsss2(unsigned int __a) {\n" |
1942 | " unsigned int r;\n" |
1943 | " asm(\"vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;\"\n" |
1944 | " : \"=r\"(r)\n" |
1945 | " : \"r\"(__a), \"r\"(0), \"r\"(0));\n" |
1946 | " return r;\n" |
1947 | "}\n" |
1948 | "__DEVICE__ unsigned int __vabsss4(unsigned int __a) {\n" |
1949 | " unsigned int r;\n" |
1950 | " asm(\"vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;\"\n" |
1951 | " : \"=r\"(r)\n" |
1952 | " : \"r\"(__a), \"r\"(0), \"r\"(0));\n" |
1953 | " return r;\n" |
1954 | "}\n" |
1955 | "__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {\n" |
1956 | " unsigned int r;\n" |
1957 | " asm(\"vadd2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
1958 | " return r;\n" |
1959 | "}\n" |
1960 | "__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {\n" |
1961 | " unsigned int r;\n" |
1962 | " asm(\"vadd4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
1963 | " return r;\n" |
1964 | "}\n" |
1965 | "__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {\n" |
1966 | " unsigned int r;\n" |
1967 | " asm(\"vadd2.s32.s32.s32.sat %0,%1,%2,%3;\"\n" |
1968 | " : \"=r\"(r)\n" |
1969 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
1970 | " return r;\n" |
1971 | "}\n" |
1972 | "__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {\n" |
1973 | " unsigned int r;\n" |
1974 | " asm(\"vadd4.s32.s32.s32.sat %0,%1,%2,%3;\"\n" |
1975 | " : \"=r\"(r)\n" |
1976 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
1977 | " return r;\n" |
1978 | "}\n" |
1979 | "__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {\n" |
1980 | " unsigned int r;\n" |
1981 | " asm(\"vadd2.u32.u32.u32.sat %0,%1,%2,%3;\"\n" |
1982 | " : \"=r\"(r)\n" |
1983 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
1984 | " return r;\n" |
1985 | "}\n" |
1986 | "__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {\n" |
1987 | " unsigned int r;\n" |
1988 | " asm(\"vadd4.u32.u32.u32.sat %0,%1,%2,%3;\"\n" |
1989 | " : \"=r\"(r)\n" |
1990 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
1991 | " return r;\n" |
1992 | "}\n" |
1993 | "__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {\n" |
1994 | " unsigned int r;\n" |
1995 | " asm(\"vavrg2.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
1996 | " return r;\n" |
1997 | "}\n" |
1998 | "__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {\n" |
1999 | " unsigned int r;\n" |
2000 | " asm(\"vavrg4.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2001 | " return r;\n" |
2002 | "}\n" |
2003 | "__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {\n" |
2004 | " unsigned int r;\n" |
2005 | " asm(\"vavrg2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2006 | " return r;\n" |
2007 | "}\n" |
2008 | "__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {\n" |
2009 | " unsigned int r;\n" |
2010 | " asm(\"vavrg4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2011 | " return r;\n" |
2012 | "}\n" |
2013 | "__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {\n" |
2014 | " unsigned int r;\n" |
2015 | " asm(\"vset2.u32.u32.eq %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2016 | " return r;\n" |
2017 | "}\n" |
2018 | "__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {\n" |
2019 | " return __bool2mask(__vseteq2(__a, __b), 16);\n" |
2020 | "}\n" |
2021 | "__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {\n" |
2022 | " unsigned int r;\n" |
2023 | " asm(\"vset4.u32.u32.eq %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2024 | " return r;\n" |
2025 | "}\n" |
2026 | "__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {\n" |
2027 | " return __bool2mask(__vseteq4(__a, __b), 8);\n" |
2028 | "}\n" |
2029 | "__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {\n" |
2030 | " unsigned int r;\n" |
2031 | " asm(\"vset2.s32.s32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2032 | " return r;\n" |
2033 | "}\n" |
2034 | "__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {\n" |
2035 | " return __bool2mask(__vsetges2(__a, __b), 16);\n" |
2036 | "}\n" |
2037 | "__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {\n" |
2038 | " unsigned int r;\n" |
2039 | " asm(\"vset4.s32.s32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2040 | " return r;\n" |
2041 | "}\n" |
2042 | "__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {\n" |
2043 | " return __bool2mask(__vsetges4(__a, __b), 8);\n" |
2044 | "}\n" |
2045 | "__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {\n" |
2046 | " unsigned int r;\n" |
2047 | " asm(\"vset2.u32.u32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2048 | " return r;\n" |
2049 | "}\n" |
2050 | "__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {\n" |
2051 | " return __bool2mask(__vsetgeu2(__a, __b), 16);\n" |
2052 | "}\n" |
2053 | "__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {\n" |
2054 | " unsigned int r;\n" |
2055 | " asm(\"vset4.u32.u32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2056 | " return r;\n" |
2057 | "}\n" |
2058 | "__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {\n" |
2059 | " return __bool2mask(__vsetgeu4(__a, __b), 8);\n" |
2060 | "}\n" |
2061 | "__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {\n" |
2062 | " unsigned int r;\n" |
2063 | " asm(\"vset2.s32.s32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2064 | " return r;\n" |
2065 | "}\n" |
2066 | "__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {\n" |
2067 | " return __bool2mask(__vsetgts2(__a, __b), 16);\n" |
2068 | "}\n" |
2069 | "__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {\n" |
2070 | " unsigned int r;\n" |
2071 | " asm(\"vset4.s32.s32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2072 | " return r;\n" |
2073 | "}\n" |
2074 | "__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {\n" |
2075 | " return __bool2mask(__vsetgts4(__a, __b), 8);\n" |
2076 | "}\n" |
2077 | "__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {\n" |
2078 | " unsigned int r;\n" |
2079 | " asm(\"vset2.u32.u32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2080 | " return r;\n" |
2081 | "}\n" |
2082 | "__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {\n" |
2083 | " return __bool2mask(__vsetgtu2(__a, __b), 16);\n" |
2084 | "}\n" |
2085 | "__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {\n" |
2086 | " unsigned int r;\n" |
2087 | " asm(\"vset4.u32.u32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2088 | " return r;\n" |
2089 | "}\n" |
2090 | "__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {\n" |
2091 | " return __bool2mask(__vsetgtu4(__a, __b), 8);\n" |
2092 | "}\n" |
2093 | "__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {\n" |
2094 | " unsigned int r;\n" |
2095 | " asm(\"vset2.s32.s32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2096 | " return r;\n" |
2097 | "}\n" |
2098 | "__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {\n" |
2099 | " return __bool2mask(__vsetles2(__a, __b), 16);\n" |
2100 | "}\n" |
2101 | "__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {\n" |
2102 | " unsigned int r;\n" |
2103 | " asm(\"vset4.s32.s32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2104 | " return r;\n" |
2105 | "}\n" |
2106 | "__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {\n" |
2107 | " return __bool2mask(__vsetles4(__a, __b), 8);\n" |
2108 | "}\n" |
2109 | "__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {\n" |
2110 | " unsigned int r;\n" |
2111 | " asm(\"vset2.u32.u32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2112 | " return r;\n" |
2113 | "}\n" |
2114 | "__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {\n" |
2115 | " return __bool2mask(__vsetleu2(__a, __b), 16);\n" |
2116 | "}\n" |
2117 | "__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {\n" |
2118 | " unsigned int r;\n" |
2119 | " asm(\"vset4.u32.u32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2120 | " return r;\n" |
2121 | "}\n" |
2122 | "__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {\n" |
2123 | " return __bool2mask(__vsetleu4(__a, __b), 8);\n" |
2124 | "}\n" |
2125 | "__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {\n" |
2126 | " unsigned int r;\n" |
2127 | " asm(\"vset2.s32.s32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2128 | " return r;\n" |
2129 | "}\n" |
2130 | "__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {\n" |
2131 | " return __bool2mask(__vsetlts2(__a, __b), 16);\n" |
2132 | "}\n" |
2133 | "__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {\n" |
2134 | " unsigned int r;\n" |
2135 | " asm(\"vset4.s32.s32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2136 | " return r;\n" |
2137 | "}\n" |
2138 | "__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {\n" |
2139 | " return __bool2mask(__vsetlts4(__a, __b), 8);\n" |
2140 | "}\n" |
2141 | "__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {\n" |
2142 | " unsigned int r;\n" |
2143 | " asm(\"vset2.u32.u32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2144 | " return r;\n" |
2145 | "}\n" |
2146 | "__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {\n" |
2147 | " return __bool2mask(__vsetltu2(__a, __b), 16);\n" |
2148 | "}\n" |
2149 | "__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {\n" |
2150 | " unsigned int r;\n" |
2151 | " asm(\"vset4.u32.u32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2152 | " return r;\n" |
2153 | "}\n" |
2154 | "__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {\n" |
2155 | " return __bool2mask(__vsetltu4(__a, __b), 8);\n" |
2156 | "}\n" |
2157 | "__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {\n" |
2158 | " unsigned int r;\n" |
2159 | " asm(\"vset2.u32.u32.ne %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2160 | " return r;\n" |
2161 | "}\n" |
2162 | "__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {\n" |
2163 | " return __bool2mask(__vsetne2(__a, __b), 16);\n" |
2164 | "}\n" |
2165 | "__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {\n" |
2166 | " unsigned int r;\n" |
2167 | " asm(\"vset4.u32.u32.ne %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2168 | " return r;\n" |
2169 | "}\n" |
2170 | "__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {\n" |
2171 | " return __bool2mask(__vsetne4(__a, __b), 8);\n" |
2172 | "}\n" |
2173 | "\n" |
2174 | "// Based on ITEM 23 in AIM-239: http://dspace.mit.edu/handle/1721.1/6086\n" |
2175 | "// (a & b) + (a | b) = a + b = (a ^ b) + 2 * (a & b) =>\n" |
2176 | "// (a + b) / 2 = ((a ^ b) >> 1) + (a & b)\n" |
2177 | "// To operate on multiple sub-elements we need to make sure to mask out bits\n" |
2178 | "// that crossed over into adjacent elements during the shift.\n" |
2179 | "__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {\n" |
2180 | " return (((__a ^ __b) >> 1) & ~0x80008000u) + (__a & __b);\n" |
2181 | "}\n" |
2182 | "__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {\n" |
2183 | " return (((__a ^ __b) >> 1) & ~0x80808080u) + (__a & __b);\n" |
2184 | "}\n" |
2185 | "\n" |
2186 | "__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {\n" |
2187 | " unsigned int r;\n" |
2188 | " if ((__a & 0x8000) && (__b & 0x8000)) {\n" |
2189 | " // Work around a bug in ptxas which produces invalid result if low element\n" |
2190 | " // is negative.\n" |
2191 | " unsigned mask = __vcmpgts2(__a, __b);\n" |
2192 | " r = (__a & mask) | (__b & ~mask);\n" |
2193 | " } else {\n" |
2194 | " asm(\"vmax2.s32.s32.s32 %0,%1,%2,%3;\"\n" |
2195 | " : \"=r\"(r)\n" |
2196 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2197 | " }\n" |
2198 | " return r;\n" |
2199 | "}\n" |
2200 | "__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {\n" |
2201 | " unsigned int r;\n" |
2202 | " asm(\"vmax4.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2203 | " return r;\n" |
2204 | "}\n" |
2205 | "__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {\n" |
2206 | " unsigned int r;\n" |
2207 | " asm(\"vmax2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2208 | " return r;\n" |
2209 | "}\n" |
2210 | "__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {\n" |
2211 | " unsigned int r;\n" |
2212 | " asm(\"vmax4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2213 | " return r;\n" |
2214 | "}\n" |
2215 | "__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {\n" |
2216 | " unsigned int r;\n" |
2217 | " asm(\"vmin2.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2218 | " return r;\n" |
2219 | "}\n" |
2220 | "__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {\n" |
2221 | " unsigned int r;\n" |
2222 | " asm(\"vmin4.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2223 | " return r;\n" |
2224 | "}\n" |
2225 | "__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {\n" |
2226 | " unsigned int r;\n" |
2227 | " asm(\"vmin2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2228 | " return r;\n" |
2229 | "}\n" |
2230 | "__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {\n" |
2231 | " unsigned int r;\n" |
2232 | " asm(\"vmin4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2233 | " return r;\n" |
2234 | "}\n" |
2235 | "__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {\n" |
2236 | " unsigned int r;\n" |
2237 | " asm(\"vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;\"\n" |
2238 | " : \"=r\"(r)\n" |
2239 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2240 | " return r;\n" |
2241 | "}\n" |
2242 | "__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {\n" |
2243 | " unsigned int r;\n" |
2244 | " asm(\"vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;\"\n" |
2245 | " : \"=r\"(r)\n" |
2246 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2247 | " return r;\n" |
2248 | "}\n" |
2249 | "__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {\n" |
2250 | " unsigned int r;\n" |
2251 | " asm(\"vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;\"\n" |
2252 | " : \"=r\"(r)\n" |
2253 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2254 | " return r;\n" |
2255 | "}\n" |
2256 | "__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {\n" |
2257 | " unsigned int r;\n" |
2258 | " asm(\"vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;\"\n" |
2259 | " : \"=r\"(r)\n" |
2260 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2261 | " return r;\n" |
2262 | "}\n" |
2263 | "\n" |
2264 | "__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {\n" |
2265 | " unsigned int r;\n" |
2266 | " asm(\"vsub2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2267 | " return r;\n" |
2268 | "}\n" |
2269 | "__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); }\n" |
2270 | "\n" |
2271 | "__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {\n" |
2272 | " unsigned int r;\n" |
2273 | " asm(\"vsub4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2274 | " return r;\n" |
2275 | "}\n" |
2276 | "__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); }\n" |
2277 | "__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {\n" |
2278 | " unsigned int r;\n" |
2279 | " asm(\"vsub2.s32.s32.s32.sat %0,%1,%2,%3;\"\n" |
2280 | " : \"=r\"(r)\n" |
2281 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2282 | " return r;\n" |
2283 | "}\n" |
2284 | "__DEVICE__ unsigned int __vnegss2(unsigned int __a) {\n" |
2285 | " return __vsubss2(0, __a);\n" |
2286 | "}\n" |
2287 | "__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {\n" |
2288 | " unsigned int r;\n" |
2289 | " asm(\"vsub4.s32.s32.s32.sat %0,%1,%2,%3;\"\n" |
2290 | " : \"=r\"(r)\n" |
2291 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2292 | " return r;\n" |
2293 | "}\n" |
2294 | "__DEVICE__ unsigned int __vnegss4(unsigned int __a) {\n" |
2295 | " return __vsubss4(0, __a);\n" |
2296 | "}\n" |
2297 | "__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {\n" |
2298 | " unsigned int r;\n" |
2299 | " asm(\"vsub2.u32.u32.u32.sat %0,%1,%2,%3;\"\n" |
2300 | " : \"=r\"(r)\n" |
2301 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2302 | " return r;\n" |
2303 | "}\n" |
2304 | "__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {\n" |
2305 | " unsigned int r;\n" |
2306 | " asm(\"vsub4.u32.u32.u32.sat %0,%1,%2,%3;\"\n" |
2307 | " : \"=r\"(r)\n" |
2308 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
2309 | " return r;\n" |
2310 | "}\n" |
2311 | "#endif // CUDA_VERSION >= 9020\n" |
2312 | "__DEVICE__ int abs(int __a) { return __nv_abs(__a); }\n" |
2313 | "__DEVICE__ double acos(double __a) { return __nv_acos(__a); }\n" |
2314 | "__DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }\n" |
2315 | "__DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }\n" |
2316 | "__DEVICE__ float acoshf(float __a) { return __nv_acoshf(__a); }\n" |
2317 | "__DEVICE__ double asin(double __a) { return __nv_asin(__a); }\n" |
2318 | "__DEVICE__ float asinf(float __a) { return __nv_asinf(__a); }\n" |
2319 | "__DEVICE__ double asinh(double __a) { return __nv_asinh(__a); }\n" |
2320 | "__DEVICE__ float asinhf(float __a) { return __nv_asinhf(__a); }\n" |
2321 | "__DEVICE__ double atan(double __a) { return __nv_atan(__a); }\n" |
2322 | "__DEVICE__ double atan2(double __a, double __b) { return __nv_atan2(__a, __b); }\n" |
2323 | "__DEVICE__ float atan2f(float __a, float __b) { return __nv_atan2f(__a, __b); }\n" |
2324 | "__DEVICE__ float atanf(float __a) { return __nv_atanf(__a); }\n" |
2325 | "__DEVICE__ double atanh(double __a) { return __nv_atanh(__a); }\n" |
2326 | "__DEVICE__ float atanhf(float __a) { return __nv_atanhf(__a); }\n" |
2327 | "__DEVICE__ double cbrt(double __a) { return __nv_cbrt(__a); }\n" |
2328 | "__DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); }\n" |
2329 | "__DEVICE__ double ceil(double __a) { return __nv_ceil(__a); }\n" |
2330 | "__DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); }\n" |
2331 | "__DEVICE__ int clock() { return __nvvm_read_ptx_sreg_clock(); }\n" |
2332 | "__DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); }\n" |
2333 | "__DEVICE__ double copysign(double __a, double __b) {\n" |
2334 | " return __nv_copysign(__a, __b);\n" |
2335 | "}\n" |
2336 | "__DEVICE__ float copysignf(float __a, float __b) {\n" |
2337 | " return __nv_copysignf(__a, __b);\n" |
2338 | "}\n" |
2339 | "__DEVICE__ double cos(double __a) { return __nv_cos(__a); }\n" |
2340 | "__DEVICE__ float cosf(float __a) {\n" |
2341 | " return __FAST_OR_SLOW(__nv_fast_cosf, __nv_cosf)(__a);\n" |
2342 | "}\n" |
2343 | "__DEVICE__ double cosh(double __a) { return __nv_cosh(__a); }\n" |
2344 | "__DEVICE__ float coshf(float __a) { return __nv_coshf(__a); }\n" |
2345 | "__DEVICE__ double cospi(double __a) { return __nv_cospi(__a); }\n" |
2346 | "__DEVICE__ float cospif(float __a) { return __nv_cospif(__a); }\n" |
2347 | "__DEVICE__ double cyl_bessel_i0(double __a) { return __nv_cyl_bessel_i0(__a); }\n" |
2348 | "__DEVICE__ float cyl_bessel_i0f(float __a) { return __nv_cyl_bessel_i0f(__a); }\n" |
2349 | "__DEVICE__ double cyl_bessel_i1(double __a) { return __nv_cyl_bessel_i1(__a); }\n" |
2350 | "__DEVICE__ float cyl_bessel_i1f(float __a) { return __nv_cyl_bessel_i1f(__a); }\n" |
2351 | "__DEVICE__ double erf(double __a) { return __nv_erf(__a); }\n" |
2352 | "__DEVICE__ double erfc(double __a) { return __nv_erfc(__a); }\n" |
2353 | "__DEVICE__ float erfcf(float __a) { return __nv_erfcf(__a); }\n" |
2354 | "__DEVICE__ double erfcinv(double __a) { return __nv_erfcinv(__a); }\n" |
2355 | "__DEVICE__ float erfcinvf(float __a) { return __nv_erfcinvf(__a); }\n" |
2356 | "__DEVICE__ double erfcx(double __a) { return __nv_erfcx(__a); }\n" |
2357 | "__DEVICE__ float erfcxf(float __a) { return __nv_erfcxf(__a); }\n" |
2358 | "__DEVICE__ float erff(float __a) { return __nv_erff(__a); }\n" |
2359 | "__DEVICE__ double erfinv(double __a) { return __nv_erfinv(__a); }\n" |
2360 | "__DEVICE__ float erfinvf(float __a) { return __nv_erfinvf(__a); }\n" |
2361 | "__DEVICE__ double exp(double __a) { return __nv_exp(__a); }\n" |
2362 | "__DEVICE__ double exp10(double __a) { return __nv_exp10(__a); }\n" |
2363 | "__DEVICE__ float exp10f(float __a) { return __nv_exp10f(__a); }\n" |
2364 | "__DEVICE__ double exp2(double __a) { return __nv_exp2(__a); }\n" |
2365 | "__DEVICE__ float exp2f(float __a) { return __nv_exp2f(__a); }\n" |
2366 | "__DEVICE__ float expf(float __a) { return __nv_expf(__a); }\n" |
2367 | "__DEVICE__ double expm1(double __a) { return __nv_expm1(__a); }\n" |
2368 | "__DEVICE__ float expm1f(float __a) { return __nv_expm1f(__a); }\n" |
2369 | "__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }\n" |
2370 | "__DEVICE__ float fabsf(float __a) { return __nv_fabsf(__a); }\n" |
2371 | "__DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); }\n" |
2372 | "__DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); }\n" |
2373 | "__DEVICE__ double fdivide(double __a, double __b) { return __a / __b; }\n" |
2374 | "__DEVICE__ float fdividef(float __a, float __b) {\n" |
2375 | "#if __FAST_MATH__ && !__CUDA_PREC_DIV\n" |
2376 | " return __nv_fast_fdividef(__a, __b);\n" |
2377 | "#else\n" |
2378 | " return __a / __b;\n" |
2379 | "#endif\n" |
2380 | "}\n" |
2381 | "__DEVICE__ double floor(double __f) { return __nv_floor(__f); }\n" |
2382 | "__DEVICE__ float floorf(float __f) { return __nv_floorf(__f); }\n" |
2383 | "__DEVICE__ double fma(double __a, double __b, double __c) {\n" |
2384 | " return __nv_fma(__a, __b, __c);\n" |
2385 | "}\n" |
2386 | "__DEVICE__ float fmaf(float __a, float __b, float __c) {\n" |
2387 | " return __nv_fmaf(__a, __b, __c);\n" |
2388 | "}\n" |
2389 | "__DEVICE__ double fmax(double __a, double __b) { return __nv_fmax(__a, __b); }\n" |
2390 | "__DEVICE__ float fmaxf(float __a, float __b) { return __nv_fmaxf(__a, __b); }\n" |
2391 | "__DEVICE__ double fmin(double __a, double __b) { return __nv_fmin(__a, __b); }\n" |
2392 | "__DEVICE__ float fminf(float __a, float __b) { return __nv_fminf(__a, __b); }\n" |
2393 | "__DEVICE__ double fmod(double __a, double __b) { return __nv_fmod(__a, __b); }\n" |
2394 | "__DEVICE__ float fmodf(float __a, float __b) { return __nv_fmodf(__a, __b); }\n" |
2395 | "__DEVICE__ double frexp(double __a, int *__b) { return __nv_frexp(__a, __b); }\n" |
2396 | "__DEVICE__ float frexpf(float __a, int *__b) { return __nv_frexpf(__a, __b); }\n" |
2397 | "__DEVICE__ double hypot(double __a, double __b) { return __nv_hypot(__a, __b); }\n" |
2398 | "__DEVICE__ float hypotf(float __a, float __b) { return __nv_hypotf(__a, __b); }\n" |
2399 | "__DEVICE__ int ilogb(double __a) { return __nv_ilogb(__a); }\n" |
2400 | "__DEVICE__ int ilogbf(float __a) { return __nv_ilogbf(__a); }\n" |
2401 | "__DEVICE__ double j0(double __a) { return __nv_j0(__a); }\n" |
2402 | "__DEVICE__ float j0f(float __a) { return __nv_j0f(__a); }\n" |
2403 | "__DEVICE__ double j1(double __a) { return __nv_j1(__a); }\n" |
2404 | "__DEVICE__ float j1f(float __a) { return __nv_j1f(__a); }\n" |
2405 | "__DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }\n" |
2406 | "__DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }\n" |
2407 | "#if defined(__LP64__)\n" |
2408 | "__DEVICE__ long labs(long __a) { return llabs(__a); };\n" |
2409 | "#else\n" |
2410 | "__DEVICE__ long labs(long __a) { return __nv_abs(__a); };\n" |
2411 | "#endif\n" |
2412 | "__DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); }\n" |
2413 | "__DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); }\n" |
2414 | "__DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); }\n" |
2415 | "__DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); }\n" |
2416 | "__DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); }\n" |
2417 | "__DEVICE__ long long llmax(long long __a, long long __b) {\n" |
2418 | " return __nv_llmax(__a, __b);\n" |
2419 | "}\n" |
2420 | "__DEVICE__ long long llmin(long long __a, long long __b) {\n" |
2421 | " return __nv_llmin(__a, __b);\n" |
2422 | "}\n" |
2423 | "__DEVICE__ long long llrint(double __a) { return __nv_llrint(__a); }\n" |
2424 | "__DEVICE__ long long llrintf(float __a) { return __nv_llrintf(__a); }\n" |
2425 | "__DEVICE__ long long llround(double __a) { return __nv_llround(__a); }\n" |
2426 | "__DEVICE__ long long llroundf(float __a) { return __nv_llroundf(__a); }\n" |
2427 | "__DEVICE__ double log(double __a) { return __nv_log(__a); }\n" |
2428 | "__DEVICE__ double log10(double __a) { return __nv_log10(__a); }\n" |
2429 | "__DEVICE__ float log10f(float __a) { return __nv_log10f(__a); }\n" |
2430 | "__DEVICE__ double log1p(double __a) { return __nv_log1p(__a); }\n" |
2431 | "__DEVICE__ float log1pf(float __a) { return __nv_log1pf(__a); }\n" |
2432 | "__DEVICE__ double log2(double __a) { return __nv_log2(__a); }\n" |
2433 | "__DEVICE__ float log2f(float __a) {\n" |
2434 | " return __FAST_OR_SLOW(__nv_fast_log2f, __nv_log2f)(__a);\n" |
2435 | "}\n" |
2436 | "__DEVICE__ double logb(double __a) { return __nv_logb(__a); }\n" |
2437 | "__DEVICE__ float logbf(float __a) { return __nv_logbf(__a); }\n" |
2438 | "__DEVICE__ float logf(float __a) {\n" |
2439 | " return __FAST_OR_SLOW(__nv_fast_logf, __nv_logf)(__a);\n" |
2440 | "}\n" |
2441 | "#if defined(__LP64__)\n" |
2442 | "__DEVICE__ long lrint(double __a) { return llrint(__a); }\n" |
2443 | "__DEVICE__ long lrintf(float __a) { return __float2ll_rn(__a); }\n" |
2444 | "__DEVICE__ long lround(double __a) { return llround(__a); }\n" |
2445 | "__DEVICE__ long lroundf(float __a) { return llroundf(__a); }\n" |
2446 | "#else\n" |
2447 | "__DEVICE__ long lrint(double __a) { return (long)rint(__a); }\n" |
2448 | "__DEVICE__ long lrintf(float __a) { return __float2int_rn(__a); }\n" |
2449 | "__DEVICE__ long lround(double __a) { return round(__a); }\n" |
2450 | "__DEVICE__ long lroundf(float __a) { return roundf(__a); }\n" |
2451 | "#endif\n" |
2452 | "__DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); }\n" |
2453 | "__DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {\n" |
2454 | " return __builtin_memcpy(__a, __b, __c);\n" |
2455 | "}\n" |
2456 | "__DEVICE__ void *memset(void *__a, int __b, size_t __c) {\n" |
2457 | " return __builtin_memset(__a, __b, __c);\n" |
2458 | "}\n" |
2459 | "__DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); }\n" |
2460 | "__DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); }\n" |
2461 | "__DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); }\n" |
2462 | "__DEVICE__ double nearbyint(double __a) { return __nv_nearbyint(__a); }\n" |
2463 | "__DEVICE__ float nearbyintf(float __a) { return __nv_nearbyintf(__a); }\n" |
2464 | "__DEVICE__ double nextafter(double __a, double __b) {\n" |
2465 | " return __nv_nextafter(__a, __b);\n" |
2466 | "}\n" |
2467 | "__DEVICE__ float nextafterf(float __a, float __b) {\n" |
2468 | " return __nv_nextafterf(__a, __b);\n" |
2469 | "}\n" |
2470 | "__DEVICE__ double norm(int __dim, const double *__t) {\n" |
2471 | " return __nv_norm(__dim, __t);\n" |
2472 | "}\n" |
2473 | "__DEVICE__ double norm3d(double __a, double __b, double __c) {\n" |
2474 | " return __nv_norm3d(__a, __b, __c);\n" |
2475 | "}\n" |
2476 | "__DEVICE__ float norm3df(float __a, float __b, float __c) {\n" |
2477 | " return __nv_norm3df(__a, __b, __c);\n" |
2478 | "}\n" |
2479 | "__DEVICE__ double norm4d(double __a, double __b, double __c, double __d) {\n" |
2480 | " return __nv_norm4d(__a, __b, __c, __d);\n" |
2481 | "}\n" |
2482 | "__DEVICE__ float norm4df(float __a, float __b, float __c, float __d) {\n" |
2483 | " return __nv_norm4df(__a, __b, __c, __d);\n" |
2484 | "}\n" |
2485 | "__DEVICE__ double normcdf(double __a) { return __nv_normcdf(__a); }\n" |
2486 | "__DEVICE__ float normcdff(float __a) { return __nv_normcdff(__a); }\n" |
2487 | "__DEVICE__ double normcdfinv(double __a) { return __nv_normcdfinv(__a); }\n" |
2488 | "__DEVICE__ float normcdfinvf(float __a) { return __nv_normcdfinvf(__a); }\n" |
2489 | "__DEVICE__ float normf(int __dim, const float *__t) {\n" |
2490 | " return __nv_normf(__dim, __t);\n" |
2491 | "}\n" |
2492 | "__DEVICE__ double pow(double __a, double __b) { return __nv_pow(__a, __b); }\n" |
2493 | "__DEVICE__ float powf(float __a, float __b) { return __nv_powf(__a, __b); }\n" |
2494 | "__DEVICE__ double powi(double __a, int __b) { return __nv_powi(__a, __b); }\n" |
2495 | "__DEVICE__ float powif(float __a, int __b) { return __nv_powif(__a, __b); }\n" |
2496 | "__DEVICE__ double rcbrt(double __a) { return __nv_rcbrt(__a); }\n" |
2497 | "__DEVICE__ float rcbrtf(float __a) { return __nv_rcbrtf(__a); }\n" |
2498 | "__DEVICE__ double remainder(double __a, double __b) {\n" |
2499 | " return __nv_remainder(__a, __b);\n" |
2500 | "}\n" |
2501 | "__DEVICE__ float remainderf(float __a, float __b) {\n" |
2502 | " return __nv_remainderf(__a, __b);\n" |
2503 | "}\n" |
2504 | "__DEVICE__ double remquo(double __a, double __b, int *__c) {\n" |
2505 | " return __nv_remquo(__a, __b, __c);\n" |
2506 | "}\n" |
2507 | "__DEVICE__ float remquof(float __a, float __b, int *__c) {\n" |
2508 | " return __nv_remquof(__a, __b, __c);\n" |
2509 | "}\n" |
2510 | "__DEVICE__ double rhypot(double __a, double __b) {\n" |
2511 | " return __nv_rhypot(__a, __b);\n" |
2512 | "}\n" |
2513 | "__DEVICE__ float rhypotf(float __a, float __b) {\n" |
2514 | " return __nv_rhypotf(__a, __b);\n" |
2515 | "}\n" |
2516 | "__DEVICE__ double rint(double __a) { return __nv_rint(__a); }\n" |
2517 | "__DEVICE__ float rintf(float __a) { return __nv_rintf(__a); }\n" |
2518 | "__DEVICE__ double rnorm(int __a, const double *__b) {\n" |
2519 | " return __nv_rnorm(__a, __b);\n" |
2520 | "}\n" |
2521 | "__DEVICE__ double rnorm3d(double __a, double __b, double __c) {\n" |
2522 | " return __nv_rnorm3d(__a, __b, __c);\n" |
2523 | "}\n" |
2524 | "__DEVICE__ float rnorm3df(float __a, float __b, float __c) {\n" |
2525 | " return __nv_rnorm3df(__a, __b, __c);\n" |
2526 | "}\n" |
2527 | "__DEVICE__ double rnorm4d(double __a, double __b, double __c, double __d) {\n" |
2528 | " return __nv_rnorm4d(__a, __b, __c, __d);\n" |
2529 | "}\n" |
2530 | "__DEVICE__ float rnorm4df(float __a, float __b, float __c, float __d) {\n" |
2531 | " return __nv_rnorm4df(__a, __b, __c, __d);\n" |
2532 | "}\n" |
2533 | "__DEVICE__ float rnormf(int __dim, const float *__t) {\n" |
2534 | " return __nv_rnormf(__dim, __t);\n" |
2535 | "}\n" |
2536 | "__DEVICE__ double round(double __a) { return __nv_round(__a); }\n" |
2537 | "__DEVICE__ float roundf(float __a) { return __nv_roundf(__a); }\n" |
2538 | "__DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); }\n" |
2539 | "__DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); }\n" |
2540 | "__DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }\n" |
2541 | "__DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }\n" |
2542 | "__DEVICE__ double scalbln(double __a, long __b) {\n" |
2543 | " if (__b > INT_MAX)\n" |
2544 | " return __a > 0 ? HUGE_VAL : -HUGE_VAL;\n" |
2545 | " if (__b < INT_MIN)\n" |
2546 | " return __a > 0 ? 0.0 : -0.0;\n" |
2547 | " return scalbn(__a, (int)__b);\n" |
2548 | "}\n" |
2549 | "__DEVICE__ float scalblnf(float __a, long __b) {\n" |
2550 | " if (__b > INT_MAX)\n" |
2551 | " return __a > 0 ? HUGE_VALF : -HUGE_VALF;\n" |
2552 | " if (__b < INT_MIN)\n" |
2553 | " return __a > 0 ? 0.f : -0.f;\n" |
2554 | " return scalbnf(__a, (int)__b);\n" |
2555 | "}\n" |
2556 | "__DEVICE__ double sin(double __a) { return __nv_sin(__a); }\n" |
2557 | "__DEVICE__ void sincos(double __a, double *__sptr, double *__cptr) {\n" |
2558 | " return __nv_sincos(__a, __sptr, __cptr);\n" |
2559 | "}\n" |
2560 | "__DEVICE__ void sincosf(float __a, float *__sptr, float *__cptr) {\n" |
2561 | " return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __sptr, __cptr);\n" |
2562 | "}\n" |
2563 | "__DEVICE__ void sincospi(double __a, double *__sptr, double *__cptr) {\n" |
2564 | " return __nv_sincospi(__a, __sptr, __cptr);\n" |
2565 | "}\n" |
2566 | "__DEVICE__ void sincospif(float __a, float *__sptr, float *__cptr) {\n" |
2567 | " return __nv_sincospif(__a, __sptr, __cptr);\n" |
2568 | "}\n" |
2569 | "__DEVICE__ float sinf(float __a) {\n" |
2570 | " return __FAST_OR_SLOW(__nv_fast_sinf, __nv_sinf)(__a);\n" |
2571 | "}\n" |
2572 | "__DEVICE__ double sinh(double __a) { return __nv_sinh(__a); }\n" |
2573 | "__DEVICE__ float sinhf(float __a) { return __nv_sinhf(__a); }\n" |
2574 | "__DEVICE__ double sinpi(double __a) { return __nv_sinpi(__a); }\n" |
2575 | "__DEVICE__ float sinpif(float __a) { return __nv_sinpif(__a); }\n" |
2576 | "__DEVICE__ double sqrt(double __a) { return __nv_sqrt(__a); }\n" |
2577 | "__DEVICE__ float sqrtf(float __a) { return __nv_sqrtf(__a); }\n" |
2578 | "__DEVICE__ double tan(double __a) { return __nv_tan(__a); }\n" |
2579 | "__DEVICE__ float tanf(float __a) { return __nv_tanf(__a); }\n" |
2580 | "__DEVICE__ double tanh(double __a) { return __nv_tanh(__a); }\n" |
2581 | "__DEVICE__ float tanhf(float __a) { return __nv_tanhf(__a); }\n" |
2582 | "__DEVICE__ double tgamma(double __a) { return __nv_tgamma(__a); }\n" |
2583 | "__DEVICE__ float tgammaf(float __a) { return __nv_tgammaf(__a); }\n" |
2584 | "__DEVICE__ double trunc(double __a) { return __nv_trunc(__a); }\n" |
2585 | "__DEVICE__ float truncf(float __a) { return __nv_truncf(__a); }\n" |
2586 | "__DEVICE__ unsigned long long ullmax(unsigned long long __a,\n" |
2587 | " unsigned long long __b) {\n" |
2588 | " return __nv_ullmax(__a, __b);\n" |
2589 | "}\n" |
2590 | "__DEVICE__ unsigned long long ullmin(unsigned long long __a,\n" |
2591 | " unsigned long long __b) {\n" |
2592 | " return __nv_ullmin(__a, __b);\n" |
2593 | "}\n" |
2594 | "__DEVICE__ unsigned int umax(unsigned int __a, unsigned int __b) {\n" |
2595 | " return __nv_umax(__a, __b);\n" |
2596 | "}\n" |
2597 | "__DEVICE__ unsigned int umin(unsigned int __a, unsigned int __b) {\n" |
2598 | " return __nv_umin(__a, __b);\n" |
2599 | "}\n" |
2600 | "__DEVICE__ double y0(double __a) { return __nv_y0(__a); }\n" |
2601 | "__DEVICE__ float y0f(float __a) { return __nv_y0f(__a); }\n" |
2602 | "__DEVICE__ double y1(double __a) { return __nv_y1(__a); }\n" |
2603 | "__DEVICE__ float y1f(float __a) { return __nv_y1f(__a); }\n" |
2604 | "__DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }\n" |
2605 | "__DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }\n" |
2606 | "\n" |
2607 | "#pragma pop_macro(\"__DEVICE__\")\n" |
2608 | "#pragma pop_macro(\"__FAST_OR_SLOW\")\n" |
2609 | "#endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__\n" |
2610 | "" } , |
2611 | { "/builtins/__clang_cuda_intrinsics.h" , "/*===--- __clang_cuda_intrinsics.h - Device-side CUDA intrinsic wrappers ---===\n" |
2612 | " *\n" |
2613 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
2614 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
2615 | " * in the Software without restriction, including without limitation the rights\n" |
2616 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
2617 | " * copies of the Software, and to permit persons to whom the Software is\n" |
2618 | " * furnished to do so, subject to the following conditions:\n" |
2619 | " *\n" |
2620 | " * The above copyright notice and this permission notice shall be included in\n" |
2621 | " * all copies or substantial portions of the Software.\n" |
2622 | " *\n" |
2623 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
2624 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
2625 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
2626 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
2627 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
2628 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
2629 | " * THE SOFTWARE.\n" |
2630 | " *\n" |
2631 | " *===-----------------------------------------------------------------------===\n" |
2632 | " */\n" |
2633 | "#ifndef __CLANG_CUDA_INTRINSICS_H__\n" |
2634 | "#define __CLANG_CUDA_INTRINSICS_H__\n" |
2635 | "#ifndef __CUDA__\n" |
2636 | "#error \"This file is for CUDA compilation only.\"\n" |
2637 | "#endif\n" |
2638 | "\n" |
2639 | "// sm_30 intrinsics: __shfl_{up,down,xor}.\n" |
2640 | "\n" |
2641 | "#define __SM_30_INTRINSICS_H__\n" |
2642 | "#define __SM_30_INTRINSICS_HPP__\n" |
2643 | "\n" |
2644 | "#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300\n" |
2645 | "\n" |
2646 | "#pragma push_macro(\"__MAKE_SHUFFLES\")\n" |
2647 | "#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask, \\\n" |
2648 | " __Type) \\\n" |
2649 | " inline __device__ int __FnName(int __val, __Type __offset, \\\n" |
2650 | " int __width = warpSize) { \\\n" |
2651 | " return __IntIntrinsic(__val, __offset, \\\n" |
2652 | " ((warpSize - __width) << 8) | (__Mask)); \\\n" |
2653 | " } \\\n" |
2654 | " inline __device__ float __FnName(float __val, __Type __offset, \\\n" |
2655 | " int __width = warpSize) { \\\n" |
2656 | " return __FloatIntrinsic(__val, __offset, \\\n" |
2657 | " ((warpSize - __width) << 8) | (__Mask)); \\\n" |
2658 | " } \\\n" |
2659 | " inline __device__ unsigned int __FnName(unsigned int __val, __Type __offset, \\\n" |
2660 | " int __width = warpSize) { \\\n" |
2661 | " return static_cast<unsigned int>( \\\n" |
2662 | " ::__FnName(static_cast<int>(__val), __offset, __width)); \\\n" |
2663 | " } \\\n" |
2664 | " inline __device__ long long __FnName(long long __val, __Type __offset, \\\n" /* embedded macro text: 64-bit shuffle overload, performed as two 32-bit shuffles through the int overload */ |
2665 | " int __width = warpSize) { \\\n" |
2666 | " struct __Bits { \\\n" |
2667 | " int __a, __b; \\\n" |
2668 | " }; \\\n" |
2669 | " _Static_assert(sizeof(__val) == sizeof(__Bits)); \\\n" |
2670 | " _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \\\n" |
2671 | " __Bits __tmp; \\\n" |
2672 | " memcpy(&__tmp, &__val, sizeof(__val)); \\\n" /* fix: copy __val into __tmp; dst/src were swapped, so __tmp was shuffled uninitialized */ |
2673 | " __tmp.__a = ::__FnName(__tmp.__a, __offset, __width); \\\n" |
2674 | " __tmp.__b = ::__FnName(__tmp.__b, __offset, __width); \\\n" |
2675 | " long long __ret; \\\n" |
2676 | " memcpy(&__ret, &__tmp, sizeof(__tmp)); \\\n" /* reassemble the two shuffled halves into the 64-bit result */ |
2677 | " return __ret; \\\n" |
2678 | " } \\\n" |
2679 | " inline __device__ long __FnName(long __val, __Type __offset, \\\n" |
2680 | " int __width = warpSize) { \\\n" |
2681 | " _Static_assert(sizeof(long) == sizeof(long long) || \\\n" |
2682 | " sizeof(long) == sizeof(int)); \\\n" |
2683 | " if (sizeof(long) == sizeof(long long)) { \\\n" |
2684 | " return static_cast<long>( \\\n" |
2685 | " ::__FnName(static_cast<long long>(__val), __offset, __width)); \\\n" |
2686 | " } else if (sizeof(long) == sizeof(int)) { \\\n" |
2687 | " return static_cast<long>( \\\n" |
2688 | " ::__FnName(static_cast<int>(__val), __offset, __width)); \\\n" |
2689 | " } \\\n" |
2690 | " } \\\n" |
2691 | " inline __device__ unsigned long __FnName( \\\n" |
2692 | " unsigned long __val, __Type __offset, int __width = warpSize) { \\\n" |
2693 | " return static_cast<unsigned long>( \\\n" |
2694 | " ::__FnName(static_cast<long>(__val), __offset, __width)); \\\n" |
2695 | " } \\\n" |
2696 | " inline __device__ unsigned long long __FnName( \\\n" /* embedded macro text: unsigned 64-bit overload, forwards to the signed long long overload */ |
2697 | " unsigned long long __val, __Type __offset, int __width = warpSize) { \\\n" |
2698 | " return static_cast<unsigned long long>(::__FnName( \\\n" |
2699 | " static_cast<long long>(__val), __offset, __width)); \\\n" /* fix: cast to long long; casting to unsigned long long re-selected this same overload (infinite recursion) */ |
2700 | " } \\\n" |
2701 | " inline __device__ double __FnName(double __val, __Type __offset, \\\n" |
2702 | " int __width = warpSize) { \\\n" |
2703 | " long long __tmp; \\\n" |
2704 | " _Static_assert(sizeof(__tmp) == sizeof(__val)); \\\n" |
2705 | " memcpy(&__tmp, &__val, sizeof(__val)); \\\n" |
2706 | " __tmp = ::__FnName(__tmp, __offset, __width); \\\n" |
2707 | " double __ret; \\\n" |
2708 | " memcpy(&__ret, &__tmp, sizeof(__ret)); \\\n" |
2709 | " return __ret; \\\n" |
2710 | " }\n" |
2711 | "\n" |
2712 | "__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f, int);\n" |
2713 | "// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=\n" |
2714 | "// maxLane.\n" |
2715 | "__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0,\n" |
2716 | " unsigned int);\n" |
2717 | "__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f,\n" |
2718 | " unsigned int);\n" |
2719 | "__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,\n" |
2720 | " int);\n" |
2721 | "#pragma pop_macro(\"__MAKE_SHUFFLES\")\n" |
2722 | "\n" |
2723 | "#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300\n" |
2724 | "\n" |
2725 | "#if CUDA_VERSION >= 9000\n" |
2726 | "#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)\n" |
2727 | "// __shfl_sync_* variants available in CUDA-9\n" |
2728 | "#pragma push_macro(\"__MAKE_SYNC_SHUFFLES\")\n" |
2729 | "#define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, \\\n" |
2730 | " __Mask, __Type) \\\n" |
2731 | " inline __device__ int __FnName(unsigned int __mask, int __val, \\\n" |
2732 | " __Type __offset, int __width = warpSize) { \\\n" |
2733 | " return __IntIntrinsic(__mask, __val, __offset, \\\n" |
2734 | " ((warpSize - __width) << 8) | (__Mask)); \\\n" |
2735 | " } \\\n" |
2736 | " inline __device__ float __FnName(unsigned int __mask, float __val, \\\n" |
2737 | " __Type __offset, int __width = warpSize) { \\\n" |
2738 | " return __FloatIntrinsic(__mask, __val, __offset, \\\n" |
2739 | " ((warpSize - __width) << 8) | (__Mask)); \\\n" |
2740 | " } \\\n" |
2741 | " inline __device__ unsigned int __FnName(unsigned int __mask, \\\n" |
2742 | " unsigned int __val, __Type __offset, \\\n" |
2743 | " int __width = warpSize) { \\\n" |
2744 | " return static_cast<unsigned int>( \\\n" |
2745 | " ::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \\\n" |
2746 | " } \\\n" |
2747 | " inline __device__ long long __FnName(unsigned int __mask, long long __val, \\\n" /* embedded macro text: 64-bit *_sync shuffle overload, performed as two 32-bit shuffles through the int overload */ |
2748 | " __Type __offset, \\\n" |
2749 | " int __width = warpSize) { \\\n" |
2750 | " struct __Bits { \\\n" |
2751 | " int __a, __b; \\\n" |
2752 | " }; \\\n" |
2753 | " _Static_assert(sizeof(__val) == sizeof(__Bits)); \\\n" |
2754 | " _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \\\n" |
2755 | " __Bits __tmp; \\\n" |
2756 | " memcpy(&__tmp, &__val, sizeof(__val)); \\\n" /* fix: copy __val into __tmp; dst/src were swapped, so __tmp was shuffled uninitialized */ |
2757 | " __tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width); \\\n" |
2758 | " __tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width); \\\n" |
2759 | " long long __ret; \\\n" |
2760 | " memcpy(&__ret, &__tmp, sizeof(__tmp)); \\\n" /* reassemble the two shuffled halves into the 64-bit result */ |
2761 | " return __ret; \\\n" |
2762 | " } \\\n" |
2763 | " inline __device__ unsigned long long __FnName( \\\n" /* embedded macro text: unsigned 64-bit *_sync overload, forwards to the signed long long overload */ |
2764 | " unsigned int __mask, unsigned long long __val, __Type __offset, \\\n" |
2765 | " int __width = warpSize) { \\\n" |
2766 | " return static_cast<unsigned long long>(::__FnName( \\\n" |
2767 | " __mask, static_cast<long long>(__val), __offset, __width)); \\\n" /* fix: cast to long long; casting to unsigned long long re-selected this same overload (infinite recursion) */ |
2768 | " } \\\n" |
2769 | " inline __device__ long __FnName(unsigned int __mask, long __val, \\\n" |
2770 | " __Type __offset, int __width = warpSize) { \\\n" |
2771 | " _Static_assert(sizeof(long) == sizeof(long long) || \\\n" |
2772 | " sizeof(long) == sizeof(int)); \\\n" |
2773 | " if (sizeof(long) == sizeof(long long)) { \\\n" |
2774 | " return static_cast<long>(::__FnName( \\\n" |
2775 | " __mask, static_cast<long long>(__val), __offset, __width)); \\\n" |
2776 | " } else if (sizeof(long) == sizeof(int)) { \\\n" |
2777 | " return static_cast<long>( \\\n" |
2778 | " ::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \\\n" |
2779 | " } \\\n" |
2780 | " } \\\n" |
2781 | " inline __device__ unsigned long __FnName( \\\n" |
2782 | " unsigned int __mask, unsigned long __val, __Type __offset, \\\n" |
2783 | " int __width = warpSize) { \\\n" |
2784 | " return static_cast<unsigned long>( \\\n" |
2785 | " ::__FnName(__mask, static_cast<long>(__val), __offset, __width)); \\\n" |
2786 | " } \\\n" |
2787 | " inline __device__ double __FnName(unsigned int __mask, double __val, \\\n" |
2788 | " __Type __offset, int __width = warpSize) { \\\n" |
2789 | " long long __tmp; \\\n" |
2790 | " _Static_assert(sizeof(__tmp) == sizeof(__val)); \\\n" |
2791 | " memcpy(&__tmp, &__val, sizeof(__val)); \\\n" |
2792 | " __tmp = ::__FnName(__mask, __tmp, __offset, __width); \\\n" |
2793 | " double __ret; \\\n" |
2794 | " memcpy(&__ret, &__tmp, sizeof(__ret)); \\\n" |
2795 | " return __ret; \\\n" |
2796 | " }\n" |
2797 | "__MAKE_SYNC_SHUFFLES(__shfl_sync, __nvvm_shfl_sync_idx_i32,\n" |
2798 | " __nvvm_shfl_sync_idx_f32, 0x1f, int);\n" |
2799 | "// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=\n" |
2800 | "// maxLane.\n" |
2801 | "__MAKE_SYNC_SHUFFLES(__shfl_up_sync, __nvvm_shfl_sync_up_i32,\n" |
2802 | " __nvvm_shfl_sync_up_f32, 0, unsigned int);\n" |
2803 | "__MAKE_SYNC_SHUFFLES(__shfl_down_sync, __nvvm_shfl_sync_down_i32,\n" |
2804 | " __nvvm_shfl_sync_down_f32, 0x1f, unsigned int);\n" |
2805 | "__MAKE_SYNC_SHUFFLES(__shfl_xor_sync, __nvvm_shfl_sync_bfly_i32,\n" |
2806 | " __nvvm_shfl_sync_bfly_f32, 0x1f, int);\n" |
2807 | "#pragma pop_macro(\"__MAKE_SYNC_SHUFFLES\")\n" |
2808 | "\n" |
2809 | "inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) {\n" |
2810 | " return __nvvm_bar_warp_sync(mask);\n" |
2811 | "}\n" |
2812 | "\n" |
2813 | "inline __device__ void __barrier_sync(unsigned int id) {\n" |
2814 | " __nvvm_barrier_sync(id);\n" |
2815 | "}\n" |
2816 | "\n" |
2817 | "inline __device__ void __barrier_sync_count(unsigned int id,\n" |
2818 | " unsigned int count) {\n" |
2819 | " __nvvm_barrier_sync_cnt(id, count);\n" |
2820 | "}\n" |
2821 | "\n" |
2822 | "inline __device__ int __all_sync(unsigned int mask, int pred) {\n" |
2823 | " return __nvvm_vote_all_sync(mask, pred);\n" |
2824 | "}\n" |
2825 | "\n" |
2826 | "inline __device__ int __any_sync(unsigned int mask, int pred) {\n" |
2827 | " return __nvvm_vote_any_sync(mask, pred);\n" |
2828 | "}\n" |
2829 | "\n" |
2830 | "inline __device__ int __uni_sync(unsigned int mask, int pred) {\n" |
2831 | " return __nvvm_vote_uni_sync(mask, pred);\n" |
2832 | "}\n" |
2833 | "\n" |
2834 | "inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {\n" |
2835 | " return __nvvm_vote_ballot_sync(mask, pred);\n" |
2836 | "}\n" |
2837 | "\n" |
2838 | "inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); }\n" |
2839 | "\n" |
2840 | "inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) {\n" |
2841 | " return __nvvm_fns(mask, base, offset);\n" |
2842 | "}\n" |
2843 | "\n" |
2844 | "#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300\n" |
2845 | "\n" |
2846 | "// Define __match* builtins CUDA-9 headers expect to see.\n" |
2847 | "#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700\n" |
2848 | "inline __device__ unsigned int __match32_any_sync(unsigned int mask,\n" |
2849 | " unsigned int value) {\n" |
2850 | " return __nvvm_match_any_sync_i32(mask, value);\n" |
2851 | "}\n" |
2852 | "\n" |
2853 | "inline __device__ unsigned long long\n" |
2854 | "__match64_any_sync(unsigned int mask, unsigned long long value) {\n" |
2855 | " return __nvvm_match_any_sync_i64(mask, value);\n" |
2856 | "}\n" |
2857 | "\n" |
2858 | "inline __device__ unsigned int\n" |
2859 | "__match32_all_sync(unsigned int mask, unsigned int value, int *pred) {\n" |
2860 | " return __nvvm_match_all_sync_i32p(mask, value, pred);\n" |
2861 | "}\n" |
2862 | "\n" |
2863 | "inline __device__ unsigned long long\n" |
2864 | "__match64_all_sync(unsigned int mask, unsigned long long value, int *pred) {\n" |
2865 | " return __nvvm_match_all_sync_i64p(mask, value, pred);\n" |
2866 | "}\n" |
2867 | "#include \"crt/sm_70_rt.hpp\"\n" |
2868 | "\n" |
2869 | "#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700\n" |
2870 | "#endif // __CUDA_VERSION >= 9000\n" |
2871 | "\n" |
2872 | "// sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}.\n" |
2873 | "\n" |
2874 | "// Prevent the vanilla sm_32 intrinsics header from being included.\n" |
2875 | "#define __SM_32_INTRINSICS_H__\n" |
2876 | "#define __SM_32_INTRINSICS_HPP__\n" |
2877 | "\n" |
2878 | "#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320\n" |
2879 | "\n" |
2880 | "inline __device__ char __ldg(const char *ptr) { return __nvvm_ldg_c(ptr); }\n" |
2881 | "inline __device__ short __ldg(const short *ptr) { return __nvvm_ldg_s(ptr); }\n" |
2882 | "inline __device__ int __ldg(const int *ptr) { return __nvvm_ldg_i(ptr); }\n" |
2883 | "inline __device__ long __ldg(const long *ptr) { return __nvvm_ldg_l(ptr); }\n" |
2884 | "inline __device__ long long __ldg(const long long *ptr) {\n" |
2885 | " return __nvvm_ldg_ll(ptr);\n" |
2886 | "}\n" |
2887 | "inline __device__ unsigned char __ldg(const unsigned char *ptr) {\n" |
2888 | " return __nvvm_ldg_uc(ptr);\n" |
2889 | "}\n" |
2890 | "inline __device__ signed char __ldg(const signed char *ptr) {\n" |
2891 | " return __nvvm_ldg_uc((const unsigned char *)ptr);\n" |
2892 | "}\n" |
2893 | "inline __device__ unsigned short __ldg(const unsigned short *ptr) {\n" |
2894 | " return __nvvm_ldg_us(ptr);\n" |
2895 | "}\n" |
2896 | "inline __device__ unsigned int __ldg(const unsigned int *ptr) {\n" |
2897 | " return __nvvm_ldg_ui(ptr);\n" |
2898 | "}\n" |
2899 | "inline __device__ unsigned long __ldg(const unsigned long *ptr) {\n" |
2900 | " return __nvvm_ldg_ul(ptr);\n" |
2901 | "}\n" |
2902 | "inline __device__ unsigned long long __ldg(const unsigned long long *ptr) {\n" |
2903 | " return __nvvm_ldg_ull(ptr);\n" |
2904 | "}\n" |
2905 | "inline __device__ float __ldg(const float *ptr) { return __nvvm_ldg_f(ptr); }\n" |
2906 | "inline __device__ double __ldg(const double *ptr) { return __nvvm_ldg_d(ptr); }\n" |
2907 | "\n" |
2908 | "inline __device__ char2 __ldg(const char2 *ptr) {\n" |
2909 | " typedef char c2 __attribute__((ext_vector_type(2)));\n" |
2910 | " // We can assume that ptr is aligned at least to char2's alignment, but the\n" |
2911 | " // load will assume that ptr is aligned to char2's alignment. This is only\n" |
2912 | " // safe if alignof(c2) <= alignof(char2).\n" |
2913 | " c2 rv = __nvvm_ldg_c2(reinterpret_cast<const c2 *>(ptr));\n" |
2914 | " char2 ret;\n" |
2915 | " ret.x = rv[0];\n" |
2916 | " ret.y = rv[1];\n" |
2917 | " return ret;\n" |
2918 | "}\n" |
2919 | "inline __device__ char4 __ldg(const char4 *ptr) {\n" |
2920 | " typedef char c4 __attribute__((ext_vector_type(4)));\n" |
2921 | " c4 rv = __nvvm_ldg_c4(reinterpret_cast<const c4 *>(ptr));\n" |
2922 | " char4 ret;\n" |
2923 | " ret.x = rv[0];\n" |
2924 | " ret.y = rv[1];\n" |
2925 | " ret.z = rv[2];\n" |
2926 | " ret.w = rv[3];\n" |
2927 | " return ret;\n" |
2928 | "}\n" |
2929 | "inline __device__ short2 __ldg(const short2 *ptr) {\n" |
2930 | " typedef short s2 __attribute__((ext_vector_type(2)));\n" |
2931 | " s2 rv = __nvvm_ldg_s2(reinterpret_cast<const s2 *>(ptr));\n" |
2932 | " short2 ret;\n" |
2933 | " ret.x = rv[0];\n" |
2934 | " ret.y = rv[1];\n" |
2935 | " return ret;\n" |
2936 | "}\n" |
2937 | "inline __device__ short4 __ldg(const short4 *ptr) {\n" |
2938 | " typedef short s4 __attribute__((ext_vector_type(4)));\n" |
2939 | " s4 rv = __nvvm_ldg_s4(reinterpret_cast<const s4 *>(ptr));\n" |
2940 | " short4 ret;\n" |
2941 | " ret.x = rv[0];\n" |
2942 | " ret.y = rv[1];\n" |
2943 | " ret.z = rv[2];\n" |
2944 | " ret.w = rv[3];\n" |
2945 | " return ret;\n" |
2946 | "}\n" |
2947 | "inline __device__ int2 __ldg(const int2 *ptr) {\n" |
2948 | " typedef int i2 __attribute__((ext_vector_type(2)));\n" |
2949 | " i2 rv = __nvvm_ldg_i2(reinterpret_cast<const i2 *>(ptr));\n" |
2950 | " int2 ret;\n" |
2951 | " ret.x = rv[0];\n" |
2952 | " ret.y = rv[1];\n" |
2953 | " return ret;\n" |
2954 | "}\n" |
2955 | "inline __device__ int4 __ldg(const int4 *ptr) {\n" |
2956 | " typedef int i4 __attribute__((ext_vector_type(4)));\n" |
2957 | " i4 rv = __nvvm_ldg_i4(reinterpret_cast<const i4 *>(ptr));\n" |
2958 | " int4 ret;\n" |
2959 | " ret.x = rv[0];\n" |
2960 | " ret.y = rv[1];\n" |
2961 | " ret.z = rv[2];\n" |
2962 | " ret.w = rv[3];\n" |
2963 | " return ret;\n" |
2964 | "}\n" |
2965 | "inline __device__ longlong2 __ldg(const longlong2 *ptr) {\n" |
2966 | " typedef long long ll2 __attribute__((ext_vector_type(2)));\n" |
2967 | " ll2 rv = __nvvm_ldg_ll2(reinterpret_cast<const ll2 *>(ptr));\n" |
2968 | " longlong2 ret;\n" |
2969 | " ret.x = rv[0];\n" |
2970 | " ret.y = rv[1];\n" |
2971 | " return ret;\n" |
2972 | "}\n" |
2973 | "\n" |
2974 | "inline __device__ uchar2 __ldg(const uchar2 *ptr) {\n" |
2975 | " typedef unsigned char uc2 __attribute__((ext_vector_type(2)));\n" |
2976 | " uc2 rv = __nvvm_ldg_uc2(reinterpret_cast<const uc2 *>(ptr));\n" |
2977 | " uchar2 ret;\n" |
2978 | " ret.x = rv[0];\n" |
2979 | " ret.y = rv[1];\n" |
2980 | " return ret;\n" |
2981 | "}\n" |
2982 | "inline __device__ uchar4 __ldg(const uchar4 *ptr) {\n" |
2983 | " typedef unsigned char uc4 __attribute__((ext_vector_type(4)));\n" |
2984 | " uc4 rv = __nvvm_ldg_uc4(reinterpret_cast<const uc4 *>(ptr));\n" |
2985 | " uchar4 ret;\n" |
2986 | " ret.x = rv[0];\n" |
2987 | " ret.y = rv[1];\n" |
2988 | " ret.z = rv[2];\n" |
2989 | " ret.w = rv[3];\n" |
2990 | " return ret;\n" |
2991 | "}\n" |
2992 | "inline __device__ ushort2 __ldg(const ushort2 *ptr) {\n" |
2993 | " typedef unsigned short us2 __attribute__((ext_vector_type(2)));\n" |
2994 | " us2 rv = __nvvm_ldg_us2(reinterpret_cast<const us2 *>(ptr));\n" |
2995 | " ushort2 ret;\n" |
2996 | " ret.x = rv[0];\n" |
2997 | " ret.y = rv[1];\n" |
2998 | " return ret;\n" |
2999 | "}\n" |
3000 | "inline __device__ ushort4 __ldg(const ushort4 *ptr) {\n" |
3001 | " typedef unsigned short us4 __attribute__((ext_vector_type(4)));\n" |
3002 | " us4 rv = __nvvm_ldg_us4(reinterpret_cast<const us4 *>(ptr));\n" |
3003 | " ushort4 ret;\n" |
3004 | " ret.x = rv[0];\n" |
3005 | " ret.y = rv[1];\n" |
3006 | " ret.z = rv[2];\n" |
3007 | " ret.w = rv[3];\n" |
3008 | " return ret;\n" |
3009 | "}\n" |
3010 | "inline __device__ uint2 __ldg(const uint2 *ptr) {\n" |
3011 | " typedef unsigned int ui2 __attribute__((ext_vector_type(2)));\n" |
3012 | " ui2 rv = __nvvm_ldg_ui2(reinterpret_cast<const ui2 *>(ptr));\n" |
3013 | " uint2 ret;\n" |
3014 | " ret.x = rv[0];\n" |
3015 | " ret.y = rv[1];\n" |
3016 | " return ret;\n" |
3017 | "}\n" |
3018 | "inline __device__ uint4 __ldg(const uint4 *ptr) {\n" |
3019 | " typedef unsigned int ui4 __attribute__((ext_vector_type(4)));\n" |
3020 | " ui4 rv = __nvvm_ldg_ui4(reinterpret_cast<const ui4 *>(ptr));\n" |
3021 | " uint4 ret;\n" |
3022 | " ret.x = rv[0];\n" |
3023 | " ret.y = rv[1];\n" |
3024 | " ret.z = rv[2];\n" |
3025 | " ret.w = rv[3];\n" |
3026 | " return ret;\n" |
3027 | "}\n" |
3028 | "inline __device__ ulonglong2 __ldg(const ulonglong2 *ptr) {\n" |
3029 | " typedef unsigned long long ull2 __attribute__((ext_vector_type(2)));\n" |
3030 | " ull2 rv = __nvvm_ldg_ull2(reinterpret_cast<const ull2 *>(ptr));\n" |
3031 | " ulonglong2 ret;\n" |
3032 | " ret.x = rv[0];\n" |
3033 | " ret.y = rv[1];\n" |
3034 | " return ret;\n" |
3035 | "}\n" |
3036 | "\n" |
3037 | "inline __device__ float2 __ldg(const float2 *ptr) {\n" |
3038 | " typedef float f2 __attribute__((ext_vector_type(2)));\n" |
3039 | " f2 rv = __nvvm_ldg_f2(reinterpret_cast<const f2 *>(ptr));\n" |
3040 | " float2 ret;\n" |
3041 | " ret.x = rv[0];\n" |
3042 | " ret.y = rv[1];\n" |
3043 | " return ret;\n" |
3044 | "}\n" |
3045 | "inline __device__ float4 __ldg(const float4 *ptr) {\n" |
3046 | " typedef float f4 __attribute__((ext_vector_type(4)));\n" |
3047 | " f4 rv = __nvvm_ldg_f4(reinterpret_cast<const f4 *>(ptr));\n" |
3048 | " float4 ret;\n" |
3049 | " ret.x = rv[0];\n" |
3050 | " ret.y = rv[1];\n" |
3051 | " ret.z = rv[2];\n" |
3052 | " ret.w = rv[3];\n" |
3053 | " return ret;\n" |
3054 | "}\n" |
3055 | "inline __device__ double2 __ldg(const double2 *ptr) {\n" |
3056 | " typedef double d2 __attribute__((ext_vector_type(2)));\n" |
3057 | " d2 rv = __nvvm_ldg_d2(reinterpret_cast<const d2 *>(ptr));\n" |
3058 | " double2 ret;\n" |
3059 | " ret.x = rv[0];\n" |
3060 | " ret.y = rv[1];\n" |
3061 | " return ret;\n" |
3062 | "}\n" |
3063 | "\n" |
3064 | "// TODO: Implement these as intrinsics, so the backend can work its magic on\n" |
3065 | "// these. Alternatively, we could implement these as plain C and try to get\n" |
3066 | "// llvm to recognize the relevant patterns.\n" |
3067 | "inline __device__ unsigned __funnelshift_l(unsigned low32, unsigned high32,\n" |
3068 | " unsigned shiftWidth) {\n" |
3069 | " unsigned result;\n" |
3070 | " asm(\"shf.l.wrap.b32 %0, %1, %2, %3;\"\n" |
3071 | " : \"=r\"(result)\n" |
3072 | " : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n" |
3073 | " return result;\n" |
3074 | "}\n" |
3075 | "inline __device__ unsigned __funnelshift_lc(unsigned low32, unsigned high32,\n" |
3076 | " unsigned shiftWidth) {\n" |
3077 | " unsigned result;\n" |
3078 | " asm(\"shf.l.clamp.b32 %0, %1, %2, %3;\"\n" |
3079 | " : \"=r\"(result)\n" |
3080 | " : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n" |
3081 | " return result;\n" |
3082 | "}\n" |
3083 | "inline __device__ unsigned __funnelshift_r(unsigned low32, unsigned high32,\n" |
3084 | " unsigned shiftWidth) {\n" |
3085 | " unsigned result;\n" |
3086 | " asm(\"shf.r.wrap.b32 %0, %1, %2, %3;\"\n" |
3087 | " : \"=r\"(result)\n" |
3088 | " : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n" |
3089 | " return result;\n" |
3090 | "}\n" |
3091 | "inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32,\n" |
3092 | " unsigned shiftWidth) {\n" |
3093 | " unsigned ret;\n" |
3094 | " asm(\"shf.r.clamp.b32 %0, %1, %2, %3;\"\n" |
3095 | " : \"=r\"(ret)\n" |
3096 | " : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n" |
3097 | " return ret;\n" |
3098 | "}\n" |
3099 | "\n" |
3100 | "#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320\n" |
3101 | "\n" |
3102 | "#endif // defined(__CLANG_CUDA_INTRINSICS_H__)\n" |
3103 | "" } , |
3104 | { "/builtins/__clang_cuda_libdevice_declares.h" , "/*===-- __clang_cuda_libdevice_declares.h - decls for libdevice functions --===\n" |
3105 | " *\n" |
3106 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
3107 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
3108 | " * in the Software without restriction, including without limitation the rights\n" |
3109 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
3110 | " * copies of the Software, and to permit persons to whom the Software is\n" |
3111 | " * furnished to do so, subject to the following conditions:\n" |
3112 | " *\n" |
3113 | " * The above copyright notice and this permission notice shall be included in\n" |
3114 | " * all copies or substantial portions of the Software.\n" |
3115 | " *\n" |
3116 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
3117 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
3118 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
3119 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
3120 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
3121 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
3122 | " * THE SOFTWARE.\n" |
3123 | " *\n" |
3124 | " *===-----------------------------------------------------------------------===\n" |
3125 | " */\n" |
3126 | "\n" |
3127 | "#ifndef __CLANG_CUDA_LIBDEVICE_DECLARES_H__\n" |
3128 | "#define __CLANG_CUDA_LIBDEVICE_DECLARES_H__\n" |
3129 | "\n" |
3130 | "extern \"C\" {\n" |
3131 | "\n" |
3132 | "__device__ int __nv_abs(int __a);\n" |
3133 | "__device__ double __nv_acos(double __a);\n" |
3134 | "__device__ float __nv_acosf(float __a);\n" |
3135 | "__device__ double __nv_acosh(double __a);\n" |
3136 | "__device__ float __nv_acoshf(float __a);\n" |
3137 | "__device__ double __nv_asin(double __a);\n" |
3138 | "__device__ float __nv_asinf(float __a);\n" |
3139 | "__device__ double __nv_asinh(double __a);\n" |
3140 | "__device__ float __nv_asinhf(float __a);\n" |
3141 | "__device__ double __nv_atan2(double __a, double __b);\n" |
3142 | "__device__ float __nv_atan2f(float __a, float __b);\n" |
3143 | "__device__ double __nv_atan(double __a);\n" |
3144 | "__device__ float __nv_atanf(float __a);\n" |
3145 | "__device__ double __nv_atanh(double __a);\n" |
3146 | "__device__ float __nv_atanhf(float __a);\n" |
3147 | "__device__ int __nv_brev(int __a);\n" |
3148 | "__device__ long long __nv_brevll(long long __a);\n" |
3149 | "__device__ int __nv_byte_perm(int __a, int __b, int __c);\n" |
3150 | "__device__ double __nv_cbrt(double __a);\n" |
3151 | "__device__ float __nv_cbrtf(float __a);\n" |
3152 | "__device__ double __nv_ceil(double __a);\n" |
3153 | "__device__ float __nv_ceilf(float __a);\n" |
3154 | "__device__ int __nv_clz(int __a);\n" |
3155 | "__device__ int __nv_clzll(long long __a);\n" |
3156 | "__device__ double __nv_copysign(double __a, double __b);\n" |
3157 | "__device__ float __nv_copysignf(float __a, float __b);\n" |
3158 | "__device__ double __nv_cos(double __a);\n" |
3159 | "__device__ float __nv_cosf(float __a);\n" |
3160 | "__device__ double __nv_cosh(double __a);\n" |
3161 | "__device__ float __nv_coshf(float __a);\n" |
3162 | "__device__ double __nv_cospi(double __a);\n" |
3163 | "__device__ float __nv_cospif(float __a);\n" |
3164 | "__device__ double __nv_cyl_bessel_i0(double __a);\n" |
3165 | "__device__ float __nv_cyl_bessel_i0f(float __a);\n" |
3166 | "__device__ double __nv_cyl_bessel_i1(double __a);\n" |
3167 | "__device__ float __nv_cyl_bessel_i1f(float __a);\n" |
3168 | "__device__ double __nv_dadd_rd(double __a, double __b);\n" |
3169 | "__device__ double __nv_dadd_rn(double __a, double __b);\n" |
3170 | "__device__ double __nv_dadd_ru(double __a, double __b);\n" |
3171 | "__device__ double __nv_dadd_rz(double __a, double __b);\n" |
3172 | "__device__ double __nv_ddiv_rd(double __a, double __b);\n" |
3173 | "__device__ double __nv_ddiv_rn(double __a, double __b);\n" |
3174 | "__device__ double __nv_ddiv_ru(double __a, double __b);\n" |
3175 | "__device__ double __nv_ddiv_rz(double __a, double __b);\n" |
3176 | "__device__ double __nv_dmul_rd(double __a, double __b);\n" |
3177 | "__device__ double __nv_dmul_rn(double __a, double __b);\n" |
3178 | "__device__ double __nv_dmul_ru(double __a, double __b);\n" |
3179 | "__device__ double __nv_dmul_rz(double __a, double __b);\n" |
3180 | "__device__ float __nv_double2float_rd(double __a);\n" |
3181 | "__device__ float __nv_double2float_rn(double __a);\n" |
3182 | "__device__ float __nv_double2float_ru(double __a);\n" |
3183 | "__device__ float __nv_double2float_rz(double __a);\n" |
3184 | "__device__ int __nv_double2hiint(double __a);\n" |
3185 | "__device__ int __nv_double2int_rd(double __a);\n" |
3186 | "__device__ int __nv_double2int_rn(double __a);\n" |
3187 | "__device__ int __nv_double2int_ru(double __a);\n" |
3188 | "__device__ int __nv_double2int_rz(double __a);\n" |
3189 | "__device__ long long __nv_double2ll_rd(double __a);\n" |
3190 | "__device__ long long __nv_double2ll_rn(double __a);\n" |
3191 | "__device__ long long __nv_double2ll_ru(double __a);\n" |
3192 | "__device__ long long __nv_double2ll_rz(double __a);\n" |
3193 | "__device__ int __nv_double2loint(double __a);\n" |
3194 | "__device__ unsigned int __nv_double2uint_rd(double __a);\n" |
3195 | "__device__ unsigned int __nv_double2uint_rn(double __a);\n" |
3196 | "__device__ unsigned int __nv_double2uint_ru(double __a);\n" |
3197 | "__device__ unsigned int __nv_double2uint_rz(double __a);\n" |
3198 | "__device__ unsigned long long __nv_double2ull_rd(double __a);\n" |
3199 | "__device__ unsigned long long __nv_double2ull_rn(double __a);\n" |
3200 | "__device__ unsigned long long __nv_double2ull_ru(double __a);\n" |
3201 | "__device__ unsigned long long __nv_double2ull_rz(double __a);\n" |
3202 | "__device__ unsigned long long __nv_double_as_longlong(double __a);\n" |
3203 | "__device__ double __nv_drcp_rd(double __a);\n" |
3204 | "__device__ double __nv_drcp_rn(double __a);\n" |
3205 | "__device__ double __nv_drcp_ru(double __a);\n" |
3206 | "__device__ double __nv_drcp_rz(double __a);\n" |
3207 | "__device__ double __nv_dsqrt_rd(double __a);\n" |
3208 | "__device__ double __nv_dsqrt_rn(double __a);\n" |
3209 | "__device__ double __nv_dsqrt_ru(double __a);\n" |
3210 | "__device__ double __nv_dsqrt_rz(double __a);\n" |
3211 | "__device__ double __nv_dsub_rd(double __a, double __b);\n" |
3212 | "__device__ double __nv_dsub_rn(double __a, double __b);\n" |
3213 | "__device__ double __nv_dsub_ru(double __a, double __b);\n" |
3214 | "__device__ double __nv_dsub_rz(double __a, double __b);\n" |
3215 | "__device__ double __nv_erfc(double __a);\n" |
3216 | "__device__ float __nv_erfcf(float __a);\n" |
3217 | "__device__ double __nv_erfcinv(double __a);\n" |
3218 | "__device__ float __nv_erfcinvf(float __a);\n" |
3219 | "__device__ double __nv_erfcx(double __a);\n" |
3220 | "__device__ float __nv_erfcxf(float __a);\n" |
3221 | "__device__ double __nv_erf(double __a);\n" |
3222 | "__device__ float __nv_erff(float __a);\n" |
3223 | "__device__ double __nv_erfinv(double __a);\n" |
3224 | "__device__ float __nv_erfinvf(float __a);\n" |
3225 | "__device__ double __nv_exp10(double __a);\n" |
3226 | "__device__ float __nv_exp10f(float __a);\n" |
3227 | "__device__ double __nv_exp2(double __a);\n" |
3228 | "__device__ float __nv_exp2f(float __a);\n" |
3229 | "__device__ double __nv_exp(double __a);\n" |
3230 | "__device__ float __nv_expf(float __a);\n" |
3231 | "__device__ double __nv_expm1(double __a);\n" |
3232 | "__device__ float __nv_expm1f(float __a);\n" |
3233 | "__device__ double __nv_fabs(double __a);\n" |
3234 | "__device__ float __nv_fabsf(float __a);\n" |
3235 | "__device__ float __nv_fadd_rd(float __a, float __b);\n" |
3236 | "__device__ float __nv_fadd_rn(float __a, float __b);\n" |
3237 | "__device__ float __nv_fadd_ru(float __a, float __b);\n" |
3238 | "__device__ float __nv_fadd_rz(float __a, float __b);\n" |
3239 | "__device__ float __nv_fast_cosf(float __a);\n" |
3240 | "__device__ float __nv_fast_exp10f(float __a);\n" |
3241 | "__device__ float __nv_fast_expf(float __a);\n" |
3242 | "__device__ float __nv_fast_fdividef(float __a, float __b);\n" |
3243 | "__device__ float __nv_fast_log10f(float __a);\n" |
3244 | "__device__ float __nv_fast_log2f(float __a);\n" |
3245 | "__device__ float __nv_fast_logf(float __a);\n" |
3246 | "__device__ float __nv_fast_powf(float __a, float __b);\n" |
3247 | "__device__ void __nv_fast_sincosf(float __a, float *__sptr, float *__cptr);\n" |
3248 | "__device__ float __nv_fast_sinf(float __a);\n" |
3249 | "__device__ float __nv_fast_tanf(float __a);\n" |
3250 | "__device__ double __nv_fdim(double __a, double __b);\n" |
3251 | "__device__ float __nv_fdimf(float __a, float __b);\n" |
3252 | "__device__ float __nv_fdiv_rd(float __a, float __b);\n" |
3253 | "__device__ float __nv_fdiv_rn(float __a, float __b);\n" |
3254 | "__device__ float __nv_fdiv_ru(float __a, float __b);\n" |
3255 | "__device__ float __nv_fdiv_rz(float __a, float __b);\n" |
3256 | "__device__ int __nv_ffs(int __a);\n" |
3257 | "__device__ int __nv_ffsll(long long __a);\n" |
3258 | "__device__ int __nv_finitef(float __a);\n" |
3259 | "__device__ unsigned short __nv_float2half_rn(float __a);\n" |
3260 | "__device__ int __nv_float2int_rd(float __a);\n" |
3261 | "__device__ int __nv_float2int_rn(float __a);\n" |
3262 | "__device__ int __nv_float2int_ru(float __a);\n" |
3263 | "__device__ int __nv_float2int_rz(float __a);\n" |
3264 | "__device__ long long __nv_float2ll_rd(float __a);\n" |
3265 | "__device__ long long __nv_float2ll_rn(float __a);\n" |
3266 | "__device__ long long __nv_float2ll_ru(float __a);\n" |
3267 | "__device__ long long __nv_float2ll_rz(float __a);\n" |
3268 | "__device__ unsigned int __nv_float2uint_rd(float __a);\n" |
3269 | "__device__ unsigned int __nv_float2uint_rn(float __a);\n" |
3270 | "__device__ unsigned int __nv_float2uint_ru(float __a);\n" |
3271 | "__device__ unsigned int __nv_float2uint_rz(float __a);\n" |
3272 | "__device__ unsigned long long __nv_float2ull_rd(float __a);\n" |
3273 | "__device__ unsigned long long __nv_float2ull_rn(float __a);\n" |
3274 | "__device__ unsigned long long __nv_float2ull_ru(float __a);\n" |
3275 | "__device__ unsigned long long __nv_float2ull_rz(float __a);\n" |
3276 | "__device__ int __nv_float_as_int(float __a);\n" |
3277 | "__device__ unsigned int __nv_float_as_uint(float __a);\n" |
3278 | "__device__ double __nv_floor(double __a);\n" |
3279 | "__device__ float __nv_floorf(float __a);\n" |
3280 | "__device__ double __nv_fma(double __a, double __b, double __c);\n" |
3281 | "__device__ float __nv_fmaf(float __a, float __b, float __c);\n" |
3282 | "__device__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c);\n" |
3283 | "__device__ float __nv_fmaf_ieee_rn(float __a, float __b, float __c);\n" |
3284 | "__device__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c);\n" |
3285 | "__device__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c);\n" |
3286 | "__device__ float __nv_fmaf_rd(float __a, float __b, float __c);\n" |
3287 | "__device__ float __nv_fmaf_rn(float __a, float __b, float __c);\n" |
3288 | "__device__ float __nv_fmaf_ru(float __a, float __b, float __c);\n" |
3289 | "__device__ float __nv_fmaf_rz(float __a, float __b, float __c);\n" |
3290 | "__device__ double __nv_fma_rd(double __a, double __b, double __c);\n" |
3291 | "__device__ double __nv_fma_rn(double __a, double __b, double __c);\n" |
3292 | "__device__ double __nv_fma_ru(double __a, double __b, double __c);\n" |
3293 | "__device__ double __nv_fma_rz(double __a, double __b, double __c);\n" |
3294 | "__device__ double __nv_fmax(double __a, double __b);\n" |
3295 | "__device__ float __nv_fmaxf(float __a, float __b);\n" |
3296 | "__device__ double __nv_fmin(double __a, double __b);\n" |
3297 | "__device__ float __nv_fminf(float __a, float __b);\n" |
3298 | "__device__ double __nv_fmod(double __a, double __b);\n" |
3299 | "__device__ float __nv_fmodf(float __a, float __b);\n" |
3300 | "__device__ float __nv_fmul_rd(float __a, float __b);\n" |
3301 | "__device__ float __nv_fmul_rn(float __a, float __b);\n" |
3302 | "__device__ float __nv_fmul_ru(float __a, float __b);\n" |
3303 | "__device__ float __nv_fmul_rz(float __a, float __b);\n" |
3304 | "__device__ float __nv_frcp_rd(float __a);\n" |
3305 | "__device__ float __nv_frcp_rn(float __a);\n" |
3306 | "__device__ float __nv_frcp_ru(float __a);\n" |
3307 | "__device__ float __nv_frcp_rz(float __a);\n" |
3308 | "__device__ double __nv_frexp(double __a, int *__b);\n" |
3309 | "__device__ float __nv_frexpf(float __a, int *__b);\n" |
3310 | "__device__ float __nv_frsqrt_rn(float __a);\n" |
3311 | "__device__ float __nv_fsqrt_rd(float __a);\n" |
3312 | "__device__ float __nv_fsqrt_rn(float __a);\n" |
3313 | "__device__ float __nv_fsqrt_ru(float __a);\n" |
3314 | "__device__ float __nv_fsqrt_rz(float __a);\n" |
3315 | "__device__ float __nv_fsub_rd(float __a, float __b);\n" |
3316 | "__device__ float __nv_fsub_rn(float __a, float __b);\n" |
3317 | "__device__ float __nv_fsub_ru(float __a, float __b);\n" |
3318 | "__device__ float __nv_fsub_rz(float __a, float __b);\n" |
3319 | "__device__ int __nv_hadd(int __a, int __b);\n" |
3320 | "__device__ float __nv_half2float(unsigned short __h);\n" |
3321 | "__device__ double __nv_hiloint2double(int __a, int __b);\n" |
3322 | "__device__ double __nv_hypot(double __a, double __b);\n" |
3323 | "__device__ float __nv_hypotf(float __a, float __b);\n" |
3324 | "__device__ int __nv_ilogb(double __a);\n" |
3325 | "__device__ int __nv_ilogbf(float __a);\n" |
3326 | "__device__ double __nv_int2double_rn(int __a);\n" |
3327 | "__device__ float __nv_int2float_rd(int __a);\n" |
3328 | "__device__ float __nv_int2float_rn(int __a);\n" |
3329 | "__device__ float __nv_int2float_ru(int __a);\n" |
3330 | "__device__ float __nv_int2float_rz(int __a);\n" |
3331 | "__device__ float __nv_int_as_float(int __a);\n" |
3332 | "__device__ int __nv_isfinited(double __a);\n" |
3333 | "__device__ int __nv_isinfd(double __a);\n" |
3334 | "__device__ int __nv_isinff(float __a);\n" |
3335 | "__device__ int __nv_isnand(double __a);\n" |
3336 | "__device__ int __nv_isnanf(float __a);\n" |
3337 | "__device__ double __nv_j0(double __a);\n" |
3338 | "__device__ float __nv_j0f(float __a);\n" |
3339 | "__device__ double __nv_j1(double __a);\n" |
3340 | "__device__ float __nv_j1f(float __a);\n" |
3341 | "__device__ float __nv_jnf(int __a, float __b);\n" |
3342 | "__device__ double __nv_jn(int __a, double __b);\n" |
3343 | "__device__ double __nv_ldexp(double __a, int __b);\n" |
3344 | "__device__ float __nv_ldexpf(float __a, int __b);\n" |
3345 | "__device__ double __nv_lgamma(double __a);\n" |
3346 | "__device__ float __nv_lgammaf(float __a);\n" |
3347 | "__device__ double __nv_ll2double_rd(long long __a);\n" |
3348 | "__device__ double __nv_ll2double_rn(long long __a);\n" |
3349 | "__device__ double __nv_ll2double_ru(long long __a);\n" |
3350 | "__device__ double __nv_ll2double_rz(long long __a);\n" |
3351 | "__device__ float __nv_ll2float_rd(long long __a);\n" |
3352 | "__device__ float __nv_ll2float_rn(long long __a);\n" |
3353 | "__device__ float __nv_ll2float_ru(long long __a);\n" |
3354 | "__device__ float __nv_ll2float_rz(long long __a);\n" |
3355 | "__device__ long long __nv_llabs(long long __a);\n" |
3356 | "__device__ long long __nv_llmax(long long __a, long long __b);\n" |
3357 | "__device__ long long __nv_llmin(long long __a, long long __b);\n" |
3358 | "__device__ long long __nv_llrint(double __a);\n" |
3359 | "__device__ long long __nv_llrintf(float __a);\n" |
3360 | "__device__ long long __nv_llround(double __a);\n" |
3361 | "__device__ long long __nv_llroundf(float __a);\n" |
3362 | "__device__ double __nv_log10(double __a);\n" |
3363 | "__device__ float __nv_log10f(float __a);\n" |
3364 | "__device__ double __nv_log1p(double __a);\n" |
3365 | "__device__ float __nv_log1pf(float __a);\n" |
3366 | "__device__ double __nv_log2(double __a);\n" |
3367 | "__device__ float __nv_log2f(float __a);\n" |
3368 | "__device__ double __nv_logb(double __a);\n" |
3369 | "__device__ float __nv_logbf(float __a);\n" |
3370 | "__device__ double __nv_log(double __a);\n" |
3371 | "__device__ float __nv_logf(float __a);\n" |
3372 | "__device__ double __nv_longlong_as_double(long long __a);\n" |
3373 | "__device__ int __nv_max(int __a, int __b);\n" |
3374 | "__device__ int __nv_min(int __a, int __b);\n" |
3375 | "__device__ double __nv_modf(double __a, double *__b);\n" |
3376 | "__device__ float __nv_modff(float __a, float *__b);\n" |
3377 | "__device__ int __nv_mul24(int __a, int __b);\n" |
3378 | "__device__ long long __nv_mul64hi(long long __a, long long __b);\n" |
3379 | "__device__ int __nv_mulhi(int __a, int __b);\n" |
3380 | "__device__ double __nv_nan(const signed char *__a);\n" |
3381 | "__device__ float __nv_nanf(const signed char *__a);\n" |
3382 | "__device__ double __nv_nearbyint(double __a);\n" |
3383 | "__device__ float __nv_nearbyintf(float __a);\n" |
3384 | "__device__ double __nv_nextafter(double __a, double __b);\n" |
3385 | "__device__ float __nv_nextafterf(float __a, float __b);\n" |
3386 | "__device__ double __nv_norm3d(double __a, double __b, double __c);\n" |
3387 | "__device__ float __nv_norm3df(float __a, float __b, float __c);\n" |
3388 | "__device__ double __nv_norm4d(double __a, double __b, double __c, double __d);\n" |
3389 | "__device__ float __nv_norm4df(float __a, float __b, float __c, float __d);\n" |
3390 | "__device__ double __nv_normcdf(double __a);\n" |
3391 | "__device__ float __nv_normcdff(float __a);\n" |
3392 | "__device__ double __nv_normcdfinv(double __a);\n" |
3393 | "__device__ float __nv_normcdfinvf(float __a);\n" |
3394 | "__device__ float __nv_normf(int __a, const float *__b);\n" |
3395 | "__device__ double __nv_norm(int __a, const double *__b);\n" |
3396 | "__device__ int __nv_popc(int __a);\n" |
3397 | "__device__ int __nv_popcll(long long __a);\n" |
3398 | "__device__ double __nv_pow(double __a, double __b);\n" |
3399 | "__device__ float __nv_powf(float __a, float __b);\n" |
3400 | "__device__ double __nv_powi(double __a, int __b);\n" |
3401 | "__device__ float __nv_powif(float __a, int __b);\n" |
3402 | "__device__ double __nv_rcbrt(double __a);\n" |
3403 | "__device__ float __nv_rcbrtf(float __a);\n" |
3404 | "__device__ double __nv_rcp64h(double __a);\n" |
3405 | "__device__ double __nv_remainder(double __a, double __b);\n" |
3406 | "__device__ float __nv_remainderf(float __a, float __b);\n" |
3407 | "__device__ double __nv_remquo(double __a, double __b, int *__c);\n" |
3408 | "__device__ float __nv_remquof(float __a, float __b, int *__c);\n" |
3409 | "__device__ int __nv_rhadd(int __a, int __b);\n" |
3410 | "__device__ double __nv_rhypot(double __a, double __b);\n" |
3411 | "__device__ float __nv_rhypotf(float __a, float __b);\n" |
3412 | "__device__ double __nv_rint(double __a);\n" |
3413 | "__device__ float __nv_rintf(float __a);\n" |
3414 | "__device__ double __nv_rnorm3d(double __a, double __b, double __c);\n" |
3415 | "__device__ float __nv_rnorm3df(float __a, float __b, float __c);\n" |
3416 | "__device__ double __nv_rnorm4d(double __a, double __b, double __c, double __d);\n" |
3417 | "__device__ float __nv_rnorm4df(float __a, float __b, float __c, float __d);\n" |
3418 | "__device__ float __nv_rnormf(int __a, const float *__b);\n" |
3419 | "__device__ double __nv_rnorm(int __a, const double *__b);\n" |
3420 | "__device__ double __nv_round(double __a);\n" |
3421 | "__device__ float __nv_roundf(float __a);\n" |
3422 | "__device__ double __nv_rsqrt(double __a);\n" |
3423 | "__device__ float __nv_rsqrtf(float __a);\n" |
3424 | "__device__ int __nv_sad(int __a, int __b, int __c);\n" |
3425 | "__device__ float __nv_saturatef(float __a);\n" |
3426 | "__device__ double __nv_scalbn(double __a, int __b);\n" |
3427 | "__device__ float __nv_scalbnf(float __a, int __b);\n" |
3428 | "__device__ int __nv_signbitd(double __a);\n" |
3429 | "__device__ int __nv_signbitf(float __a);\n" |
3430 | "__device__ void __nv_sincos(double __a, double *__b, double *__c);\n" |
3431 | "__device__ void __nv_sincosf(float __a, float *__b, float *__c);\n" |
3432 | "__device__ void __nv_sincospi(double __a, double *__b, double *__c);\n" |
3433 | "__device__ void __nv_sincospif(float __a, float *__b, float *__c);\n" |
3434 | "__device__ double __nv_sin(double __a);\n" |
3435 | "__device__ float __nv_sinf(float __a);\n" |
3436 | "__device__ double __nv_sinh(double __a);\n" |
3437 | "__device__ float __nv_sinhf(float __a);\n" |
3438 | "__device__ double __nv_sinpi(double __a);\n" |
3439 | "__device__ float __nv_sinpif(float __a);\n" |
3440 | "__device__ double __nv_sqrt(double __a);\n" |
3441 | "__device__ float __nv_sqrtf(float __a);\n" |
3442 | "__device__ double __nv_tan(double __a);\n" |
3443 | "__device__ float __nv_tanf(float __a);\n" |
3444 | "__device__ double __nv_tanh(double __a);\n" |
3445 | "__device__ float __nv_tanhf(float __a);\n" |
3446 | "__device__ double __nv_tgamma(double __a);\n" |
3447 | "__device__ float __nv_tgammaf(float __a);\n" |
3448 | "__device__ double __nv_trunc(double __a);\n" |
3449 | "__device__ float __nv_truncf(float __a);\n" |
3450 | "__device__ int __nv_uhadd(unsigned int __a, unsigned int __b);\n" |
3451 | "__device__ double __nv_uint2double_rn(unsigned int __i);\n" |
3452 | "__device__ float __nv_uint2float_rd(unsigned int __a);\n" |
3453 | "__device__ float __nv_uint2float_rn(unsigned int __a);\n" |
3454 | "__device__ float __nv_uint2float_ru(unsigned int __a);\n" |
3455 | "__device__ float __nv_uint2float_rz(unsigned int __a);\n" |
3456 | "__device__ float __nv_uint_as_float(unsigned int __a);\n" |
3457 | "__device__ double __nv_ull2double_rd(unsigned long long __a);\n" |
3458 | "__device__ double __nv_ull2double_rn(unsigned long long __a);\n" |
3459 | "__device__ double __nv_ull2double_ru(unsigned long long __a);\n" |
3460 | "__device__ double __nv_ull2double_rz(unsigned long long __a);\n" |
3461 | "__device__ float __nv_ull2float_rd(unsigned long long __a);\n" |
3462 | "__device__ float __nv_ull2float_rn(unsigned long long __a);\n" |
3463 | "__device__ float __nv_ull2float_ru(unsigned long long __a);\n" |
3464 | "__device__ float __nv_ull2float_rz(unsigned long long __a);\n" |
3465 | "__device__ unsigned long long __nv_ullmax(unsigned long long __a,\n" |
3466 | " unsigned long long __b);\n" |
3467 | "__device__ unsigned long long __nv_ullmin(unsigned long long __a,\n" |
3468 | " unsigned long long __b);\n" |
3469 | "__device__ unsigned int __nv_umax(unsigned int __a, unsigned int __b);\n" |
3470 | "__device__ unsigned int __nv_umin(unsigned int __a, unsigned int __b);\n" |
3471 | "__device__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b);\n" |
3472 | "__device__ unsigned long long __nv_umul64hi(unsigned long long __a,\n" |
3473 | " unsigned long long __b);\n" |
3474 | "__device__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b);\n" |
3475 | "__device__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b);\n" |
3476 | "__device__ unsigned int __nv_usad(unsigned int __a, unsigned int __b,\n" |
3477 | " unsigned int __c);\n" |
3478 | "#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020\n" |
3479 | "__device__ int __nv_vabs2(int __a);\n" |
3480 | "__device__ int __nv_vabs4(int __a);\n" |
3481 | "__device__ int __nv_vabsdiffs2(int __a, int __b);\n" |
3482 | "__device__ int __nv_vabsdiffs4(int __a, int __b);\n" |
3483 | "__device__ int __nv_vabsdiffu2(int __a, int __b);\n" |
3484 | "__device__ int __nv_vabsdiffu4(int __a, int __b);\n" |
3485 | "__device__ int __nv_vabsss2(int __a);\n" |
3486 | "__device__ int __nv_vabsss4(int __a);\n" |
3487 | "__device__ int __nv_vadd2(int __a, int __b);\n" |
3488 | "__device__ int __nv_vadd4(int __a, int __b);\n" |
3489 | "__device__ int __nv_vaddss2(int __a, int __b);\n" |
3490 | "__device__ int __nv_vaddss4(int __a, int __b);\n" |
3491 | "__device__ int __nv_vaddus2(int __a, int __b);\n" |
3492 | "__device__ int __nv_vaddus4(int __a, int __b);\n" |
3493 | "__device__ int __nv_vavgs2(int __a, int __b);\n" |
3494 | "__device__ int __nv_vavgs4(int __a, int __b);\n" |
3495 | "__device__ int __nv_vavgu2(int __a, int __b);\n" |
3496 | "__device__ int __nv_vavgu4(int __a, int __b);\n" |
3497 | "__device__ int __nv_vcmpeq2(int __a, int __b);\n" |
3498 | "__device__ int __nv_vcmpeq4(int __a, int __b);\n" |
3499 | "__device__ int __nv_vcmpges2(int __a, int __b);\n" |
3500 | "__device__ int __nv_vcmpges4(int __a, int __b);\n" |
3501 | "__device__ int __nv_vcmpgeu2(int __a, int __b);\n" |
3502 | "__device__ int __nv_vcmpgeu4(int __a, int __b);\n" |
3503 | "__device__ int __nv_vcmpgts2(int __a, int __b);\n" |
3504 | "__device__ int __nv_vcmpgts4(int __a, int __b);\n" |
3505 | "__device__ int __nv_vcmpgtu2(int __a, int __b);\n" |
3506 | "__device__ int __nv_vcmpgtu4(int __a, int __b);\n" |
3507 | "__device__ int __nv_vcmples2(int __a, int __b);\n" |
3508 | "__device__ int __nv_vcmples4(int __a, int __b);\n" |
3509 | "__device__ int __nv_vcmpleu2(int __a, int __b);\n" |
3510 | "__device__ int __nv_vcmpleu4(int __a, int __b);\n" |
3511 | "__device__ int __nv_vcmplts2(int __a, int __b);\n" |
3512 | "__device__ int __nv_vcmplts4(int __a, int __b);\n" |
3513 | "__device__ int __nv_vcmpltu2(int __a, int __b);\n" |
3514 | "__device__ int __nv_vcmpltu4(int __a, int __b);\n" |
3515 | "__device__ int __nv_vcmpne2(int __a, int __b);\n" |
3516 | "__device__ int __nv_vcmpne4(int __a, int __b);\n" |
3517 | "__device__ int __nv_vhaddu2(int __a, int __b);\n" |
3518 | "__device__ int __nv_vhaddu4(int __a, int __b);\n" |
3519 | "__device__ int __nv_vmaxs2(int __a, int __b);\n" |
3520 | "__device__ int __nv_vmaxs4(int __a, int __b);\n" |
3521 | "__device__ int __nv_vmaxu2(int __a, int __b);\n" |
3522 | "__device__ int __nv_vmaxu4(int __a, int __b);\n" |
3523 | "__device__ int __nv_vmins2(int __a, int __b);\n" |
3524 | "__device__ int __nv_vmins4(int __a, int __b);\n" |
3525 | "__device__ int __nv_vminu2(int __a, int __b);\n" |
3526 | "__device__ int __nv_vminu4(int __a, int __b);\n" |
3527 | "__device__ int __nv_vneg2(int __a);\n" |
3528 | "__device__ int __nv_vneg4(int __a);\n" |
3529 | "__device__ int __nv_vnegss2(int __a);\n" |
3530 | "__device__ int __nv_vnegss4(int __a);\n" |
3531 | "__device__ int __nv_vsads2(int __a, int __b);\n" |
3532 | "__device__ int __nv_vsads4(int __a, int __b);\n" |
3533 | "__device__ int __nv_vsadu2(int __a, int __b);\n" |
3534 | "__device__ int __nv_vsadu4(int __a, int __b);\n" |
3535 | "__device__ int __nv_vseteq2(int __a, int __b);\n" |
3536 | "__device__ int __nv_vseteq4(int __a, int __b);\n" |
3537 | "__device__ int __nv_vsetges2(int __a, int __b);\n" |
3538 | "__device__ int __nv_vsetges4(int __a, int __b);\n" |
3539 | "__device__ int __nv_vsetgeu2(int __a, int __b);\n" |
3540 | "__device__ int __nv_vsetgeu4(int __a, int __b);\n" |
3541 | "__device__ int __nv_vsetgts2(int __a, int __b);\n" |
3542 | "__device__ int __nv_vsetgts4(int __a, int __b);\n" |
3543 | "__device__ int __nv_vsetgtu2(int __a, int __b);\n" |
3544 | "__device__ int __nv_vsetgtu4(int __a, int __b);\n" |
3545 | "__device__ int __nv_vsetles2(int __a, int __b);\n" |
3546 | "__device__ int __nv_vsetles4(int __a, int __b);\n" |
3547 | "__device__ int __nv_vsetleu2(int __a, int __b);\n" |
3548 | "__device__ int __nv_vsetleu4(int __a, int __b);\n" |
3549 | "__device__ int __nv_vsetlts2(int __a, int __b);\n" |
3550 | "__device__ int __nv_vsetlts4(int __a, int __b);\n" |
3551 | "__device__ int __nv_vsetltu2(int __a, int __b);\n" |
3552 | "__device__ int __nv_vsetltu4(int __a, int __b);\n" |
3553 | "__device__ int __nv_vsetne2(int __a, int __b);\n" |
3554 | "__device__ int __nv_vsetne4(int __a, int __b);\n" |
3555 | "__device__ int __nv_vsub2(int __a, int __b);\n" |
3556 | "__device__ int __nv_vsub4(int __a, int __b);\n" |
3557 | "__device__ int __nv_vsubss2(int __a, int __b);\n" |
3558 | "__device__ int __nv_vsubss4(int __a, int __b);\n" |
3559 | "__device__ int __nv_vsubus2(int __a, int __b);\n" |
3560 | "__device__ int __nv_vsubus4(int __a, int __b);\n" |
3561 | "#endif // CUDA_VERSION\n" |
3562 | "__device__ double __nv_y0(double __a);\n" |
3563 | "__device__ float __nv_y0f(float __a);\n" |
3564 | "__device__ double __nv_y1(double __a);\n" |
3565 | "__device__ float __nv_y1f(float __a);\n" |
3566 | "__device__ float __nv_ynf(int __a, float __b);\n" |
3567 | "__device__ double __nv_yn(int __a, double __b);\n" |
3568 | "} // extern \"C\"\n" |
3569 | "#endif // __CLANG_CUDA_LIBDEVICE_DECLARES_H__\n" |
3570 | "" } , |
3571 | { "/builtins/__clang_cuda_math_forward_declares.h" , "/*===- __clang_math_forward_declares.h - Prototypes of __device__ math fns --===\n" |
3572 | " *\n" |
3573 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
3574 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
3575 | " * in the Software without restriction, including without limitation the rights\n" |
3576 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
3577 | " * copies of the Software, and to permit persons to whom the Software is\n" |
3578 | " * furnished to do so, subject to the following conditions:\n" |
3579 | " *\n" |
3580 | " * The above copyright notice and this permission notice shall be included in\n" |
3581 | " * all copies or substantial portions of the Software.\n" |
3582 | " *\n" |
3583 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
3584 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
3585 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
3586 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
3587 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
3588 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
3589 | " * THE SOFTWARE.\n" |
3590 | " *\n" |
3591 | " *===-----------------------------------------------------------------------===\n" |
3592 | " */\n" |
3593 | "#ifndef __CLANG__CUDA_MATH_FORWARD_DECLARES_H__\n" |
3594 | "#define __CLANG__CUDA_MATH_FORWARD_DECLARES_H__\n" |
3595 | "#ifndef __CUDA__\n" |
3596 | "#error \"This file is for CUDA compilation only.\"\n" |
3597 | "#endif\n" |
3598 | "\n" |
3599 | "// This file forward-declares of some math functions we (or the CUDA headers)\n" |
3600 | "// will define later. We need to do this, and do it before cmath is included,\n" |
3601 | "// because the standard library may have constexpr math functions. In the\n" |
3602 | "// absence of a prior __device__ decl, those constexpr functions may become\n" |
3603 | "// implicitly host+device. host+device functions can't be overloaded, so that\n" |
3604 | "// would preclude the use of our own __device__ overloads for these functions.\n" |
3605 | "\n" |
3606 | "#pragma push_macro(\"__DEVICE__\")\n" |
3607 | "#define __DEVICE__ \\\n" |
3608 | " static __inline__ __attribute__((always_inline)) __attribute__((device))\n" |
3609 | "\n" |
3610 | "__DEVICE__ double abs(double);\n" |
3611 | "__DEVICE__ float abs(float);\n" |
3612 | "__DEVICE__ int abs(int);\n" |
3613 | "__DEVICE__ long abs(long);\n" |
3614 | "__DEVICE__ long long abs(long long);\n" |
3615 | "__DEVICE__ double acos(double);\n" |
3616 | "__DEVICE__ float acos(float);\n" |
3617 | "__DEVICE__ double acosh(double);\n" |
3618 | "__DEVICE__ float acosh(float);\n" |
3619 | "__DEVICE__ double asin(double);\n" |
3620 | "__DEVICE__ float asin(float);\n" |
3621 | "__DEVICE__ double asinh(double);\n" |
3622 | "__DEVICE__ float asinh(float);\n" |
3623 | "__DEVICE__ double atan2(double, double);\n" |
3624 | "__DEVICE__ float atan2(float, float);\n" |
3625 | "__DEVICE__ double atan(double);\n" |
3626 | "__DEVICE__ float atan(float);\n" |
3627 | "__DEVICE__ double atanh(double);\n" |
3628 | "__DEVICE__ float atanh(float);\n" |
3629 | "__DEVICE__ double cbrt(double);\n" |
3630 | "__DEVICE__ float cbrt(float);\n" |
3631 | "__DEVICE__ double ceil(double);\n" |
3632 | "__DEVICE__ float ceil(float);\n" |
3633 | "__DEVICE__ double copysign(double, double);\n" |
3634 | "__DEVICE__ float copysign(float, float);\n" |
3635 | "__DEVICE__ double cos(double);\n" |
3636 | "__DEVICE__ float cos(float);\n" |
3637 | "__DEVICE__ double cosh(double);\n" |
3638 | "__DEVICE__ float cosh(float);\n" |
3639 | "__DEVICE__ double erfc(double);\n" |
3640 | "__DEVICE__ float erfc(float);\n" |
3641 | "__DEVICE__ double erf(double);\n" |
3642 | "__DEVICE__ float erf(float);\n" |
3643 | "__DEVICE__ double exp2(double);\n" |
3644 | "__DEVICE__ float exp2(float);\n" |
3645 | "__DEVICE__ double exp(double);\n" |
3646 | "__DEVICE__ float exp(float);\n" |
3647 | "__DEVICE__ double expm1(double);\n" |
3648 | "__DEVICE__ float expm1(float);\n" |
3649 | "__DEVICE__ double fabs(double);\n" |
3650 | "__DEVICE__ float fabs(float);\n" |
3651 | "__DEVICE__ double fdim(double, double);\n" |
3652 | "__DEVICE__ float fdim(float, float);\n" |
3653 | "__DEVICE__ double floor(double);\n" |
3654 | "__DEVICE__ float floor(float);\n" |
3655 | "__DEVICE__ double fma(double, double, double);\n" |
3656 | "__DEVICE__ float fma(float, float, float);\n" |
3657 | "__DEVICE__ double fmax(double, double);\n" |
3658 | "__DEVICE__ float fmax(float, float);\n" |
3659 | "__DEVICE__ double fmin(double, double);\n" |
3660 | "__DEVICE__ float fmin(float, float);\n" |
3661 | "__DEVICE__ double fmod(double, double);\n" |
3662 | "__DEVICE__ float fmod(float, float);\n" |
3663 | "__DEVICE__ int fpclassify(double);\n" |
3664 | "__DEVICE__ int fpclassify(float);\n" |
3665 | "__DEVICE__ double frexp(double, int *);\n" |
3666 | "__DEVICE__ float frexp(float, int *);\n" |
3667 | "__DEVICE__ double hypot(double, double);\n" |
3668 | "__DEVICE__ float hypot(float, float);\n" |
3669 | "__DEVICE__ int ilogb(double);\n" |
3670 | "__DEVICE__ int ilogb(float);\n" |
3671 | "__DEVICE__ bool isfinite(double);\n" |
3672 | "__DEVICE__ bool isfinite(float);\n" |
3673 | "__DEVICE__ bool isgreater(double, double);\n" |
3674 | "__DEVICE__ bool isgreaterequal(double, double);\n" |
3675 | "__DEVICE__ bool isgreaterequal(float, float);\n" |
3676 | "__DEVICE__ bool isgreater(float, float);\n" |
3677 | "__DEVICE__ bool isinf(double);\n" |
3678 | "__DEVICE__ bool isinf(float);\n" |
3679 | "__DEVICE__ bool isless(double, double);\n" |
3680 | "__DEVICE__ bool islessequal(double, double);\n" |
3681 | "__DEVICE__ bool islessequal(float, float);\n" |
3682 | "__DEVICE__ bool isless(float, float);\n" |
3683 | "__DEVICE__ bool islessgreater(double, double);\n" |
3684 | "__DEVICE__ bool islessgreater(float, float);\n" |
3685 | "__DEVICE__ bool isnan(double);\n" |
3686 | "__DEVICE__ bool isnan(float);\n" |
3687 | "__DEVICE__ bool isnormal(double);\n" |
3688 | "__DEVICE__ bool isnormal(float);\n" |
3689 | "__DEVICE__ bool isunordered(double, double);\n" |
3690 | "__DEVICE__ bool isunordered(float, float);\n" |
3691 | "__DEVICE__ long labs(long);\n" |
3692 | "__DEVICE__ double ldexp(double, int);\n" |
3693 | "__DEVICE__ float ldexp(float, int);\n" |
3694 | "__DEVICE__ double lgamma(double);\n" |
3695 | "__DEVICE__ float lgamma(float);\n" |
3696 | "__DEVICE__ long long llabs(long long);\n" |
3697 | "__DEVICE__ long long llrint(double);\n" |
3698 | "__DEVICE__ long long llrint(float);\n" |
3699 | "__DEVICE__ double log10(double);\n" |
3700 | "__DEVICE__ float log10(float);\n" |
3701 | "__DEVICE__ double log1p(double);\n" |
3702 | "__DEVICE__ float log1p(float);\n" |
3703 | "__DEVICE__ double log2(double);\n" |
3704 | "__DEVICE__ float log2(float);\n" |
3705 | "__DEVICE__ double logb(double);\n" |
3706 | "__DEVICE__ float logb(float);\n" |
3707 | "__DEVICE__ double log(double);\n" |
3708 | "__DEVICE__ float log(float);\n" |
3709 | "__DEVICE__ long lrint(double);\n" |
3710 | "__DEVICE__ long lrint(float);\n" |
3711 | "__DEVICE__ long lround(double);\n" |
3712 | "__DEVICE__ long lround(float);\n" |
3713 | "__DEVICE__ long long llround(float); // No llround(double).\n" |
3714 | "__DEVICE__ double modf(double, double *);\n" |
3715 | "__DEVICE__ float modf(float, float *);\n" |
3716 | "__DEVICE__ double nan(const char *);\n" |
3717 | "__DEVICE__ float nanf(const char *);\n" |
3718 | "__DEVICE__ double nearbyint(double);\n" |
3719 | "__DEVICE__ float nearbyint(float);\n" |
3720 | "__DEVICE__ double nextafter(double, double);\n" |
3721 | "__DEVICE__ float nextafter(float, float);\n" |
3722 | "__DEVICE__ double pow(double, double);\n" |
3723 | "__DEVICE__ double pow(double, int);\n" |
3724 | "__DEVICE__ float pow(float, float);\n" |
3725 | "__DEVICE__ float pow(float, int);\n" |
3726 | "__DEVICE__ double remainder(double, double);\n" |
3727 | "__DEVICE__ float remainder(float, float);\n" |
3728 | "__DEVICE__ double remquo(double, double, int *);\n" |
3729 | "__DEVICE__ float remquo(float, float, int *);\n" |
3730 | "__DEVICE__ double rint(double);\n" |
3731 | "__DEVICE__ float rint(float);\n" |
3732 | "__DEVICE__ double round(double);\n" |
3733 | "__DEVICE__ float round(float);\n" |
3734 | "__DEVICE__ double scalbln(double, long);\n" |
3735 | "__DEVICE__ float scalbln(float, long);\n" |
3736 | "__DEVICE__ double scalbn(double, int);\n" |
3737 | "__DEVICE__ float scalbn(float, int);\n" |
3738 | "__DEVICE__ bool signbit(double);\n" |
3739 | "__DEVICE__ bool signbit(float);\n" |
3740 | "__DEVICE__ double sin(double);\n" |
3741 | "__DEVICE__ float sin(float);\n" |
3742 | "__DEVICE__ double sinh(double);\n" |
3743 | "__DEVICE__ float sinh(float);\n" |
3744 | "__DEVICE__ double sqrt(double);\n" |
3745 | "__DEVICE__ float sqrt(float);\n" |
3746 | "__DEVICE__ double tan(double);\n" |
3747 | "__DEVICE__ float tan(float);\n" |
3748 | "__DEVICE__ double tanh(double);\n" |
3749 | "__DEVICE__ float tanh(float);\n" |
3750 | "__DEVICE__ double tgamma(double);\n" |
3751 | "__DEVICE__ float tgamma(float);\n" |
3752 | "__DEVICE__ double trunc(double);\n" |
3753 | "__DEVICE__ float trunc(float);\n" |
3754 | "\n" |
3755 | "// Notably missing above is nexttoward, which we don't define on\n" |
3756 | "// the device side because libdevice doesn't give us an implementation, and we\n" |
3757 | "// don't want to be in the business of writing one ourselves.\n" |
3758 | "\n" |
3759 | "// We need to define these overloads in exactly the namespace our standard\n" |
3760 | "// library uses (including the right inline namespace), otherwise they won't be\n" |
3761 | "// picked up by other functions in the standard library (e.g. functions in\n" |
3762 | "// <complex>). Thus the ugliness below.\n" |
3763 | "#ifdef _LIBCPP_BEGIN_NAMESPACE_STD\n" |
3764 | "_LIBCPP_BEGIN_NAMESPACE_STD\n" |
3765 | "#else\n" |
3766 | "namespace std {\n" |
3767 | "#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n" |
3768 | "_GLIBCXX_BEGIN_NAMESPACE_VERSION\n" |
3769 | "#endif\n" |
3770 | "#endif\n" |
3771 | "\n" |
3772 | "using ::abs;\n" |
3773 | "using ::acos;\n" |
3774 | "using ::acosh;\n" |
3775 | "using ::asin;\n" |
3776 | "using ::asinh;\n" |
3777 | "using ::atan;\n" |
3778 | "using ::atan2;\n" |
3779 | "using ::atanh;\n" |
3780 | "using ::cbrt;\n" |
3781 | "using ::ceil;\n" |
3782 | "using ::copysign;\n" |
3783 | "using ::cos;\n" |
3784 | "using ::cosh;\n" |
3785 | "using ::erf;\n" |
3786 | "using ::erfc;\n" |
3787 | "using ::exp;\n" |
3788 | "using ::exp2;\n" |
3789 | "using ::expm1;\n" |
3790 | "using ::fabs;\n" |
3791 | "using ::fdim;\n" |
3792 | "using ::floor;\n" |
3793 | "using ::fma;\n" |
3794 | "using ::fmax;\n" |
3795 | "using ::fmin;\n" |
3796 | "using ::fmod;\n" |
3797 | "using ::fpclassify;\n" |
3798 | "using ::frexp;\n" |
3799 | "using ::hypot;\n" |
3800 | "using ::ilogb;\n" |
3801 | "using ::isfinite;\n" |
3802 | "using ::isgreater;\n" |
3803 | "using ::isgreaterequal;\n" |
3804 | "using ::isinf;\n" |
3805 | "using ::isless;\n" |
3806 | "using ::islessequal;\n" |
3807 | "using ::islessgreater;\n" |
3808 | "using ::isnan;\n" |
3809 | "using ::isnormal;\n" |
3810 | "using ::isunordered;\n" |
3811 | "using ::labs;\n" |
3812 | "using ::ldexp;\n" |
3813 | "using ::lgamma;\n" |
3814 | "using ::llabs;\n" |
3815 | "using ::llrint;\n" |
3816 | "using ::log;\n" |
3817 | "using ::log10;\n" |
3818 | "using ::log1p;\n" |
3819 | "using ::log2;\n" |
3820 | "using ::logb;\n" |
3821 | "using ::lrint;\n" |
3822 | "using ::lround;\n" |
3823 | "using ::llround;\n" |
3824 | "using ::modf;\n" |
3825 | "using ::nan;\n" |
3826 | "using ::nanf;\n" |
3827 | "using ::nearbyint;\n" |
3828 | "using ::nextafter;\n" |
3829 | "using ::pow;\n" |
3830 | "using ::remainder;\n" |
3831 | "using ::remquo;\n" |
3832 | "using ::rint;\n" |
3833 | "using ::round;\n" |
3834 | "using ::scalbln;\n" |
3835 | "using ::scalbn;\n" |
3836 | "using ::signbit;\n" |
3837 | "using ::sin;\n" |
3838 | "using ::sinh;\n" |
3839 | "using ::sqrt;\n" |
3840 | "using ::tan;\n" |
3841 | "using ::tanh;\n" |
3842 | "using ::tgamma;\n" |
3843 | "using ::trunc;\n" |
3844 | "\n" |
3845 | "#ifdef _LIBCPP_END_NAMESPACE_STD\n" |
3846 | "_LIBCPP_END_NAMESPACE_STD\n" |
3847 | "#else\n" |
3848 | "#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n" |
3849 | "_GLIBCXX_END_NAMESPACE_VERSION\n" |
3850 | "#endif\n" |
3851 | "} // namespace std\n" |
3852 | "#endif\n" |
3853 | "\n" |
3854 | "#pragma pop_macro(\"__DEVICE__\")\n" |
3855 | "\n" |
3856 | "#endif\n" |
3857 | "" } , |
3858 | { "/builtins/__clang_cuda_runtime_wrapper.h" , "/*===---- __clang_cuda_runtime_wrapper.h - CUDA runtime support -------------===\n" |
3859 | " *\n" |
3860 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
3861 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
3862 | " * in the Software without restriction, including without limitation the rights\n" |
3863 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
3864 | " * copies of the Software, and to permit persons to whom the Software is\n" |
3865 | " * furnished to do so, subject to the following conditions:\n" |
3866 | " *\n" |
3867 | " * The above copyright notice and this permission notice shall be included in\n" |
3868 | " * all copies or substantial portions of the Software.\n" |
3869 | " *\n" |
3870 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
3871 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
3872 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
3873 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
3874 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
3875 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
3876 | " * THE SOFTWARE.\n" |
3877 | " *\n" |
3878 | " *===-----------------------------------------------------------------------===\n" |
3879 | " */\n" |
3880 | "\n" |
3881 | "/*\n" |
3882 | " * WARNING: This header is intended to be directly -include'd by\n" |
3883 | " * the compiler and is not supposed to be included by users.\n" |
3884 | " *\n" |
3885 | " * CUDA headers are implemented in a way that currently makes it\n" |
3886 | " * impossible for user code to #include directly when compiling with\n" |
3887 | " * Clang. They present different view of CUDA-supplied functions\n" |
3888 | " * depending on where in NVCC's compilation pipeline the headers are\n" |
3889 | " * included. Neither of these modes provides function definitions with\n" |
3890 | " * correct attributes, so we use preprocessor to force the headers\n" |
3891 | " * into a form that Clang can use.\n" |
3892 | " *\n" |
3893 | " * Similarly to NVCC which -include's cuda_runtime.h, Clang -include's\n" |
3894 | " * this file during every CUDA compilation.\n" |
3895 | " */\n" |
3896 | "\n" |
3897 | "#ifndef __CLANG_CUDA_RUNTIME_WRAPPER_H__\n" |
3898 | "#define __CLANG_CUDA_RUNTIME_WRAPPER_H__\n" |
3899 | "\n" |
3900 | "#if defined(__CUDA__) && defined(__clang__)\n" |
3901 | "\n" |
3902 | "// Include some forward declares that must come before cmath.\n" |
3903 | "#include <__clang_cuda_math_forward_declares.h>\n" |
3904 | "\n" |
3905 | "// Include some standard headers to avoid CUDA headers including them\n" |
3906 | "// while some required macros (like __THROW) are in a weird state.\n" |
3907 | "#include <cmath>\n" |
3908 | "#include <cstdlib>\n" |
3909 | "#include <stdlib.h>\n" |
3910 | "\n" |
3911 | "// Preserve common macros that will be changed below by us or by CUDA\n" |
3912 | "// headers.\n" |
3913 | "#pragma push_macro(\"__THROW\")\n" |
3914 | "#pragma push_macro(\"__CUDA_ARCH__\")\n" |
3915 | "\n" |
3916 | "// WARNING: Preprocessor hacks below are based on specific details of\n" |
3917 | "// CUDA-7.x headers and are not expected to work with any other\n" |
3918 | "// version of CUDA headers.\n" |
3919 | "#include \"cuda.h\"\n" |
3920 | "#if !defined(CUDA_VERSION)\n" |
3921 | "#error \"cuda.h did not define CUDA_VERSION\"\n" |
3922 | "#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10000\n" |
3923 | "#error \"Unsupported CUDA version!\"\n" |
3924 | "#endif\n" |
3925 | "\n" |
3926 | "#pragma push_macro(\"__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__\")\n" |
3927 | "#if CUDA_VERSION >= 10000\n" |
3928 | "#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__\n" |
3929 | "#endif\n" |
3930 | "\n" |
3931 | "// Make largest subset of device functions available during host\n" |
3932 | "// compilation -- SM_35 for the time being.\n" |
3933 | "#ifndef __CUDA_ARCH__\n" |
3934 | "#define __CUDA_ARCH__ 350\n" |
3935 | "#endif\n" |
3936 | "\n" |
3937 | "#include \"__clang_cuda_builtin_vars.h\"\n" |
3938 | "\n" |
3939 | "// No need for device_launch_parameters.h as __clang_cuda_builtin_vars.h above\n" |
3940 | "// has taken care of builtin variables declared in the file.\n" |
3941 | "#define __DEVICE_LAUNCH_PARAMETERS_H__\n" |
3942 | "\n" |
3943 | "// {math,device}_functions.h only have declarations of the\n" |
3944 | "// functions. We don't need them as we're going to pull in their\n" |
3945 | "// definitions from .hpp files.\n" |
3946 | "#define __DEVICE_FUNCTIONS_H__\n" |
3947 | "#define __MATH_FUNCTIONS_H__\n" |
3948 | "#define __COMMON_FUNCTIONS_H__\n" |
3949 | "// device_functions_decls is replaced by __clang_cuda_device_functions.h\n" |
3950 | "// included below.\n" |
3951 | "#define __DEVICE_FUNCTIONS_DECLS_H__\n" |
3952 | "\n" |
3953 | "#undef __CUDACC__\n" |
3954 | "#if CUDA_VERSION < 9000\n" |
3955 | "#define __CUDABE__\n" |
3956 | "#else\n" |
3957 | "#define __CUDA_LIBDEVICE__\n" |
3958 | "#endif\n" |
3959 | "// Disables definitions of device-side runtime support stubs in\n" |
3960 | "// cuda_device_runtime_api.h\n" |
3961 | "#include \"driver_types.h\"\n" |
3962 | "#include \"host_config.h\"\n" |
3963 | "#include \"host_defines.h\"\n" |
3964 | "\n" |
3965 | "// Temporarily replace \"nv_weak\" with weak, so __attribute__((nv_weak)) in\n" |
3966 | "// cuda_device_runtime_api.h ends up being __attribute__((weak)) which is the\n" |
3967 | "// functional equivalent of what we need.\n" |
3968 | "#pragma push_macro(\"nv_weak\")\n" |
3969 | "#define nv_weak weak\n" |
3970 | "#undef __CUDABE__\n" |
3971 | "#undef __CUDA_LIBDEVICE__\n" |
3972 | "#define __CUDACC__\n" |
3973 | "#include \"cuda_runtime.h\"\n" |
3974 | "\n" |
3975 | "#pragma pop_macro(\"nv_weak\")\n" |
3976 | "#undef __CUDACC__\n" |
3977 | "#define __CUDABE__\n" |
3978 | "\n" |
3979 | "// CUDA headers use __nvvm_memcpy and __nvvm_memset which Clang does\n" |
3980 | "// not have at the moment. Emulate them with a builtin memcpy/memset.\n" |
3981 | "#define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n)\n" |
3982 | "#define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n)\n" |
3983 | "\n" |
3984 | "#if CUDA_VERSION < 9000\n" |
3985 | "#include \"crt/device_runtime.h\"\n" |
3986 | "#endif\n" |
3987 | "#include \"crt/host_runtime.h\"\n" |
3988 | "// device_runtime.h defines __cxa_* macros that will conflict with\n" |
3989 | "// cxxabi.h.\n" |
3990 | "// FIXME: redefine these as __device__ functions.\n" |
3991 | "#undef __cxa_vec_ctor\n" |
3992 | "#undef __cxa_vec_cctor\n" |
3993 | "#undef __cxa_vec_dtor\n" |
3994 | "#undef __cxa_vec_new\n" |
3995 | "#undef __cxa_vec_new2\n" |
3996 | "#undef __cxa_vec_new3\n" |
3997 | "#undef __cxa_vec_delete2\n" |
3998 | "#undef __cxa_vec_delete\n" |
3999 | "#undef __cxa_vec_delete3\n" |
4000 | "#undef __cxa_pure_virtual\n" |
4001 | "\n" |
4002 | "// math_functions.hpp expects this host function be defined on MacOS, but it\n" |
4003 | "// ends up not being there because of the games we play here. Just define it\n" |
4004 | "// ourselves; it's simple enough.\n" |
4005 | "#ifdef __APPLE__\n" |
4006 | "inline __host__ double __signbitd(double x) {\n" |
4007 | " return std::signbit(x);\n" |
4008 | "}\n" |
4009 | "#endif\n" |
4010 | "\n" |
4011 | "// CUDA 9.1 no longer provides declarations for libdevice functions, so we need\n" |
4012 | "// to provide our own.\n" |
4013 | "#include <__clang_cuda_libdevice_declares.h>\n" |
4014 | "\n" |
4015 | "// Wrappers for many device-side standard library functions became compiler\n" |
4016 | "// builtins in CUDA-9 and have been removed from the CUDA headers. Clang now\n" |
4017 | "// provides its own implementation of the wrappers.\n" |
4018 | "#if CUDA_VERSION >= 9000\n" |
4019 | "#include <__clang_cuda_device_functions.h>\n" |
4020 | "#endif\n" |
4021 | "\n" |
4022 | "// __THROW is redefined to be empty by device_functions_decls.h in CUDA. Clang's\n" |
4023 | "// counterpart does not do it, so we need to make it empty here to keep\n" |
4024 | "// following CUDA includes happy.\n" |
4025 | "#undef __THROW\n" |
4026 | "#define __THROW\n" |
4027 | "\n" |
4028 | "// CUDA 8.0.41 relies on __USE_FAST_MATH__ and __CUDA_PREC_DIV's values.\n" |
4029 | "// Previous versions used to check whether they are defined or not.\n" |
4030 | "// CU_DEVICE_INVALID macro is only defined in 8.0.41, so we use it\n" |
4031 | "// here to detect the switch.\n" |
4032 | "\n" |
4033 | "#if defined(CU_DEVICE_INVALID)\n" |
4034 | "#if !defined(__USE_FAST_MATH__)\n" |
4035 | "#define __USE_FAST_MATH__ 0\n" |
4036 | "#endif\n" |
4037 | "\n" |
4038 | "#if !defined(__CUDA_PREC_DIV)\n" |
4039 | "#define __CUDA_PREC_DIV 0\n" |
4040 | "#endif\n" |
4041 | "#endif\n" |
4042 | "\n" |
4043 | "// Temporarily poison __host__ macro to ensure it's not used by any of\n" |
4044 | "// the headers we're about to include.\n" |
4045 | "#pragma push_macro(\"__host__\")\n" |
4046 | "#define __host__ UNEXPECTED_HOST_ATTRIBUTE\n" |
4047 | "\n" |
4048 | "// device_functions.hpp and math_functions*.hpp use 'static\n" |
4049 | "// __forceinline__' (with no __device__) for definitions of device\n" |
4050 | "// functions. Temporarily redefine __forceinline__ to include\n" |
4051 | "// __device__.\n" |
4052 | "#pragma push_macro(\"__forceinline__\")\n" |
4053 | "#define __forceinline__ __device__ __inline__ __attribute__((always_inline))\n" |
4054 | "#if CUDA_VERSION < 9000\n" |
4055 | "#include \"device_functions.hpp\"\n" |
4056 | "#endif\n" |
4057 | "\n" |
4058 | "// math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we\n" |
4059 | "// get the slow-but-accurate or fast-but-inaccurate versions of functions like\n" |
4060 | "// sin and exp. This is controlled in clang by -fcuda-approx-transcendentals.\n" |
4061 | "//\n" |
4062 | "// device_functions.hpp uses __USE_FAST_MATH__ for a different purpose (fast vs.\n" |
4063 | "// slow divides), so we need to scope our define carefully here.\n" |
4064 | "#pragma push_macro(\"__USE_FAST_MATH__\")\n" |
4065 | "#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)\n" |
4066 | "#define __USE_FAST_MATH__ 1\n" |
4067 | "#endif\n" |
4068 | "\n" |
4069 | "#if CUDA_VERSION >= 9000\n" |
4070 | "// CUDA-9.2 needs host-side memcpy for some host functions in\n" |
4071 | "// device_functions.hpp\n" |
4072 | "#if CUDA_VERSION >= 9020\n" |
4073 | "#include <string.h>\n" |
4074 | "#endif\n" |
4075 | "#include \"crt/math_functions.hpp\"\n" |
4076 | "#else\n" |
4077 | "#include \"math_functions.hpp\"\n" |
4078 | "#endif\n" |
4079 | "\n" |
4080 | "#pragma pop_macro(\"__USE_FAST_MATH__\")\n" |
4081 | "\n" |
4082 | "#if CUDA_VERSION < 9000\n" |
4083 | "#include \"math_functions_dbl_ptx3.hpp\"\n" |
4084 | "#endif\n" |
4085 | "#pragma pop_macro(\"__forceinline__\")\n" |
4086 | "\n" |
4087 | "// Pull in host-only functions that are only available when neither\n" |
4088 | "// __CUDACC__ nor __CUDABE__ are defined.\n" |
4089 | "#undef __MATH_FUNCTIONS_HPP__\n" |
4090 | "#undef __CUDABE__\n" |
4091 | "#if CUDA_VERSION < 9000\n" |
4092 | "#include \"math_functions.hpp\"\n" |
4093 | "#endif\n" |
4094 | "// Alas, additional overloads for these functions are hard to get to.\n" |
4095 | "// Considering that we only need these overloads for a few functions,\n" |
4096 | "// we can provide them here.\n" |
4097 | "static inline float rsqrt(float __a) { return rsqrtf(__a); }\n" |
4098 | "static inline float rcbrt(float __a) { return rcbrtf(__a); }\n" |
4099 | "static inline float sinpi(float __a) { return sinpif(__a); }\n" |
4100 | "static inline float cospi(float __a) { return cospif(__a); }\n" |
4101 | "static inline void sincospi(float __a, float *__b, float *__c) {\n" |
4102 | " return sincospif(__a, __b, __c);\n" |
4103 | "}\n" |
4104 | "static inline float erfcinv(float __a) { return erfcinvf(__a); }\n" |
4105 | "static inline float normcdfinv(float __a) { return normcdfinvf(__a); }\n" |
4106 | "static inline float normcdf(float __a) { return normcdff(__a); }\n" |
4107 | "static inline float erfcx(float __a) { return erfcxf(__a); }\n" |
4108 | "\n" |
4109 | "#if CUDA_VERSION < 9000\n" |
4110 | "// For some reason single-argument variant is not always declared by\n" |
4111 | "// CUDA headers. Alas, device_functions.hpp included below needs it.\n" |
4112 | "static inline __device__ void __brkpt(int __c) { __brkpt(); }\n" |
4113 | "#endif\n" |
4114 | "\n" |
4115 | "// Now include *.hpp with definitions of various GPU functions. Alas,\n" |
4116 | "// a lot of thins get declared/defined with __host__ attribute which\n" |
4117 | "// we don't want and we have to define it out. We also have to include\n" |
4118 | "// {device,math}_functions.hpp again in order to extract the other\n" |
4119 | "// branch of #if/else inside.\n" |
4120 | "#define __host__\n" |
4121 | "#undef __CUDABE__\n" |
4122 | "#define __CUDACC__\n" |
4123 | "#if CUDA_VERSION >= 9000\n" |
4124 | "// Some atomic functions became compiler builtins in CUDA-9 , so we need their\n" |
4125 | "// declarations.\n" |
4126 | "#include \"device_atomic_functions.h\"\n" |
4127 | "#endif\n" |
4128 | "#undef __DEVICE_FUNCTIONS_HPP__\n" |
4129 | "#include \"device_atomic_functions.hpp\"\n" |
4130 | "#if CUDA_VERSION >= 9000\n" |
4131 | "#include \"crt/device_functions.hpp\"\n" |
4132 | "#include \"crt/device_double_functions.hpp\"\n" |
4133 | "#else\n" |
4134 | "#include \"device_functions.hpp\"\n" |
4135 | "#define __CUDABE__\n" |
4136 | "#include \"device_double_functions.h\"\n" |
4137 | "#undef __CUDABE__\n" |
4138 | "#endif\n" |
4139 | "#include \"sm_20_atomic_functions.hpp\"\n" |
4140 | "#include \"sm_20_intrinsics.hpp\"\n" |
4141 | "#include \"sm_32_atomic_functions.hpp\"\n" |
4142 | "\n" |
4143 | "// Don't include sm_30_intrinsics.h and sm_32_intrinsics.h. These define the\n" |
4144 | "// __shfl and __ldg intrinsics using inline (volatile) asm, but we want to\n" |
4145 | "// define them using builtins so that the optimizer can reason about and across\n" |
4146 | "// these instructions. In particular, using intrinsics for ldg gets us the\n" |
4147 | "// [addr+imm] addressing mode, which, although it doesn't actually exist in the\n" |
4148 | "// hardware, seems to generate faster machine code because ptxas can more easily\n" |
4149 | "// reason about our code.\n" |
4150 | "\n" |
4151 | "#if CUDA_VERSION >= 8000\n" |
4152 | "#pragma push_macro(\"__CUDA_ARCH__\")\n" |
4153 | "#undef __CUDA_ARCH__\n" |
4154 | "#include \"sm_60_atomic_functions.hpp\"\n" |
4155 | "#include \"sm_61_intrinsics.hpp\"\n" |
4156 | "#pragma pop_macro(\"__CUDA_ARCH__\")\n" |
4157 | "#endif\n" |
4158 | "\n" |
4159 | "#undef __MATH_FUNCTIONS_HPP__\n" |
4160 | "\n" |
4161 | "// math_functions.hpp defines ::signbit as a __host__ __device__ function. This\n" |
4162 | "// conflicts with libstdc++'s constexpr ::signbit, so we have to rename\n" |
4163 | "// math_function.hpp's ::signbit. It's guarded by #undef signbit, but that's\n" |
4164 | "// conditional on __GNUC__. :)\n" |
4165 | "#pragma push_macro(\"signbit\")\n" |
4166 | "#pragma push_macro(\"__GNUC__\")\n" |
4167 | "#undef __GNUC__\n" |
4168 | "#define signbit __ignored_cuda_signbit\n" |
4169 | "\n" |
4170 | "// CUDA-9 omits device-side definitions of some math functions if it sees\n" |
4171 | "// include guard from math.h wrapper from libstdc++. We have to undo the header\n" |
4172 | "// guard temporarily to get the definitions we need.\n" |
4173 | "#pragma push_macro(\"_GLIBCXX_MATH_H\")\n" |
4174 | "#pragma push_macro(\"_LIBCPP_VERSION\")\n" |
4175 | "#if CUDA_VERSION >= 9000\n" |
4176 | "#undef _GLIBCXX_MATH_H\n" |
4177 | "// We also need to undo another guard that checks for libc++ 3.8+\n" |
4178 | "#ifdef _LIBCPP_VERSION\n" |
4179 | "#define _LIBCPP_VERSION 3700\n" |
4180 | "#endif\n" |
4181 | "#endif\n" |
4182 | "\n" |
4183 | "#if CUDA_VERSION >= 9000\n" |
4184 | "#include \"crt/math_functions.hpp\"\n" |
4185 | "#else\n" |
4186 | "#include \"math_functions.hpp\"\n" |
4187 | "#endif\n" |
4188 | "#pragma pop_macro(\"_GLIBCXX_MATH_H\")\n" |
4189 | "#pragma pop_macro(\"_LIBCPP_VERSION\")\n" |
4190 | "#pragma pop_macro(\"__GNUC__\")\n" |
4191 | "#pragma pop_macro(\"signbit\")\n" |
4192 | "\n" |
4193 | "#pragma pop_macro(\"__host__\")\n" |
4194 | "\n" |
4195 | "#include \"texture_indirect_functions.h\"\n" |
4196 | "\n" |
4197 | "// Restore state of __CUDA_ARCH__ and __THROW we had on entry.\n" |
4198 | "#pragma pop_macro(\"__CUDA_ARCH__\")\n" |
4199 | "#pragma pop_macro(\"__THROW\")\n" |
4200 | "\n" |
4201 | "// Set up compiler macros expected to be seen during compilation.\n" |
4202 | "#undef __CUDABE__\n" |
4203 | "#define __CUDACC__\n" |
4204 | "\n" |
4205 | "extern \"C\" {\n" |
4206 | "// Device-side CUDA system calls.\n" |
4207 | "// http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls\n" |
4208 | "// We need these declarations and wrappers for device-side\n" |
4209 | "// malloc/free/printf calls to work without relying on\n" |
4210 | "// -fcuda-disable-target-call-checks option.\n" |
4211 | "__device__ int vprintf(const char *, const char *);\n" |
4212 | "__device__ void free(void *) __attribute((nothrow));\n" |
4213 | "__device__ void *malloc(size_t) __attribute((nothrow)) __attribute__((malloc));\n" |
4214 | "__device__ void __assertfail(const char *__message, const char *__file,\n" |
4215 | " unsigned __line, const char *__function,\n" |
4216 | " size_t __charSize) __attribute__((noreturn));\n" |
4217 | "\n" |
4218 | "// In order for standard assert() macro on linux to work we need to\n" |
4219 | "// provide device-side __assert_fail()\n" |
4220 | "__device__ static inline void __assert_fail(const char *__message,\n" |
4221 | " const char *__file, unsigned __line,\n" |
4222 | " const char *__function) {\n" |
4223 | " __assertfail(__message, __file, __line, __function, sizeof(char));\n" |
4224 | "}\n" |
4225 | "\n" |
4226 | "// Clang will convert printf into vprintf, but we still need\n" |
4227 | "// device-side declaration for it.\n" |
4228 | "__device__ int printf(const char *, ...);\n" |
4229 | "} // extern \"C\"\n" |
4230 | "\n" |
4231 | "// We also need device-side std::malloc and std::free.\n" |
4232 | "namespace std {\n" |
4233 | "__device__ static inline void free(void *__ptr) { ::free(__ptr); }\n" |
4234 | "__device__ static inline void *malloc(size_t __size) {\n" |
4235 | " return ::malloc(__size);\n" |
4236 | "}\n" |
4237 | "} // namespace std\n" |
4238 | "\n" |
4239 | "// Out-of-line implementations from __clang_cuda_builtin_vars.h. These need to\n" |
4240 | "// come after we've pulled in the definition of uint3 and dim3.\n" |
4241 | "\n" |
4242 | "__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {\n" |
4243 | " uint3 ret;\n" |
4244 | " ret.x = x;\n" |
4245 | " ret.y = y;\n" |
4246 | " ret.z = z;\n" |
4247 | " return ret;\n" |
4248 | "}\n" |
4249 | "\n" |
4250 | "__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {\n" |
4251 | " uint3 ret;\n" |
4252 | " ret.x = x;\n" |
4253 | " ret.y = y;\n" |
4254 | " ret.z = z;\n" |
4255 | " return ret;\n" |
4256 | "}\n" |
4257 | "\n" |
4258 | "__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {\n" |
4259 | " return dim3(x, y, z);\n" |
4260 | "}\n" |
4261 | "\n" |
4262 | "__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {\n" |
4263 | " return dim3(x, y, z);\n" |
4264 | "}\n" |
4265 | "\n" |
4266 | "#include <__clang_cuda_cmath.h>\n" |
4267 | "#include <__clang_cuda_intrinsics.h>\n" |
4268 | "#include <__clang_cuda_complex_builtins.h>\n" |
4269 | "\n" |
4270 | "// curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host\n" |
4271 | "// mode, giving them their \"proper\" types of dim3 and uint3. This is\n" |
4272 | "// incompatible with the types we give in __clang_cuda_builtin_vars.h. As as\n" |
4273 | "// hack, force-include the header (nvcc doesn't include it by default) but\n" |
4274 | "// redefine dim3 and uint3 to our builtin types. (Thankfully dim3 and uint3 are\n" |
4275 | "// only used here for the redeclarations of blockDim and threadIdx.)\n" |
4276 | "#pragma push_macro(\"dim3\")\n" |
4277 | "#pragma push_macro(\"uint3\")\n" |
4278 | "#define dim3 __cuda_builtin_blockDim_t\n" |
4279 | "#define uint3 __cuda_builtin_threadIdx_t\n" |
4280 | "#include \"curand_mtgp32_kernel.h\"\n" |
4281 | "#pragma pop_macro(\"dim3\")\n" |
4282 | "#pragma pop_macro(\"uint3\")\n" |
4283 | "#pragma pop_macro(\"__USE_FAST_MATH__\")\n" |
4284 | "#pragma pop_macro(\"__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__\")\n" |
4285 | "\n" |
4286 | "#endif // __CUDA__\n" |
4287 | "#endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__\n" |
4288 | "" } , |
4289 | { "/builtins/__stddef_max_align_t.h" , "/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---===\n" |
4290 | " *\n" |
4291 | " * Copyright (c) 2014 Chandler Carruth\n" |
4292 | " *\n" |
4293 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
4294 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
4295 | " * in the Software without restriction, including without limitation the rights\n" |
4296 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
4297 | " * copies of the Software, and to permit persons to whom the Software is\n" |
4298 | " * furnished to do so, subject to the following conditions:\n" |
4299 | " *\n" |
4300 | " * The above copyright notice and this permission notice shall be included in\n" |
4301 | " * all copies or substantial portions of the Software.\n" |
4302 | " *\n" |
4303 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
4304 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
4305 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
4306 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
4307 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
4308 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
4309 | " * THE SOFTWARE.\n" |
4310 | " *\n" |
4311 | " *===-----------------------------------------------------------------------===\n" |
4312 | " */\n" |
4313 | "\n" |
4314 | "#ifndef __CLANG_MAX_ALIGN_T_DEFINED\n" |
4315 | "#define __CLANG_MAX_ALIGN_T_DEFINED\n" |
4316 | "\n" |
4317 | "#if defined(_MSC_VER)\n" |
4318 | "typedef double max_align_t;\n" |
4319 | "#elif defined(__APPLE__)\n" |
4320 | "typedef long double max_align_t;\n" |
4321 | "#else\n" |
4322 | "// Define 'max_align_t' to match the GCC definition.\n" |
4323 | "typedef struct {\n" |
4324 | " long long __clang_max_align_nonce1\n" |
4325 | " __attribute__((__aligned__(__alignof__(long long))));\n" |
4326 | " long double __clang_max_align_nonce2\n" |
4327 | " __attribute__((__aligned__(__alignof__(long double))));\n" |
4328 | "} max_align_t;\n" |
4329 | "#endif\n" |
4330 | "\n" |
4331 | "#endif\n" |
4332 | "" } , |
4333 | { "/builtins/__wmmintrin_aes.h" , "/*===---- __wmmintrin_aes.h - AES intrinsics -------------------------------===\n" |
4334 | " *\n" |
4335 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
4336 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
4337 | " * in the Software without restriction, including without limitation the rights\n" |
4338 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
4339 | " * copies of the Software, and to permit persons to whom the Software is\n" |
4340 | " * furnished to do so, subject to the following conditions:\n" |
4341 | " *\n" |
4342 | " * The above copyright notice and this permission notice shall be included in\n" |
4343 | " * all copies or substantial portions of the Software.\n" |
4344 | " *\n" |
4345 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
4346 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
4347 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
4348 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
4349 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
4350 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
4351 | " * THE SOFTWARE.\n" |
4352 | " *\n" |
4353 | " *===-----------------------------------------------------------------------===\n" |
4354 | " */\n" |
4355 | "\n" |
4356 | "#ifndef __WMMINTRIN_H\n" |
4357 | "#error \"Never use <__wmmintrin_aes.h> directly; include <wmmintrin.h> instead.\"\n" |
4358 | "#endif\n" |
4359 | "\n" |
4360 | "#ifndef __WMMINTRIN_AES_H\n" |
4361 | "#define __WMMINTRIN_AES_H\n" |
4362 | "\n" |
4363 | "/* Define the default attributes for the functions in this file. */\n" |
4364 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"aes\"), __min_vector_width__(128)))\n" |
4365 | "\n" |
4366 | "/// Performs a single round of AES encryption using the Equivalent\n" |
4367 | "/// Inverse Cipher, transforming the state value from the first source\n" |
4368 | "/// operand using a 128-bit round key value contained in the second source\n" |
4369 | "/// operand, and writes the result to the destination.\n" |
4370 | "///\n" |
4371 | "/// \\headerfile <x86intrin.h>\n" |
4372 | "///\n" |
4373 | "/// This intrinsic corresponds to the <c> VAESENC </c> instruction.\n" |
4374 | "///\n" |
4375 | "/// \\param __V\n" |
4376 | "/// A 128-bit integer vector containing the state value.\n" |
4377 | "/// \\param __R\n" |
4378 | "/// A 128-bit integer vector containing the round key value.\n" |
4379 | "/// \\returns A 128-bit integer vector containing the encrypted value.\n" |
4380 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
4381 | "_mm_aesenc_si128(__m128i __V, __m128i __R)\n" |
4382 | "{\n" |
4383 | " return (__m128i)__builtin_ia32_aesenc128((__v2di)__V, (__v2di)__R);\n" |
4384 | "}\n" |
4385 | "\n" |
4386 | "/// Performs the final round of AES encryption using the Equivalent\n" |
4387 | "/// Inverse Cipher, transforming the state value from the first source\n" |
4388 | "/// operand using a 128-bit round key value contained in the second source\n" |
4389 | "/// operand, and writes the result to the destination.\n" |
4390 | "///\n" |
4391 | "/// \\headerfile <x86intrin.h>\n" |
4392 | "///\n" |
4393 | "/// This intrinsic corresponds to the <c> VAESENCLAST </c> instruction.\n" |
4394 | "///\n" |
4395 | "/// \\param __V\n" |
4396 | "/// A 128-bit integer vector containing the state value.\n" |
4397 | "/// \\param __R\n" |
4398 | "/// A 128-bit integer vector containing the round key value.\n" |
4399 | "/// \\returns A 128-bit integer vector containing the encrypted value.\n" |
4400 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
4401 | "_mm_aesenclast_si128(__m128i __V, __m128i __R)\n" |
4402 | "{\n" |
4403 | " return (__m128i)__builtin_ia32_aesenclast128((__v2di)__V, (__v2di)__R);\n" |
4404 | "}\n" |
4405 | "\n" |
4406 | "/// Performs a single round of AES decryption using the Equivalent\n" |
4407 | "/// Inverse Cipher, transforming the state value from the first source\n" |
4408 | "/// operand using a 128-bit round key value contained in the second source\n" |
4409 | "/// operand, and writes the result to the destination.\n" |
4410 | "///\n" |
4411 | "/// \\headerfile <x86intrin.h>\n" |
4412 | "///\n" |
4413 | "/// This intrinsic corresponds to the <c> VAESDEC </c> instruction.\n" |
4414 | "///\n" |
4415 | "/// \\param __V\n" |
4416 | "/// A 128-bit integer vector containing the state value.\n" |
4417 | "/// \\param __R\n" |
4418 | "/// A 128-bit integer vector containing the round key value.\n" |
4419 | "/// \\returns A 128-bit integer vector containing the decrypted value.\n" |
4420 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
4421 | "_mm_aesdec_si128(__m128i __V, __m128i __R)\n" |
4422 | "{\n" |
4423 | " return (__m128i)__builtin_ia32_aesdec128((__v2di)__V, (__v2di)__R);\n" |
4424 | "}\n" |
4425 | "\n" |
4426 | "/// Performs the final round of AES decryption using the Equivalent\n" |
4427 | "/// Inverse Cipher, transforming the state value from the first source\n" |
4428 | "/// operand using a 128-bit round key value contained in the second source\n" |
4429 | "/// operand, and writes the result to the destination.\n" |
4430 | "///\n" |
4431 | "/// \\headerfile <x86intrin.h>\n" |
4432 | "///\n" |
4433 | "/// This intrinsic corresponds to the <c> VAESDECLAST </c> instruction.\n" |
4434 | "///\n" |
4435 | "/// \\param __V\n" |
4436 | "/// A 128-bit integer vector containing the state value.\n" |
4437 | "/// \\param __R\n" |
4438 | "/// A 128-bit integer vector containing the round key value.\n" |
4439 | "/// \\returns A 128-bit integer vector containing the decrypted value.\n" |
4440 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
4441 | "_mm_aesdeclast_si128(__m128i __V, __m128i __R)\n" |
4442 | "{\n" |
4443 | " return (__m128i)__builtin_ia32_aesdeclast128((__v2di)__V, (__v2di)__R);\n" |
4444 | "}\n" |
4445 | "\n" |
4446 | "/// Applies the AES InvMixColumns() transformation to an expanded key\n" |
4447 | "/// contained in the source operand, and writes the result to the\n" |
4448 | "/// destination.\n" |
4449 | "///\n" |
4450 | "/// \\headerfile <x86intrin.h>\n" |
4451 | "///\n" |
4452 | "/// This intrinsic corresponds to the <c> VAESIMC </c> instruction.\n" |
4453 | "///\n" |
4454 | "/// \\param __V\n" |
4455 | "/// A 128-bit integer vector containing the expanded key.\n" |
4456 | "/// \\returns A 128-bit integer vector containing the transformed value.\n" |
4457 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
4458 | "_mm_aesimc_si128(__m128i __V)\n" |
4459 | "{\n" |
4460 | " return (__m128i)__builtin_ia32_aesimc128((__v2di)__V);\n" |
4461 | "}\n" |
4462 | "\n" |
4463 | "/// Generates a round key for AES encryption, operating on 128-bit data\n" |
4464 | "/// specified in the first source operand and using an 8-bit round constant\n" |
4465 | "/// specified by the second source operand, and writes the result to the\n" |
4466 | "/// destination.\n" |
4467 | "///\n" |
4468 | "/// \\headerfile <x86intrin.h>\n" |
4469 | "///\n" |
4470 | "/// \\code\n" |
4471 | "/// __m128i _mm_aeskeygenassist_si128(__m128i C, const int R);\n" |
4472 | "/// \\endcode\n" |
4473 | "///\n" |
4474 | "/// This intrinsic corresponds to the <c> AESKEYGENASSIST </c> instruction.\n" |
4475 | "///\n" |
4476 | "/// \\param C\n" |
4477 | "/// A 128-bit integer vector that is used to generate the AES encryption key.\n" |
4478 | "/// \\param R\n" |
4479 | "/// An 8-bit round constant used to generate the AES encryption key.\n" |
4480 | "/// \\returns A 128-bit round key for AES encryption.\n" |
4481 | "#define _mm_aeskeygenassist_si128(C, R) \\\n" |
4482 | " (__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R))\n" |
4483 | "\n" |
4484 | "#undef __DEFAULT_FN_ATTRS\n" |
4485 | "\n" |
4486 | "#endif /* __WMMINTRIN_AES_H */\n" |
4487 | "" } , |
4488 | { "/builtins/__wmmintrin_pclmul.h" , "/*===---- __wmmintrin_pclmul.h - PCMUL intrinsics ---------------------------===\n" |
4489 | " *\n" |
4490 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
4491 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
4492 | " * in the Software without restriction, including without limitation the rights\n" |
4493 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
4494 | " * copies of the Software, and to permit persons to whom the Software is\n" |
4495 | " * furnished to do so, subject to the following conditions:\n" |
4496 | " *\n" |
4497 | " * The above copyright notice and this permission notice shall be included in\n" |
4498 | " * all copies or substantial portions of the Software.\n" |
4499 | " *\n" |
4500 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
4501 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
4502 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
4503 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
4504 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
4505 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
4506 | " * THE SOFTWARE.\n" |
4507 | " *\n" |
4508 | " *===-----------------------------------------------------------------------===\n" |
4509 | " */\n" |
4510 | "\n" |
4511 | "#ifndef __WMMINTRIN_H\n" |
4512 | "#error \"Never use <__wmmintrin_pclmul.h> directly; include <wmmintrin.h> instead.\"\n" |
4513 | "#endif\n" |
4514 | "\n" |
4515 | "#ifndef __WMMINTRIN_PCLMUL_H\n" |
4516 | "#define __WMMINTRIN_PCLMUL_H\n" |
4517 | "\n" |
4518 | "/// Multiplies two 64-bit integer values, which are selected from source\n" |
4519 | "/// operands using the immediate-value operand. The multiplication is a\n" |
4520 | "/// carry-less multiplication, and the 128-bit integer product is stored in\n" |
4521 | "/// the destination.\n" |
4522 | "///\n" |
4523 | "/// \\headerfile <x86intrin.h>\n" |
4524 | "///\n" |
4525 | "/// \\code\n" |
4526 | "/// __m128i _mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I);\n" |
4527 | "/// \\endcode\n" |
4528 | "///\n" |
4529 | "/// This intrinsic corresponds to the <c> VPCLMULQDQ </c> instruction.\n" |
4530 | "///\n" |
4531 | "/// \\param __X\n" |
4532 | "/// A 128-bit vector of [2 x i64] containing one of the source operands.\n" |
4533 | "/// \\param __Y\n" |
4534 | "/// A 128-bit vector of [2 x i64] containing one of the source operands.\n" |
4535 | "/// \\param __I\n" |
4536 | "/// An immediate value specifying which 64-bit values to select from the\n" |
4537 | "/// operands. Bit 0 is used to select a value from operand \\a __X, and bit\n" |
4538 | "/// 4 is used to select a value from operand \\a __Y: \\n\n" |
4539 | "/// Bit[0]=0 indicates that bits[63:0] of operand \\a __X are used. \\n\n" |
4540 | "/// Bit[0]=1 indicates that bits[127:64] of operand \\a __X are used. \\n\n" |
4541 | "/// Bit[4]=0 indicates that bits[63:0] of operand \\a __Y are used. \\n\n" |
4542 | "/// Bit[4]=1 indicates that bits[127:64] of operand \\a __Y are used.\n" |
4543 | "/// \\returns The 128-bit integer vector containing the result of the carry-less\n" |
4544 | "/// multiplication of the selected 64-bit values.\n" |
4545 | "#define _mm_clmulepi64_si128(X, Y, I) \\\n" |
4546 | " ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \\\n" |
4547 | " (__v2di)(__m128i)(Y), (char)(I)))\n" |
4548 | "\n" |
4549 | "#endif /* __WMMINTRIN_PCLMUL_H */\n" |
4550 | "" } , |
4551 | { "/builtins/adxintrin.h" , "/*===---- adxintrin.h - ADX intrinsics -------------------------------------===\n" |
4552 | " *\n" |
4553 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
4554 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
4555 | " * in the Software without restriction, including without limitation the rights\n" |
4556 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
4557 | " * copies of the Software, and to permit persons to whom the Software is\n" |
4558 | " * furnished to do so, subject to the following conditions:\n" |
4559 | " *\n" |
4560 | " * The above copyright notice and this permission notice shall be included in\n" |
4561 | " * all copies or substantial portions of the Software.\n" |
4562 | " *\n" |
4563 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
4564 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
4565 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
4566 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
4567 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
4568 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
4569 | " * THE SOFTWARE.\n" |
4570 | " *\n" |
4571 | " *===-----------------------------------------------------------------------===\n" |
4572 | " */\n" |
4573 | "\n" |
4574 | "#ifndef __IMMINTRIN_H\n" |
4575 | "#error \"Never use <adxintrin.h> directly; include <immintrin.h> instead.\"\n" |
4576 | "#endif\n" |
4577 | "\n" |
4578 | "#ifndef __ADXINTRIN_H\n" |
4579 | "#define __ADXINTRIN_H\n" |
4580 | "\n" |
4581 | "/* Define the default attributes for the functions in this file. */\n" |
4582 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n" |
4583 | "\n" |
4584 | "/* Intrinsics that are available only if __ADX__ defined */\n" |
4585 | "static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__(\"adx\")))\n" |
4586 | "_addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,\n" |
4587 | " unsigned int *__p)\n" |
4588 | "{\n" |
4589 | " return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);\n" |
4590 | "}\n" |
4591 | "\n" |
4592 | "#ifdef __x86_64__\n" |
4593 | "static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__(\"adx\")))\n" |
4594 | "_addcarryx_u64(unsigned char __cf, unsigned long long __x,\n" |
4595 | " unsigned long long __y, unsigned long long *__p)\n" |
4596 | "{\n" |
4597 | " return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);\n" |
4598 | "}\n" |
4599 | "#endif\n" |
4600 | "\n" |
4601 | "/* Intrinsics that are also available if __ADX__ undefined */\n" |
4602 | "static __inline unsigned char __DEFAULT_FN_ATTRS\n" |
4603 | "_addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y,\n" |
4604 | " unsigned int *__p)\n" |
4605 | "{\n" |
4606 | " return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);\n" |
4607 | "}\n" |
4608 | "\n" |
4609 | "#ifdef __x86_64__\n" |
4610 | "static __inline unsigned char __DEFAULT_FN_ATTRS\n" |
4611 | "_addcarry_u64(unsigned char __cf, unsigned long long __x,\n" |
4612 | " unsigned long long __y, unsigned long long *__p)\n" |
4613 | "{\n" |
4614 | " return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);\n" |
4615 | "}\n" |
4616 | "#endif\n" |
4617 | "\n" |
4618 | "static __inline unsigned char __DEFAULT_FN_ATTRS\n" |
4619 | "_subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y,\n" |
4620 | " unsigned int *__p)\n" |
4621 | "{\n" |
4622 | " return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);\n" |
4623 | "}\n" |
4624 | "\n" |
4625 | "#ifdef __x86_64__\n" |
4626 | "static __inline unsigned char __DEFAULT_FN_ATTRS\n" |
4627 | "_subborrow_u64(unsigned char __cf, unsigned long long __x,\n" |
4628 | " unsigned long long __y, unsigned long long *__p)\n" |
4629 | "{\n" |
4630 | " return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);\n" |
4631 | "}\n" |
4632 | "#endif\n" |
4633 | "\n" |
4634 | "#undef __DEFAULT_FN_ATTRS\n" |
4635 | "\n" |
4636 | "#endif /* __ADXINTRIN_H */\n" |
4637 | "" } , |
4638 | { "/builtins/ammintrin.h" , "/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===\n" |
4639 | " *\n" |
4640 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
4641 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
4642 | " * in the Software without restriction, including without limitation the rights\n" |
4643 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
4644 | " * copies of the Software, and to permit persons to whom the Software is\n" |
4645 | " * furnished to do so, subject to the following conditions:\n" |
4646 | " *\n" |
4647 | " * The above copyright notice and this permission notice shall be included in\n" |
4648 | " * all copies or substantial portions of the Software.\n" |
4649 | " *\n" |
4650 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
4651 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
4652 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
4653 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
4654 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
4655 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
4656 | " * THE SOFTWARE.\n" |
4657 | " *\n" |
4658 | " *===-----------------------------------------------------------------------===\n" |
4659 | " */\n" |
4660 | "\n" |
4661 | "#ifndef __AMMINTRIN_H\n" |
4662 | "#define __AMMINTRIN_H\n" |
4663 | "\n" |
4664 | "#include <pmmintrin.h>\n" |
4665 | "\n" |
4666 | "/* Define the default attributes for the functions in this file. */\n" |
4667 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse4a\"), __min_vector_width__(128)))\n" |
4668 | "\n" |
4669 | "/// Extracts the specified bits from the lower 64 bits of the 128-bit\n" |
4670 | "/// integer vector operand at the index \\a idx and of the length \\a len.\n" |
4671 | "///\n" |
4672 | "/// \\headerfile <x86intrin.h>\n" |
4673 | "///\n" |
4674 | "/// \\code\n" |
4675 | "/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);\n" |
4676 | "/// \\endcode\n" |
4677 | "///\n" |
4678 | "/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.\n" |
4679 | "///\n" |
4680 | "/// \\param x\n" |
4681 | "/// The value from which bits are extracted.\n" |
4682 | "/// \\param len\n" |
4683 | "/// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]\n" |
4684 | "/// are zero, the length is interpreted as 64.\n" |
4685 | "/// \\param idx\n" |
4686 | "/// Bits [5:0] specify the index of the least significant bit; the other\n" |
4687 | "/// bits are ignored. If the sum of the index and length is greater than 64,\n" |
4688 | "/// the result is undefined. If the length and index are both zero, bits\n" |
4689 | "/// [63:0] of parameter \\a x are extracted. If the length is zero but the\n" |
4690 | "/// index is non-zero, the result is undefined.\n" |
4691 | "/// \\returns A 128-bit integer vector whose lower 64 bits contain the bits\n" |
4692 | "/// extracted from the source operand.\n" |
4693 | "#define _mm_extracti_si64(x, len, idx) \\\n" |
4694 | " ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \\\n" |
4695 | " (char)(len), (char)(idx)))\n" |
4696 | "\n" |
4697 | "/// Extracts the specified bits from the lower 64 bits of the 128-bit\n" |
4698 | "/// integer vector operand at the index and of the length specified by\n" |
4699 | "/// \\a __y.\n" |
4700 | "///\n" |
4701 | "/// \\headerfile <x86intrin.h>\n" |
4702 | "///\n" |
4703 | "/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.\n" |
4704 | "///\n" |
4705 | "/// \\param __x\n" |
4706 | "/// The value from which bits are extracted.\n" |
4707 | "/// \\param __y\n" |
4708 | "/// Specifies the index of the least significant bit at [13:8] and the\n" |
4709 | "/// length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the\n" |
4710 | "/// length is interpreted as 64. If the sum of the index and length is\n" |
4711 | "/// greater than 64, the result is undefined. If the length and index are\n" |
4712 | "/// both zero, bits [63:0] of parameter \\a __x are extracted. If the length\n" |
4713 | "/// is zero but the index is non-zero, the result is undefined.\n" |
4714 | "/// \\returns A 128-bit vector whose lower 64 bits contain the bits extracted\n" |
4715 | "/// from the source operand.\n" |
4716 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
4717 | "_mm_extract_si64(__m128i __x, __m128i __y)\n" |
4718 | "{\n" |
4719 | " return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);\n" |
4720 | "}\n" |
4721 | "\n" |
4722 | "/// Inserts bits of a specified length from the source integer vector\n" |
4723 | "/// \\a y into the lower 64 bits of the destination integer vector \\a x at\n" |
4724 | "/// the index \\a idx and of the length \\a len.\n" |
4725 | "///\n" |
4726 | "/// \\headerfile <x86intrin.h>\n" |
4727 | "///\n" |
4728 | "/// \\code\n" |
4729 | "/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,\n" |
4730 | "/// const int idx);\n" |
4731 | "/// \\endcode\n" |
4732 | "///\n" |
4733 | "/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.\n" |
4734 | "///\n" |
4735 | "/// \\param x\n" |
4736 | "/// The destination operand where bits will be inserted. The inserted bits\n" |
4737 | "/// are defined by the length \\a len and by the index \\a idx specifying the\n" |
4738 | "/// least significant bit.\n" |
4739 | "/// \\param y\n" |
4740 | "/// The source operand containing the bits to be extracted. The extracted\n" |
4741 | "/// bits are the least significant bits of operand \\a y of length \\a len.\n" |
4742 | "/// \\param len\n" |
4743 | "/// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]\n" |
4744 | "/// are zero, the length is interpreted as 64.\n" |
4745 | "/// \\param idx\n" |
4746 | "/// Bits [5:0] specify the index of the least significant bit; the other\n" |
4747 | "/// bits are ignored. If the sum of the index and length is greater than 64,\n" |
4748 | "/// the result is undefined. If the length and index are both zero, bits\n" |
4749 | "/// [63:0] of parameter \\a y are inserted into parameter \\a x. If the length\n" |
4750 | "/// is zero but the index is non-zero, the result is undefined.\n" |
4751 | "/// \\returns A 128-bit integer vector containing the original lower 64-bits of\n" |
4752 | "/// destination operand \\a x with the specified bitfields replaced by the\n" |
4753 | "/// lower bits of source operand \\a y. The upper 64 bits of the return value\n" |
4754 | "/// are undefined.\n" |
4755 | "#define _mm_inserti_si64(x, y, len, idx) \\\n" |
4756 | " ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \\\n" |
4757 | " (__v2di)(__m128i)(y), \\\n" |
4758 | " (char)(len), (char)(idx)))\n" |
4759 | "\n" |
4760 | "/// Inserts bits of a specified length from the source integer vector\n" |
4761 | "/// \\a __y into the lower 64 bits of the destination integer vector \\a __x\n" |
4762 | "/// at the index and of the length specified by \\a __y.\n" |
4763 | "///\n" |
4764 | "/// \\headerfile <x86intrin.h>\n" |
4765 | "///\n" |
4766 | "/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.\n" |
4767 | "///\n" |
4768 | "/// \\param __x\n" |
4769 | "/// The destination operand where bits will be inserted. The inserted bits\n" |
4770 | "/// are defined by the length and by the index of the least significant bit\n" |
4771 | "/// specified by operand \\a __y.\n" |
4772 | "/// \\param __y\n" |
4773 | "/// The source operand containing the bits to be extracted. The extracted\n" |
4774 | "/// bits are the least significant bits of operand \\a __y with length\n" |
4775 | "/// specified by bits [69:64]. These are inserted into the destination at the\n" |
4776 | "/// index specified by bits [77:72]; all other bits are ignored. If bits\n" |
4777 | "/// [69:64] are zero, the length is interpreted as 64. If the sum of the\n" |
4778 | "/// index and length is greater than 64, the result is undefined. If the\n" |
4779 | "/// length and index are both zero, bits [63:0] of parameter \\a __y are\n" |
4780 | "/// inserted into parameter \\a __x. If the length is zero but the index is\n" |
4781 | "/// non-zero, the result is undefined.\n" |
4782 | "/// \\returns A 128-bit integer vector containing the original lower 64-bits of\n" |
4783 | "/// destination operand \\a __x with the specified bitfields replaced by the\n" |
4784 | "/// lower bits of source operand \\a __y. The upper 64 bits of the return\n" |
4785 | "/// value are undefined.\n" |
4786 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
4787 | "_mm_insert_si64(__m128i __x, __m128i __y)\n" |
4788 | "{\n" |
4789 | " return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);\n" |
4790 | "}\n" |
4791 | "\n" |
4792 | "/// Stores a 64-bit double-precision value in a 64-bit memory location.\n" |
4793 | "/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n" |
4794 | "/// used again soon).\n" |
4795 | "///\n" |
4796 | "/// \\headerfile <x86intrin.h>\n" |
4797 | "///\n" |
4798 | "/// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.\n" |
4799 | "///\n" |
4800 | "/// \\param __p\n" |
4801 | "/// The 64-bit memory location used to store the register value.\n" |
4802 | "/// \\param __a\n" |
4803 | "/// The 64-bit double-precision floating-point register value to be stored.\n" |
4804 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
4805 | "_mm_stream_sd(double *__p, __m128d __a)\n" |
4806 | "{\n" |
4807 | " __builtin_ia32_movntsd(__p, (__v2df)__a);\n" |
4808 | "}\n" |
4809 | "\n" |
4810 | "/// Stores a 32-bit single-precision floating-point value in a 32-bit\n" |
4811 | "/// memory location. To minimize caching, the data is flagged as\n" |
4812 | "/// non-temporal (unlikely to be used again soon).\n" |
4813 | "///\n" |
4814 | "/// \\headerfile <x86intrin.h>\n" |
4815 | "///\n" |
4816 | "/// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.\n" |
4817 | "///\n" |
4818 | "/// \\param __p\n" |
4819 | "/// The 32-bit memory location used to store the register value.\n" |
4820 | "/// \\param __a\n" |
4821 | "/// The 32-bit single-precision floating-point register value to be stored.\n" |
4822 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
4823 | "_mm_stream_ss(float *__p, __m128 __a)\n" |
4824 | "{\n" |
4825 | " __builtin_ia32_movntss(__p, (__v4sf)__a);\n" |
4826 | "}\n" |
4827 | "\n" |
4828 | "#undef __DEFAULT_FN_ATTRS\n" |
4829 | "\n" |
4830 | "#endif /* __AMMINTRIN_H */\n" |
4831 | "" } , |
4832 | { "/builtins/arm64intr.h" , "/*===---- arm64intr.h - ARM64 Windows intrinsics -------------------------------===\n" |
4833 | " *\n" |
4834 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
4835 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
4836 | " * in the Software without restriction, including without limitation the rights\n" |
4837 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
4838 | " * copies of the Software, and to permit persons to whom the Software is\n" |
4839 | " * furnished to do so, subject to the following conditions:\n" |
4840 | " *\n" |
4841 | " * The above copyright notice and this permission notice shall be included in\n" |
4842 | " * all copies or substantial portions of the Software.\n" |
4843 | " *\n" |
4844 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
4845 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
4846 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
4847 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
4848 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
4849 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
4850 | " * THE SOFTWARE.\n" |
4851 | " *\n" |
4852 | " *===-----------------------------------------------------------------------===\n" |
4853 | " */\n" |
4854 | "\n" |
4855 | "/* Only include this if we're compiling for the windows platform. */\n" |
4856 | "#ifndef _MSC_VER\n" |
4857 | "#include_next <arm64intr.h>\n" |
4858 | "#else\n" |
4859 | "\n" |
4860 | "#ifndef __ARM64INTR_H\n" |
4861 | "#define __ARM64INTR_H\n" |
4862 | "\n" |
4863 | "typedef enum\n" |
4864 | "{\n" |
4865 | " _ARM64_BARRIER_SY = 0xF,\n" |
4866 | " _ARM64_BARRIER_ST = 0xE,\n" |
4867 | " _ARM64_BARRIER_LD = 0xD,\n" |
4868 | " _ARM64_BARRIER_ISH = 0xB,\n" |
4869 | " _ARM64_BARRIER_ISHST = 0xA,\n" |
4870 | " _ARM64_BARRIER_ISHLD = 0x9,\n" |
4871 | " _ARM64_BARRIER_NSH = 0x7,\n" |
4872 | " _ARM64_BARRIER_NSHST = 0x6,\n" |
4873 | " _ARM64_BARRIER_NSHLD = 0x5,\n" |
4874 | " _ARM64_BARRIER_OSH = 0x3,\n" |
4875 | " _ARM64_BARRIER_OSHST = 0x2,\n" |
4876 | " _ARM64_BARRIER_OSHLD = 0x1\n" |
4877 | "} _ARM64INTR_BARRIER_TYPE;\n" |
4878 | "\n" |
4879 | "#endif /* __ARM64INTR_H */\n" |
4880 | "#endif /* _MSC_VER */\n" |
4881 | "" } , |
4882 | { "/builtins/arm_acle.h" , "/*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===\n" |
4883 | " *\n" |
4884 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
4885 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
4886 | " * in the Software without restriction, including without limitation the rights\n" |
4887 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
4888 | " * copies of the Software, and to permit persons to whom the Software is\n" |
4889 | " * furnished to do so, subject to the following conditions:\n" |
4890 | " *\n" |
4891 | " * The above copyright notice and this permission notice shall be included in\n" |
4892 | " * all copies or substantial portions of the Software.\n" |
4893 | " *\n" |
4894 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
4895 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
4896 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
4897 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
4898 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
4899 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
4900 | " * THE SOFTWARE.\n" |
4901 | " *\n" |
4902 | " *===-----------------------------------------------------------------------===\n" |
4903 | " */\n" |
4904 | "\n" |
4905 | "#ifndef __ARM_ACLE_H\n" |
4906 | "#define __ARM_ACLE_H\n" |
4907 | "\n" |
4908 | "#ifndef __ARM_ACLE\n" |
4909 | "#error \"ACLE intrinsics support not enabled.\"\n" |
4910 | "#endif\n" |
4911 | "\n" |
4912 | "#include <stdint.h>\n" |
4913 | "\n" |
4914 | "#if defined(__cplusplus)\n" |
4915 | "extern \"C\" {\n" |
4916 | "#endif\n" |
4917 | "\n" |
4918 | "/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */\n" |
4919 | "/* 8.3 Memory barriers */\n" |
4920 | "#if !defined(_MSC_VER)\n" |
4921 | "#define __dmb(i) __builtin_arm_dmb(i)\n" |
4922 | "#define __dsb(i) __builtin_arm_dsb(i)\n" |
4923 | "#define __isb(i) __builtin_arm_isb(i)\n" |
4924 | "#endif\n" |
4925 | "\n" |
4926 | "/* 8.4 Hints */\n" |
4927 | "\n" |
4928 | "#if !defined(_MSC_VER)\n" |
4929 | "static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {\n" |
4930 | " __builtin_arm_wfi();\n" |
4931 | "}\n" |
4932 | "\n" |
4933 | "static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) {\n" |
4934 | " __builtin_arm_wfe();\n" |
4935 | "}\n" |
4936 | "\n" |
4937 | "static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) {\n" |
4938 | " __builtin_arm_sev();\n" |
4939 | "}\n" |
4940 | "\n" |
4941 | "static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) {\n" |
4942 | " __builtin_arm_sevl();\n" |
4943 | "}\n" |
4944 | "\n" |
4945 | "static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) {\n" |
4946 | " __builtin_arm_yield();\n" |
4947 | "}\n" |
4948 | "#endif\n" |
4949 | "\n" |
4950 | "#if __ARM_32BIT_STATE\n" |
4951 | "#define __dbg(t) __builtin_arm_dbg(t)\n" |
4952 | "#endif\n" |
4953 | "\n" |
4954 | "/* 8.5 Swap */\n" |
4955 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
4956 | "__swp(uint32_t __x, volatile uint32_t *__p) {\n" |
4957 | " uint32_t v;\n" |
4958 | " do\n" |
4959 | " v = __builtin_arm_ldrex(__p);\n" |
4960 | " while (__builtin_arm_strex(__x, __p));\n" |
4961 | " return v;\n" |
4962 | "}\n" |
4963 | "\n" |
4964 | "/* 8.6 Memory prefetch intrinsics */\n" |
4965 | "/* 8.6.1 Data prefetch */\n" |
4966 | "#define __pld(addr) __pldx(0, 0, 0, addr)\n" |
4967 | "\n" |
4968 | "#if __ARM_32BIT_STATE\n" |
4969 | "#define __pldx(access_kind, cache_level, retention_policy, addr) \\\n" |
4970 | " __builtin_arm_prefetch(addr, access_kind, 1)\n" |
4971 | "#else\n" |
4972 | "#define __pldx(access_kind, cache_level, retention_policy, addr) \\\n" |
4973 | " __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)\n" |
4974 | "#endif\n" |
4975 | "\n" |
4976 | "/* 8.6.2 Instruction prefetch */\n" |
4977 | "#define __pli(addr) __plix(0, 0, addr)\n" |
4978 | "\n" |
4979 | "#if __ARM_32BIT_STATE\n" |
4980 | "#define __plix(cache_level, retention_policy, addr) \\\n" |
4981 | " __builtin_arm_prefetch(addr, 0, 0)\n" |
4982 | "#else\n" |
4983 | "#define __plix(cache_level, retention_policy, addr) \\\n" |
4984 | " __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)\n" |
4985 | "#endif\n" |
4986 | "\n" |
4987 | "/* 8.7 NOP */\n" |
4988 | "static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {\n" |
4989 | " __builtin_arm_nop();\n" |
4990 | "}\n" |
4991 | "\n" |
4992 | "/* 9 DATA-PROCESSING INTRINSICS */\n" |
4993 | "/* 9.2 Miscellaneous data-processing intrinsics */\n" |
4994 | "/* ROR */\n" |
4995 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
4996 | "__ror(uint32_t __x, uint32_t __y) {\n" |
4997 | " __y %= 32;\n" |
4998 | " if (__y == 0)\n" |
4999 | " return __x;\n" |
5000 | " return (__x >> __y) | (__x << (32 - __y));\n" |
5001 | "}\n" |
5002 | "\n" |
5003 | "static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n" |
5004 | "__rorll(uint64_t __x, uint32_t __y) {\n" |
5005 | " __y %= 64;\n" |
5006 | " if (__y == 0)\n" |
5007 | " return __x;\n" |
5008 | " return (__x >> __y) | (__x << (64 - __y));\n" |
5009 | "}\n" |
5010 | "\n" |
5011 | "static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n" |
5012 | "__rorl(unsigned long __x, uint32_t __y) {\n" |
5013 | "#if __SIZEOF_LONG__ == 4\n" |
5014 | " return __ror(__x, __y);\n" |
5015 | "#else\n" |
5016 | " return __rorll(__x, __y);\n" |
5017 | "#endif\n" |
5018 | "}\n" |
5019 | "\n" |
5020 | "\n" |
5021 | "/* CLZ */\n" |
5022 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
5023 | "__clz(uint32_t __t) {\n" |
5024 | " return __builtin_clz(__t);\n" |
5025 | "}\n" |
5026 | "\n" |
5027 | "static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n" |
5028 | "__clzl(unsigned long __t) {\n" |
5029 | " return __builtin_clzl(__t);\n" |
5030 | "}\n" |
5031 | "\n" |
5032 | "static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n" |
5033 | "__clzll(uint64_t __t) {\n" |
5034 | " return __builtin_clzll(__t);\n" |
5035 | "}\n" |
5036 | "\n" |
5037 | "/* REV */\n" |
5038 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
5039 | "__rev(uint32_t __t) {\n" |
5040 | " return __builtin_bswap32(__t);\n" |
5041 | "}\n" |
5042 | "\n" |
5043 | "static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n" |
5044 | "__revl(unsigned long __t) {\n" |
5045 | "#if __SIZEOF_LONG__ == 4\n" |
5046 | " return __builtin_bswap32(__t);\n" |
5047 | "#else\n" |
5048 | " return __builtin_bswap64(__t);\n" |
5049 | "#endif\n" |
5050 | "}\n" |
5051 | "\n" |
5052 | "static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n" |
5053 | "__revll(uint64_t __t) {\n" |
5054 | " return __builtin_bswap64(__t);\n" |
5055 | "}\n" |
5056 | "\n" |
5057 | "/* REV16 */\n" |
5058 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
5059 | "__rev16(uint32_t __t) {\n" |
5060 | " return __ror(__rev(__t), 16);\n" |
5061 | "}\n" |
5062 | "\n" |
5063 | "static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n" |
5064 | "__rev16ll(uint64_t __t) {\n" |
5065 | " return (((uint64_t)__rev16(__t >> 32)) << 32) | __rev16(__t);\n" |
5066 | "}\n" |
5067 | "\n" |
5068 | "static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n" |
5069 | "__rev16l(unsigned long __t) {\n" |
5070 | "#if __SIZEOF_LONG__ == 4\n" |
5071 | " return __rev16(__t);\n" |
5072 | "#else\n" |
5073 | " return __rev16ll(__t);\n" |
5074 | "#endif\n" |
5075 | "}\n" |
5076 | "\n" |
5077 | "/* REVSH */\n" |
5078 | "static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))\n" |
5079 | "__revsh(int16_t __t) {\n" |
5080 | " return __builtin_bswap16(__t);\n" |
5081 | "}\n" |
5082 | "\n" |
5083 | "/* RBIT */\n" |
5084 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
5085 | "__rbit(uint32_t __t) {\n" |
5086 | " return __builtin_arm_rbit(__t);\n" |
5087 | "}\n" |
5088 | "\n" |
5089 | "static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n" |
5090 | "__rbitll(uint64_t __t) {\n" |
5091 | "#if __ARM_32BIT_STATE\n" |
5092 | " return (((uint64_t)__builtin_arm_rbit(__t)) << 32) |\n" |
5093 | " __builtin_arm_rbit(__t >> 32);\n" |
5094 | "#else\n" |
5095 | " return __builtin_arm_rbit64(__t);\n" |
5096 | "#endif\n" |
5097 | "}\n" |
5098 | "\n" |
5099 | "static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n" |
5100 | "__rbitl(unsigned long __t) {\n" |
5101 | "#if __SIZEOF_LONG__ == 4\n" |
5102 | " return __rbit(__t);\n" |
5103 | "#else\n" |
5104 | " return __rbitll(__t);\n" |
5105 | "#endif\n" |
5106 | "}\n" |
5107 | "\n" |
5108 | "/*\n" |
5109 | " * 9.3 16-bit multiplications\n" |
5110 | " */\n" |
5111 | "#if __ARM_FEATURE_DSP\n" |
5112 | "static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n" |
5113 | "__smulbb(int32_t __a, int32_t __b) {\n" |
5114 | " return __builtin_arm_smulbb(__a, __b);\n" |
5115 | "}\n" |
5116 | "static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n" |
5117 | "__smulbt(int32_t __a, int32_t __b) {\n" |
5118 | " return __builtin_arm_smulbt(__a, __b);\n" |
5119 | "}\n" |
5120 | "static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n" |
5121 | "__smultb(int32_t __a, int32_t __b) {\n" |
5122 | " return __builtin_arm_smultb(__a, __b);\n" |
5123 | "}\n" |
5124 | "static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n" |
5125 | "__smultt(int32_t __a, int32_t __b) {\n" |
5126 | " return __builtin_arm_smultt(__a, __b);\n" |
5127 | "}\n" |
5128 | "static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n" |
5129 | "__smulwb(int32_t __a, int32_t __b) {\n" |
5130 | " return __builtin_arm_smulwb(__a, __b);\n" |
5131 | "}\n" |
5132 | "static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n" |
5133 | "__smulwt(int32_t __a, int32_t __b) {\n" |
5134 | " return __builtin_arm_smulwt(__a, __b);\n" |
5135 | "}\n" |
5136 | "#endif\n" |
5137 | "\n" |
5138 | "/*\n" |
5139 | " * 9.4 Saturating intrinsics\n" |
5140 | " *\n" |
5141 | " * FIXME: Change guard to their corrosponding __ARM_FEATURE flag when Q flag\n" |
5142 | " * intrinsics are implemented and the flag is enabled.\n" |
5143 | " */\n" |
5144 | "/* 9.4.1 Width-specified saturation intrinsics */\n" |
5145 | "#if __ARM_FEATURE_SAT\n" |
5146 | "#define __ssat(x, y) __builtin_arm_ssat(x, y)\n" |
5147 | "#define __usat(x, y) __builtin_arm_usat(x, y)\n" |
5148 | "#endif\n" |
5149 | "\n" |
5150 | "/* 9.4.2 Saturating addition and subtraction intrinsics */\n" |
5151 | "#if __ARM_FEATURE_DSP\n" |
5152 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5153 | "__qadd(int32_t __t, int32_t __v) {\n" |
5154 | " return __builtin_arm_qadd(__t, __v);\n" |
5155 | "}\n" |
5156 | "\n" |
5157 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5158 | "__qsub(int32_t __t, int32_t __v) {\n" |
5159 | " return __builtin_arm_qsub(__t, __v);\n" |
5160 | "}\n" |
5161 | "\n" |
5162 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5163 | "__qdbl(int32_t __t) {\n" |
5164 | " return __builtin_arm_qadd(__t, __t);\n" |
5165 | "}\n" |
5166 | "#endif\n" |
5167 | "\n" |
5168 | "/* 9.4.3 Accumultating multiplications */\n" |
5169 | "#if __ARM_FEATURE_DSP\n" |
5170 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5171 | "__smlabb(int32_t __a, int32_t __b, int32_t __c) {\n" |
5172 | " return __builtin_arm_smlabb(__a, __b, __c);\n" |
5173 | "}\n" |
5174 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5175 | "__smlabt(int32_t __a, int32_t __b, int32_t __c) {\n" |
5176 | " return __builtin_arm_smlabt(__a, __b, __c);\n" |
5177 | "}\n" |
5178 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5179 | "__smlatb(int32_t __a, int32_t __b, int32_t __c) {\n" |
5180 | " return __builtin_arm_smlatb(__a, __b, __c);\n" |
5181 | "}\n" |
5182 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5183 | "__smlatt(int32_t __a, int32_t __b, int32_t __c) {\n" |
5184 | " return __builtin_arm_smlatt(__a, __b, __c);\n" |
5185 | "}\n" |
5186 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5187 | "__smlawb(int32_t __a, int32_t __b, int32_t __c) {\n" |
5188 | " return __builtin_arm_smlawb(__a, __b, __c);\n" |
5189 | "}\n" |
5190 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5191 | "__smlawt(int32_t __a, int32_t __b, int32_t __c) {\n" |
5192 | " return __builtin_arm_smlawt(__a, __b, __c);\n" |
5193 | "}\n" |
5194 | "#endif\n" |
5195 | "\n" |
5196 | "\n" |
5197 | "/* 9.5.4 Parallel 16-bit saturation */\n" |
5198 | "#if __ARM_FEATURE_SIMD32\n" |
5199 | "#define __ssat16(x, y) __builtin_arm_ssat16(x, y)\n" |
5200 | "#define __usat16(x, y) __builtin_arm_usat16(x, y)\n" |
5201 | "#endif\n" |
5202 | "\n" |
5203 | "/* 9.5.5 Packing and unpacking */\n" |
5204 | "#if __ARM_FEATURE_SIMD32\n" |
5205 | "typedef int32_t int8x4_t;\n" |
5206 | "typedef int32_t int16x2_t;\n" |
5207 | "typedef uint32_t uint8x4_t;\n" |
5208 | "typedef uint32_t uint16x2_t;\n" |
5209 | "\n" |
5210 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5211 | "__sxtab16(int16x2_t __a, int8x4_t __b) {\n" |
5212 | " return __builtin_arm_sxtab16(__a, __b);\n" |
5213 | "}\n" |
5214 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5215 | "__sxtb16(int8x4_t __a) {\n" |
5216 | " return __builtin_arm_sxtb16(__a);\n" |
5217 | "}\n" |
5218 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5219 | "__uxtab16(int16x2_t __a, int8x4_t __b) {\n" |
5220 | " return __builtin_arm_uxtab16(__a, __b);\n" |
5221 | "}\n" |
5222 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5223 | "__uxtb16(int8x4_t __a) {\n" |
5224 | " return __builtin_arm_uxtb16(__a);\n" |
5225 | "}\n" |
5226 | "#endif\n" |
5227 | "\n" |
5228 | "/* 9.5.6 Parallel selection */\n" |
5229 | "#if __ARM_FEATURE_SIMD32\n" |
5230 | "static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
5231 | "__sel(uint8x4_t __a, uint8x4_t __b) {\n" |
5232 | " return __builtin_arm_sel(__a, __b);\n" |
5233 | "}\n" |
5234 | "#endif\n" |
5235 | "\n" |
5236 | "/* 9.5.7 Parallel 8-bit addition and subtraction */\n" |
5237 | "#if __ARM_FEATURE_SIMD32\n" |
5238 | "static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
5239 | "__qadd8(int8x4_t __a, int8x4_t __b) {\n" |
5240 | " return __builtin_arm_qadd8(__a, __b);\n" |
5241 | "}\n" |
5242 | "static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
5243 | "__qsub8(int8x4_t __a, int8x4_t __b) {\n" |
5244 | " return __builtin_arm_qsub8(__a, __b);\n" |
5245 | "}\n" |
5246 | "static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
5247 | "__sadd8(int8x4_t __a, int8x4_t __b) {\n" |
5248 | " return __builtin_arm_sadd8(__a, __b);\n" |
5249 | "}\n" |
5250 | "static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
5251 | "__shadd8(int8x4_t __a, int8x4_t __b) {\n" |
5252 | " return __builtin_arm_shadd8(__a, __b);\n" |
5253 | "}\n" |
5254 | "static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
5255 | "__shsub8(int8x4_t __a, int8x4_t __b) {\n" |
5256 | " return __builtin_arm_shsub8(__a, __b);\n" |
5257 | "}\n" |
5258 | "static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
5259 | "__ssub8(int8x4_t __a, int8x4_t __b) {\n" |
5260 | " return __builtin_arm_ssub8(__a, __b);\n" |
5261 | "}\n" |
5262 | "static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
5263 | "__uadd8(uint8x4_t __a, uint8x4_t __b) {\n" |
5264 | " return __builtin_arm_uadd8(__a, __b);\n" |
5265 | "}\n" |
5266 | "static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
5267 | "__uhadd8(uint8x4_t __a, uint8x4_t __b) {\n" |
5268 | " return __builtin_arm_uhadd8(__a, __b);\n" |
5269 | "}\n" |
5270 | "static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
5271 | "__uhsub8(uint8x4_t __a, uint8x4_t __b) {\n" |
5272 | " return __builtin_arm_uhsub8(__a, __b);\n" |
5273 | "}\n" |
5274 | "static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
5275 | "__uqadd8(uint8x4_t __a, uint8x4_t __b) {\n" |
5276 | " return __builtin_arm_uqadd8(__a, __b);\n" |
5277 | "}\n" |
5278 | "static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
5279 | "__uqsub8(uint8x4_t __a, uint8x4_t __b) {\n" |
5280 | " return __builtin_arm_uqsub8(__a, __b);\n" |
5281 | "}\n" |
5282 | "static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
5283 | "__usub8(uint8x4_t __a, uint8x4_t __b) {\n" |
5284 | " return __builtin_arm_usub8(__a, __b);\n" |
5285 | "}\n" |
5286 | "#endif\n" |
5287 | "\n" |
5288 | "/* 9.5.8 Sum of 8-bit absolute differences */\n" |
5289 | "#if __ARM_FEATURE_SIMD32\n" |
5290 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
5291 | "__usad8(uint8x4_t __a, uint8x4_t __b) {\n" |
5292 | " return __builtin_arm_usad8(__a, __b);\n" |
5293 | "}\n" |
5294 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
5295 | "__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {\n" |
5296 | " return __builtin_arm_usada8(__a, __b, __c);\n" |
5297 | "}\n" |
5298 | "#endif\n" |
5299 | "\n" |
5300 | "/* 9.5.9 Parallel 16-bit addition and subtraction */\n" |
5301 | "#if __ARM_FEATURE_SIMD32\n" |
5302 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5303 | "__qadd16(int16x2_t __a, int16x2_t __b) {\n" |
5304 | " return __builtin_arm_qadd16(__a, __b);\n" |
5305 | "}\n" |
5306 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5307 | "__qasx(int16x2_t __a, int16x2_t __b) {\n" |
5308 | " return __builtin_arm_qasx(__a, __b);\n" |
5309 | "}\n" |
5310 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5311 | "__qsax(int16x2_t __a, int16x2_t __b) {\n" |
5312 | " return __builtin_arm_qsax(__a, __b);\n" |
5313 | "}\n" |
5314 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5315 | "__qsub16(int16x2_t __a, int16x2_t __b) {\n" |
5316 | " return __builtin_arm_qsub16(__a, __b);\n" |
5317 | "}\n" |
5318 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5319 | "__sadd16(int16x2_t __a, int16x2_t __b) {\n" |
5320 | " return __builtin_arm_sadd16(__a, __b);\n" |
5321 | "}\n" |
5322 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5323 | "__sasx(int16x2_t __a, int16x2_t __b) {\n" |
5324 | " return __builtin_arm_sasx(__a, __b);\n" |
5325 | "}\n" |
5326 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5327 | "__shadd16(int16x2_t __a, int16x2_t __b) {\n" |
5328 | " return __builtin_arm_shadd16(__a, __b);\n" |
5329 | "}\n" |
5330 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5331 | "__shasx(int16x2_t __a, int16x2_t __b) {\n" |
5332 | " return __builtin_arm_shasx(__a, __b);\n" |
5333 | "}\n" |
5334 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5335 | "__shsax(int16x2_t __a, int16x2_t __b) {\n" |
5336 | " return __builtin_arm_shsax(__a, __b);\n" |
5337 | "}\n" |
5338 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5339 | "__shsub16(int16x2_t __a, int16x2_t __b) {\n" |
5340 | " return __builtin_arm_shsub16(__a, __b);\n" |
5341 | "}\n" |
5342 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5343 | "__ssax(int16x2_t __a, int16x2_t __b) {\n" |
5344 | " return __builtin_arm_ssax(__a, __b);\n" |
5345 | "}\n" |
5346 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5347 | "__ssub16(int16x2_t __a, int16x2_t __b) {\n" |
5348 | " return __builtin_arm_ssub16(__a, __b);\n" |
5349 | "}\n" |
5350 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5351 | "__uadd16(uint16x2_t __a, uint16x2_t __b) {\n" |
5352 | " return __builtin_arm_uadd16(__a, __b);\n" |
5353 | "}\n" |
5354 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5355 | "__uasx(uint16x2_t __a, uint16x2_t __b) {\n" |
5356 | " return __builtin_arm_uasx(__a, __b);\n" |
5357 | "}\n" |
5358 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5359 | "__uhadd16(uint16x2_t __a, uint16x2_t __b) {\n" |
5360 | " return __builtin_arm_uhadd16(__a, __b);\n" |
5361 | "}\n" |
5362 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5363 | "__uhasx(uint16x2_t __a, uint16x2_t __b) {\n" |
5364 | " return __builtin_arm_uhasx(__a, __b);\n" |
5365 | "}\n" |
5366 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5367 | "__uhsax(uint16x2_t __a, uint16x2_t __b) {\n" |
5368 | " return __builtin_arm_uhsax(__a, __b);\n" |
5369 | "}\n" |
5370 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5371 | "__uhsub16(uint16x2_t __a, uint16x2_t __b) {\n" |
5372 | " return __builtin_arm_uhsub16(__a, __b);\n" |
5373 | "}\n" |
5374 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5375 | "__uqadd16(uint16x2_t __a, uint16x2_t __b) {\n" |
5376 | " return __builtin_arm_uqadd16(__a, __b);\n" |
5377 | "}\n" |
5378 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5379 | "__uqasx(uint16x2_t __a, uint16x2_t __b) {\n" |
5380 | " return __builtin_arm_uqasx(__a, __b);\n" |
5381 | "}\n" |
5382 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5383 | "__uqsax(uint16x2_t __a, uint16x2_t __b) {\n" |
5384 | " return __builtin_arm_uqsax(__a, __b);\n" |
5385 | "}\n" |
5386 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5387 | "__uqsub16(uint16x2_t __a, uint16x2_t __b) {\n" |
5388 | " return __builtin_arm_uqsub16(__a, __b);\n" |
5389 | "}\n" |
5390 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5391 | "__usax(uint16x2_t __a, uint16x2_t __b) {\n" |
5392 | " return __builtin_arm_usax(__a, __b);\n" |
5393 | "}\n" |
5394 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
5395 | "__usub16(uint16x2_t __a, uint16x2_t __b) {\n" |
5396 | " return __builtin_arm_usub16(__a, __b);\n" |
5397 | "}\n" |
5398 | "#endif\n" |
5399 | "\n" |
5400 | "/* 9.5.10 Parallel 16-bit multiplications */\n" |
5401 | "#if __ARM_FEATURE_SIMD32\n" |
5402 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5403 | "__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {\n" |
5404 | " return __builtin_arm_smlad(__a, __b, __c);\n" |
5405 | "}\n" |
5406 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5407 | "__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) {\n" |
5408 | " return __builtin_arm_smladx(__a, __b, __c);\n" |
5409 | "}\n" |
5410 | "static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n" |
5411 | "__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) {\n" |
5412 | " return __builtin_arm_smlald(__a, __b, __c);\n" |
5413 | "}\n" |
5414 | "static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n" |
5415 | "__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) {\n" |
5416 | " return __builtin_arm_smlaldx(__a, __b, __c);\n" |
5417 | "}\n" |
5418 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5419 | "__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) {\n" |
5420 | " return __builtin_arm_smlsd(__a, __b, __c);\n" |
5421 | "}\n" |
5422 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5423 | "__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) {\n" |
5424 | " return __builtin_arm_smlsdx(__a, __b, __c);\n" |
5425 | "}\n" |
5426 | "static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n" |
5427 | "__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) {\n" |
5428 | " return __builtin_arm_smlsld(__a, __b, __c);\n" |
5429 | "}\n" |
5430 | "static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n" |
5431 | "__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) {\n" |
5432 | " return __builtin_arm_smlsldx(__a, __b, __c);\n" |
5433 | "}\n" |
5434 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5435 | "__smuad(int16x2_t __a, int16x2_t __b) {\n" |
5436 | " return __builtin_arm_smuad(__a, __b);\n" |
5437 | "}\n" |
5438 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5439 | "__smuadx(int16x2_t __a, int16x2_t __b) {\n" |
5440 | " return __builtin_arm_smuadx(__a, __b);\n" |
5441 | "}\n" |
5442 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5443 | "__smusd(int16x2_t __a, int16x2_t __b) {\n" |
5444 | " return __builtin_arm_smusd(__a, __b);\n" |
5445 | "}\n" |
5446 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
5447 | "__smusdx(int16x2_t __a, int16x2_t __b) {\n" |
5448 | " return __builtin_arm_smusdx(__a, __b);\n" |
5449 | "}\n" |
5450 | "#endif\n" |
5451 | "\n" |
5452 | "/* 9.7 CRC32 intrinsics */\n" |
5453 | "#if __ARM_FEATURE_CRC32\n" |
5454 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
5455 | "__crc32b(uint32_t __a, uint8_t __b) {\n" |
5456 | " return __builtin_arm_crc32b(__a, __b);\n" |
5457 | "}\n" |
5458 | "\n" |
5459 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
5460 | "__crc32h(uint32_t __a, uint16_t __b) {\n" |
5461 | " return __builtin_arm_crc32h(__a, __b);\n" |
5462 | "}\n" |
5463 | "\n" |
5464 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
5465 | "__crc32w(uint32_t __a, uint32_t __b) {\n" |
5466 | " return __builtin_arm_crc32w(__a, __b);\n" |
5467 | "}\n" |
5468 | "\n" |
5469 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
5470 | "__crc32d(uint32_t __a, uint64_t __b) {\n" |
5471 | " return __builtin_arm_crc32d(__a, __b);\n" |
5472 | "}\n" |
5473 | "\n" |
5474 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
5475 | "__crc32cb(uint32_t __a, uint8_t __b) {\n" |
5476 | " return __builtin_arm_crc32cb(__a, __b);\n" |
5477 | "}\n" |
5478 | "\n" |
5479 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
5480 | "__crc32ch(uint32_t __a, uint16_t __b) {\n" |
5481 | " return __builtin_arm_crc32ch(__a, __b);\n" |
5482 | "}\n" |
5483 | "\n" |
5484 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
5485 | "__crc32cw(uint32_t __a, uint32_t __b) {\n" |
5486 | " return __builtin_arm_crc32cw(__a, __b);\n" |
5487 | "}\n" |
5488 | "\n" |
5489 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
5490 | "__crc32cd(uint32_t __a, uint64_t __b) {\n" |
5491 | " return __builtin_arm_crc32cd(__a, __b);\n" |
5492 | "}\n" |
5493 | "#endif\n" |
5494 | "\n" |
5495 | "/* 10.1 Special register intrinsics */\n" |
5496 | "#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)\n" |
5497 | "#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)\n" |
5498 | "#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg)\n" |
5499 | "#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v)\n" |
5500 | "#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)\n" |
5501 | "#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)\n" |
5502 | "\n" |
5503 | "#if defined(__cplusplus)\n" |
5504 | "}\n" |
5505 | "#endif\n" |
5506 | "\n" |
5507 | "#endif /* __ARM_ACLE_H */\n" |
5508 | "" } , |
5509 | { "/builtins/arm_fp16.h" , "/*===---- arm_fp16.h - ARM FP16 intrinsics ---------------------------------===\n" |
5510 | " *\n" |
5511 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
5512 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
5513 | " * in the Software without restriction, including without limitation the rights\n" |
5514 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
5515 | " * copies of the Software, and to permit persons to whom the Software is\n" |
5516 | " * furnished to do so, subject to the following conditions:\n" |
5517 | " *\n" |
5518 | " * The above copyright notice and this permission notice shall be included in\n" |
5519 | " * all copies or substantial portions of the Software.\n" |
5520 | " *\n" |
5521 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
5522 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
5523 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
5524 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
5525 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
5526 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
5527 | " * THE SOFTWARE.\n" |
5528 | " *\n" |
5529 | " *===-----------------------------------------------------------------------===\n" |
5530 | " */\n" |
5531 | "\n" |
5532 | "#ifndef __ARM_FP16_H\n" |
5533 | "#define __ARM_FP16_H\n" |
5534 | "\n" |
5535 | "#include <stdint.h>\n" |
5536 | "\n" |
5537 | "typedef __fp16 float16_t;\n" |
5538 | "#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))\n" |
5539 | "\n" |
5540 | "#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)\n" |
5541 | "#ifdef __LITTLE_ENDIAN__\n" |
5542 | "#define vabdh_f16(__p0, __p1) __extension__ ({ \\\n" |
5543 | " float16_t __s0 = __p0; \\\n" |
5544 | " float16_t __s1 = __p1; \\\n" |
5545 | " float16_t __ret; \\\n" |
5546 | " __ret = (float16_t) __builtin_neon_vabdh_f16(__s0, __s1); \\\n" |
5547 | " __ret; \\\n" |
5548 | "})\n" |
5549 | "#else\n" |
5550 | "#define vabdh_f16(__p0, __p1) __extension__ ({ \\\n" |
5551 | " float16_t __s0 = __p0; \\\n" |
5552 | " float16_t __s1 = __p1; \\\n" |
5553 | " float16_t __ret; \\\n" |
5554 | " __ret = (float16_t) __builtin_neon_vabdh_f16(__s0, __s1); \\\n" |
5555 | " __ret; \\\n" |
5556 | "})\n" |
5557 | "#endif\n" |
5558 | "\n" |
5559 | "#ifdef __LITTLE_ENDIAN__\n" |
5560 | "#define vabsh_f16(__p0) __extension__ ({ \\\n" |
5561 | " float16_t __s0 = __p0; \\\n" |
5562 | " float16_t __ret; \\\n" |
5563 | " __ret = (float16_t) __builtin_neon_vabsh_f16(__s0); \\\n" |
5564 | " __ret; \\\n" |
5565 | "})\n" |
5566 | "#else\n" |
5567 | "#define vabsh_f16(__p0) __extension__ ({ \\\n" |
5568 | " float16_t __s0 = __p0; \\\n" |
5569 | " float16_t __ret; \\\n" |
5570 | " __ret = (float16_t) __builtin_neon_vabsh_f16(__s0); \\\n" |
5571 | " __ret; \\\n" |
5572 | "})\n" |
5573 | "#endif\n" |
5574 | "\n" |
5575 | "#ifdef __LITTLE_ENDIAN__\n" |
5576 | "#define vaddh_f16(__p0, __p1) __extension__ ({ \\\n" |
5577 | " float16_t __s0 = __p0; \\\n" |
5578 | " float16_t __s1 = __p1; \\\n" |
5579 | " float16_t __ret; \\\n" |
5580 | " __ret = (float16_t) __builtin_neon_vaddh_f16(__s0, __s1); \\\n" |
5581 | " __ret; \\\n" |
5582 | "})\n" |
5583 | "#else\n" |
5584 | "#define vaddh_f16(__p0, __p1) __extension__ ({ \\\n" |
5585 | " float16_t __s0 = __p0; \\\n" |
5586 | " float16_t __s1 = __p1; \\\n" |
5587 | " float16_t __ret; \\\n" |
5588 | " __ret = (float16_t) __builtin_neon_vaddh_f16(__s0, __s1); \\\n" |
5589 | " __ret; \\\n" |
5590 | "})\n" |
5591 | "#endif\n" |
5592 | "\n" |
5593 | "#ifdef __LITTLE_ENDIAN__\n" |
5594 | "#define vcageh_f16(__p0, __p1) __extension__ ({ \\\n" |
5595 | " float16_t __s0 = __p0; \\\n" |
5596 | " float16_t __s1 = __p1; \\\n" |
5597 | " uint16_t __ret; \\\n" |
5598 | " __ret = (uint16_t) __builtin_neon_vcageh_f16(__s0, __s1); \\\n" |
5599 | " __ret; \\\n" |
5600 | "})\n" |
5601 | "#else\n" |
5602 | "#define vcageh_f16(__p0, __p1) __extension__ ({ \\\n" |
5603 | " float16_t __s0 = __p0; \\\n" |
5604 | " float16_t __s1 = __p1; \\\n" |
5605 | " uint16_t __ret; \\\n" |
5606 | " __ret = (uint16_t) __builtin_neon_vcageh_f16(__s0, __s1); \\\n" |
5607 | " __ret; \\\n" |
5608 | "})\n" |
5609 | "#endif\n" |
5610 | "\n" |
5611 | "#ifdef __LITTLE_ENDIAN__\n" |
5612 | "#define vcagth_f16(__p0, __p1) __extension__ ({ \\\n" |
5613 | " float16_t __s0 = __p0; \\\n" |
5614 | " float16_t __s1 = __p1; \\\n" |
5615 | " uint16_t __ret; \\\n" |
5616 | " __ret = (uint16_t) __builtin_neon_vcagth_f16(__s0, __s1); \\\n" |
5617 | " __ret; \\\n" |
5618 | "})\n" |
5619 | "#else\n" |
5620 | "#define vcagth_f16(__p0, __p1) __extension__ ({ \\\n" |
5621 | " float16_t __s0 = __p0; \\\n" |
5622 | " float16_t __s1 = __p1; \\\n" |
5623 | " uint16_t __ret; \\\n" |
5624 | " __ret = (uint16_t) __builtin_neon_vcagth_f16(__s0, __s1); \\\n" |
5625 | " __ret; \\\n" |
5626 | "})\n" |
5627 | "#endif\n" |
5628 | "\n" |
5629 | "#ifdef __LITTLE_ENDIAN__\n" |
5630 | "#define vcaleh_f16(__p0, __p1) __extension__ ({ \\\n" |
5631 | " float16_t __s0 = __p0; \\\n" |
5632 | " float16_t __s1 = __p1; \\\n" |
5633 | " uint16_t __ret; \\\n" |
5634 | " __ret = (uint16_t) __builtin_neon_vcaleh_f16(__s0, __s1); \\\n" |
5635 | " __ret; \\\n" |
5636 | "})\n" |
5637 | "#else\n" |
5638 | "#define vcaleh_f16(__p0, __p1) __extension__ ({ \\\n" |
5639 | " float16_t __s0 = __p0; \\\n" |
5640 | " float16_t __s1 = __p1; \\\n" |
5641 | " uint16_t __ret; \\\n" |
5642 | " __ret = (uint16_t) __builtin_neon_vcaleh_f16(__s0, __s1); \\\n" |
5643 | " __ret; \\\n" |
5644 | "})\n" |
5645 | "#endif\n" |
5646 | "\n" |
5647 | "#ifdef __LITTLE_ENDIAN__\n" |
5648 | "#define vcalth_f16(__p0, __p1) __extension__ ({ \\\n" |
5649 | " float16_t __s0 = __p0; \\\n" |
5650 | " float16_t __s1 = __p1; \\\n" |
5651 | " uint16_t __ret; \\\n" |
5652 | " __ret = (uint16_t) __builtin_neon_vcalth_f16(__s0, __s1); \\\n" |
5653 | " __ret; \\\n" |
5654 | "})\n" |
5655 | "#else\n" |
5656 | "#define vcalth_f16(__p0, __p1) __extension__ ({ \\\n" |
5657 | " float16_t __s0 = __p0; \\\n" |
5658 | " float16_t __s1 = __p1; \\\n" |
5659 | " uint16_t __ret; \\\n" |
5660 | " __ret = (uint16_t) __builtin_neon_vcalth_f16(__s0, __s1); \\\n" |
5661 | " __ret; \\\n" |
5662 | "})\n" |
5663 | "#endif\n" |
5664 | "\n" |
5665 | "#ifdef __LITTLE_ENDIAN__\n" |
5666 | "#define vceqh_f16(__p0, __p1) __extension__ ({ \\\n" |
5667 | " float16_t __s0 = __p0; \\\n" |
5668 | " float16_t __s1 = __p1; \\\n" |
5669 | " uint16_t __ret; \\\n" |
5670 | " __ret = (uint16_t) __builtin_neon_vceqh_f16(__s0, __s1); \\\n" |
5671 | " __ret; \\\n" |
5672 | "})\n" |
5673 | "#else\n" |
5674 | "#define vceqh_f16(__p0, __p1) __extension__ ({ \\\n" |
5675 | " float16_t __s0 = __p0; \\\n" |
5676 | " float16_t __s1 = __p1; \\\n" |
5677 | " uint16_t __ret; \\\n" |
5678 | " __ret = (uint16_t) __builtin_neon_vceqh_f16(__s0, __s1); \\\n" |
5679 | " __ret; \\\n" |
5680 | "})\n" |
5681 | "#endif\n" |
5682 | "\n" |
5683 | "#ifdef __LITTLE_ENDIAN__\n" |
5684 | "#define vceqzh_f16(__p0) __extension__ ({ \\\n" |
5685 | " float16_t __s0 = __p0; \\\n" |
5686 | " uint16_t __ret; \\\n" |
5687 | " __ret = (uint16_t) __builtin_neon_vceqzh_f16(__s0); \\\n" |
5688 | " __ret; \\\n" |
5689 | "})\n" |
5690 | "#else\n" |
5691 | "#define vceqzh_f16(__p0) __extension__ ({ \\\n" |
5692 | " float16_t __s0 = __p0; \\\n" |
5693 | " uint16_t __ret; \\\n" |
5694 | " __ret = (uint16_t) __builtin_neon_vceqzh_f16(__s0); \\\n" |
5695 | " __ret; \\\n" |
5696 | "})\n" |
5697 | "#endif\n" |
5698 | "\n" |
5699 | "#ifdef __LITTLE_ENDIAN__\n" |
5700 | "#define vcgeh_f16(__p0, __p1) __extension__ ({ \\\n" |
5701 | " float16_t __s0 = __p0; \\\n" |
5702 | " float16_t __s1 = __p1; \\\n" |
5703 | " uint16_t __ret; \\\n" |
5704 | " __ret = (uint16_t) __builtin_neon_vcgeh_f16(__s0, __s1); \\\n" |
5705 | " __ret; \\\n" |
5706 | "})\n" |
5707 | "#else\n" |
5708 | "#define vcgeh_f16(__p0, __p1) __extension__ ({ \\\n" |
5709 | " float16_t __s0 = __p0; \\\n" |
5710 | " float16_t __s1 = __p1; \\\n" |
5711 | " uint16_t __ret; \\\n" |
5712 | " __ret = (uint16_t) __builtin_neon_vcgeh_f16(__s0, __s1); \\\n" |
5713 | " __ret; \\\n" |
5714 | "})\n" |
5715 | "#endif\n" |
5716 | "\n" |
5717 | "#ifdef __LITTLE_ENDIAN__\n" |
5718 | "#define vcgezh_f16(__p0) __extension__ ({ \\\n" |
5719 | " float16_t __s0 = __p0; \\\n" |
5720 | " uint16_t __ret; \\\n" |
5721 | " __ret = (uint16_t) __builtin_neon_vcgezh_f16(__s0); \\\n" |
5722 | " __ret; \\\n" |
5723 | "})\n" |
5724 | "#else\n" |
5725 | "#define vcgezh_f16(__p0) __extension__ ({ \\\n" |
5726 | " float16_t __s0 = __p0; \\\n" |
5727 | " uint16_t __ret; \\\n" |
5728 | " __ret = (uint16_t) __builtin_neon_vcgezh_f16(__s0); \\\n" |
5729 | " __ret; \\\n" |
5730 | "})\n" |
5731 | "#endif\n" |
5732 | "\n" |
5733 | "#ifdef __LITTLE_ENDIAN__\n" |
5734 | "#define vcgth_f16(__p0, __p1) __extension__ ({ \\\n" |
5735 | " float16_t __s0 = __p0; \\\n" |
5736 | " float16_t __s1 = __p1; \\\n" |
5737 | " uint16_t __ret; \\\n" |
5738 | " __ret = (uint16_t) __builtin_neon_vcgth_f16(__s0, __s1); \\\n" |
5739 | " __ret; \\\n" |
5740 | "})\n" |
5741 | "#else\n" |
5742 | "#define vcgth_f16(__p0, __p1) __extension__ ({ \\\n" |
5743 | " float16_t __s0 = __p0; \\\n" |
5744 | " float16_t __s1 = __p1; \\\n" |
5745 | " uint16_t __ret; \\\n" |
5746 | " __ret = (uint16_t) __builtin_neon_vcgth_f16(__s0, __s1); \\\n" |
5747 | " __ret; \\\n" |
5748 | "})\n" |
5749 | "#endif\n" |
5750 | "\n" |
5751 | "#ifdef __LITTLE_ENDIAN__\n" |
5752 | "#define vcgtzh_f16(__p0) __extension__ ({ \\\n" |
5753 | " float16_t __s0 = __p0; \\\n" |
5754 | " uint16_t __ret; \\\n" |
5755 | " __ret = (uint16_t) __builtin_neon_vcgtzh_f16(__s0); \\\n" |
5756 | " __ret; \\\n" |
5757 | "})\n" |
5758 | "#else\n" |
5759 | "#define vcgtzh_f16(__p0) __extension__ ({ \\\n" |
5760 | " float16_t __s0 = __p0; \\\n" |
5761 | " uint16_t __ret; \\\n" |
5762 | " __ret = (uint16_t) __builtin_neon_vcgtzh_f16(__s0); \\\n" |
5763 | " __ret; \\\n" |
5764 | "})\n" |
5765 | "#endif\n" |
5766 | "\n" |
5767 | "#ifdef __LITTLE_ENDIAN__\n" |
5768 | "#define vcleh_f16(__p0, __p1) __extension__ ({ \\\n" |
5769 | " float16_t __s0 = __p0; \\\n" |
5770 | " float16_t __s1 = __p1; \\\n" |
5771 | " uint16_t __ret; \\\n" |
5772 | " __ret = (uint16_t) __builtin_neon_vcleh_f16(__s0, __s1); \\\n" |
5773 | " __ret; \\\n" |
5774 | "})\n" |
5775 | "#else\n" |
5776 | "#define vcleh_f16(__p0, __p1) __extension__ ({ \\\n" |
5777 | " float16_t __s0 = __p0; \\\n" |
5778 | " float16_t __s1 = __p1; \\\n" |
5779 | " uint16_t __ret; \\\n" |
5780 | " __ret = (uint16_t) __builtin_neon_vcleh_f16(__s0, __s1); \\\n" |
5781 | " __ret; \\\n" |
5782 | "})\n" |
5783 | "#endif\n" |
5784 | "\n" |
5785 | "#ifdef __LITTLE_ENDIAN__\n" |
5786 | "#define vclezh_f16(__p0) __extension__ ({ \\\n" |
5787 | " float16_t __s0 = __p0; \\\n" |
5788 | " uint16_t __ret; \\\n" |
5789 | " __ret = (uint16_t) __builtin_neon_vclezh_f16(__s0); \\\n" |
5790 | " __ret; \\\n" |
5791 | "})\n" |
5792 | "#else\n" |
5793 | "#define vclezh_f16(__p0) __extension__ ({ \\\n" |
5794 | " float16_t __s0 = __p0; \\\n" |
5795 | " uint16_t __ret; \\\n" |
5796 | " __ret = (uint16_t) __builtin_neon_vclezh_f16(__s0); \\\n" |
5797 | " __ret; \\\n" |
5798 | "})\n" |
5799 | "#endif\n" |
5800 | "\n" |
5801 | "#ifdef __LITTLE_ENDIAN__\n" |
5802 | "#define vclth_f16(__p0, __p1) __extension__ ({ \\\n" |
5803 | " float16_t __s0 = __p0; \\\n" |
5804 | " float16_t __s1 = __p1; \\\n" |
5805 | " uint16_t __ret; \\\n" |
5806 | " __ret = (uint16_t) __builtin_neon_vclth_f16(__s0, __s1); \\\n" |
5807 | " __ret; \\\n" |
5808 | "})\n" |
5809 | "#else\n" |
5810 | "#define vclth_f16(__p0, __p1) __extension__ ({ \\\n" |
5811 | " float16_t __s0 = __p0; \\\n" |
5812 | " float16_t __s1 = __p1; \\\n" |
5813 | " uint16_t __ret; \\\n" |
5814 | " __ret = (uint16_t) __builtin_neon_vclth_f16(__s0, __s1); \\\n" |
5815 | " __ret; \\\n" |
5816 | "})\n" |
5817 | "#endif\n" |
5818 | "\n" |
5819 | "#ifdef __LITTLE_ENDIAN__\n" |
5820 | "#define vcltzh_f16(__p0) __extension__ ({ \\\n" |
5821 | " float16_t __s0 = __p0; \\\n" |
5822 | " uint16_t __ret; \\\n" |
5823 | " __ret = (uint16_t) __builtin_neon_vcltzh_f16(__s0); \\\n" |
5824 | " __ret; \\\n" |
5825 | "})\n" |
5826 | "#else\n" |
5827 | "#define vcltzh_f16(__p0) __extension__ ({ \\\n" |
5828 | " float16_t __s0 = __p0; \\\n" |
5829 | " uint16_t __ret; \\\n" |
5830 | " __ret = (uint16_t) __builtin_neon_vcltzh_f16(__s0); \\\n" |
5831 | " __ret; \\\n" |
5832 | "})\n" |
5833 | "#endif\n" |
5834 | "\n" |
5835 | "#ifdef __LITTLE_ENDIAN__\n" |
5836 | "#define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \\\n" |
5837 | " float16_t __s0 = __p0; \\\n" |
5838 | " int16_t __ret; \\\n" |
5839 | " __ret = (int16_t) __builtin_neon_vcvth_n_s16_f16(__s0, __p1); \\\n" |
5840 | " __ret; \\\n" |
5841 | "})\n" |
5842 | "#else\n" |
5843 | "#define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \\\n" |
5844 | " float16_t __s0 = __p0; \\\n" |
5845 | " int16_t __ret; \\\n" |
5846 | " __ret = (int16_t) __builtin_neon_vcvth_n_s16_f16(__s0, __p1); \\\n" |
5847 | " __ret; \\\n" |
5848 | "})\n" |
5849 | "#endif\n" |
5850 | "\n" |
5851 | "#ifdef __LITTLE_ENDIAN__\n" |
5852 | "#define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \\\n" |
5853 | " float16_t __s0 = __p0; \\\n" |
5854 | " int32_t __ret; \\\n" |
5855 | " __ret = (int32_t) __builtin_neon_vcvth_n_s32_f16(__s0, __p1); \\\n" |
5856 | " __ret; \\\n" |
5857 | "})\n" |
5858 | "#else\n" |
5859 | "#define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \\\n" |
5860 | " float16_t __s0 = __p0; \\\n" |
5861 | " int32_t __ret; \\\n" |
5862 | " __ret = (int32_t) __builtin_neon_vcvth_n_s32_f16(__s0, __p1); \\\n" |
5863 | " __ret; \\\n" |
5864 | "})\n" |
5865 | "#endif\n" |
5866 | "\n" |
5867 | "#ifdef __LITTLE_ENDIAN__\n" |
5868 | "#define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \\\n" |
5869 | " float16_t __s0 = __p0; \\\n" |
5870 | " int64_t __ret; \\\n" |
5871 | " __ret = (int64_t) __builtin_neon_vcvth_n_s64_f16(__s0, __p1); \\\n" |
5872 | " __ret; \\\n" |
5873 | "})\n" |
5874 | "#else\n" |
5875 | "#define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \\\n" |
5876 | " float16_t __s0 = __p0; \\\n" |
5877 | " int64_t __ret; \\\n" |
5878 | " __ret = (int64_t) __builtin_neon_vcvth_n_s64_f16(__s0, __p1); \\\n" |
5879 | " __ret; \\\n" |
5880 | "})\n" |
5881 | "#endif\n" |
5882 | "\n" |
5883 | "#ifdef __LITTLE_ENDIAN__\n" |
5884 | "#define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \\\n" |
5885 | " float16_t __s0 = __p0; \\\n" |
5886 | " uint16_t __ret; \\\n" |
5887 | " __ret = (uint16_t) __builtin_neon_vcvth_n_u16_f16(__s0, __p1); \\\n" |
5888 | " __ret; \\\n" |
5889 | "})\n" |
5890 | "#else\n" |
5891 | "#define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \\\n" |
5892 | " float16_t __s0 = __p0; \\\n" |
5893 | " uint16_t __ret; \\\n" |
5894 | " __ret = (uint16_t) __builtin_neon_vcvth_n_u16_f16(__s0, __p1); \\\n" |
5895 | " __ret; \\\n" |
5896 | "})\n" |
5897 | "#endif\n" |
5898 | "\n" |
5899 | "#ifdef __LITTLE_ENDIAN__\n" |
5900 | "#define vcvth_n_u32_f16(__p0, __p1) __extension__ ({ \\\n" |
5901 | " float16_t __s0 = __p0; \\\n" |
5902 | " uint32_t __ret; \\\n" |
5903 | " __ret = (uint32_t) __builtin_neon_vcvth_n_u32_f16(__s0, __p1); \\\n" |
5904 | " __ret; \\\n" |
5905 | "})\n" |
5906 | "#else\n" |
5907 | "#define vcvth_n_u32_f16(__p0, __p1) __extension__ ({ \\\n" |
5908 | " float16_t __s0 = __p0; \\\n" |
5909 | " uint32_t __ret; \\\n" |
5910 | " __ret = (uint32_t) __builtin_neon_vcvth_n_u32_f16(__s0, __p1); \\\n" |
5911 | " __ret; \\\n" |
5912 | "})\n" |
5913 | "#endif\n" |
5914 | "\n" |
5915 | "#ifdef __LITTLE_ENDIAN__\n" |
5916 | "#define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \\\n" |
5917 | " float16_t __s0 = __p0; \\\n" |
5918 | " uint64_t __ret; \\\n" |
5919 | " __ret = (uint64_t) __builtin_neon_vcvth_n_u64_f16(__s0, __p1); \\\n" |
5920 | " __ret; \\\n" |
5921 | "})\n" |
5922 | "#else\n" |
5923 | "#define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \\\n" |
5924 | " float16_t __s0 = __p0; \\\n" |
5925 | " uint64_t __ret; \\\n" |
5926 | " __ret = (uint64_t) __builtin_neon_vcvth_n_u64_f16(__s0, __p1); \\\n" |
5927 | " __ret; \\\n" |
5928 | "})\n" |
5929 | "#endif\n" |
5930 | "\n" |
5931 | "#ifdef __LITTLE_ENDIAN__\n" |
5932 | "#define vcvth_s16_f16(__p0) __extension__ ({ \\\n" |
5933 | " float16_t __s0 = __p0; \\\n" |
5934 | " int16_t __ret; \\\n" |
5935 | " __ret = (int16_t) __builtin_neon_vcvth_s16_f16(__s0); \\\n" |
5936 | " __ret; \\\n" |
5937 | "})\n" |
5938 | "#else\n" |
5939 | "#define vcvth_s16_f16(__p0) __extension__ ({ \\\n" |
5940 | " float16_t __s0 = __p0; \\\n" |
5941 | " int16_t __ret; \\\n" |
5942 | " __ret = (int16_t) __builtin_neon_vcvth_s16_f16(__s0); \\\n" |
5943 | " __ret; \\\n" |
5944 | "})\n" |
5945 | "#endif\n" |
5946 | "\n" |
5947 | "#ifdef __LITTLE_ENDIAN__\n" |
5948 | "#define vcvth_s32_f16(__p0) __extension__ ({ \\\n" |
5949 | " float16_t __s0 = __p0; \\\n" |
5950 | " int32_t __ret; \\\n" |
5951 | " __ret = (int32_t) __builtin_neon_vcvth_s32_f16(__s0); \\\n" |
5952 | " __ret; \\\n" |
5953 | "})\n" |
5954 | "#else\n" |
5955 | "#define vcvth_s32_f16(__p0) __extension__ ({ \\\n" |
5956 | " float16_t __s0 = __p0; \\\n" |
5957 | " int32_t __ret; \\\n" |
5958 | " __ret = (int32_t) __builtin_neon_vcvth_s32_f16(__s0); \\\n" |
5959 | " __ret; \\\n" |
5960 | "})\n" |
5961 | "#endif\n" |
5962 | "\n" |
5963 | "#ifdef __LITTLE_ENDIAN__\n" |
5964 | "#define vcvth_s64_f16(__p0) __extension__ ({ \\\n" |
5965 | " float16_t __s0 = __p0; \\\n" |
5966 | " int64_t __ret; \\\n" |
5967 | " __ret = (int64_t) __builtin_neon_vcvth_s64_f16(__s0); \\\n" |
5968 | " __ret; \\\n" |
5969 | "})\n" |
5970 | "#else\n" |
5971 | "#define vcvth_s64_f16(__p0) __extension__ ({ \\\n" |
5972 | " float16_t __s0 = __p0; \\\n" |
5973 | " int64_t __ret; \\\n" |
5974 | " __ret = (int64_t) __builtin_neon_vcvth_s64_f16(__s0); \\\n" |
5975 | " __ret; \\\n" |
5976 | "})\n" |
5977 | "#endif\n" |
5978 | "\n" |
5979 | "#ifdef __LITTLE_ENDIAN__\n" |
5980 | "#define vcvth_u16_f16(__p0) __extension__ ({ \\\n" |
5981 | " float16_t __s0 = __p0; \\\n" |
5982 | " uint16_t __ret; \\\n" |
5983 | " __ret = (uint16_t) __builtin_neon_vcvth_u16_f16(__s0); \\\n" |
5984 | " __ret; \\\n" |
5985 | "})\n" |
5986 | "#else\n" |
5987 | "#define vcvth_u16_f16(__p0) __extension__ ({ \\\n" |
5988 | " float16_t __s0 = __p0; \\\n" |
5989 | " uint16_t __ret; \\\n" |
5990 | " __ret = (uint16_t) __builtin_neon_vcvth_u16_f16(__s0); \\\n" |
5991 | " __ret; \\\n" |
5992 | "})\n" |
5993 | "#endif\n" |
5994 | "\n" |
5995 | "#ifdef __LITTLE_ENDIAN__\n" |
5996 | "#define vcvth_u32_f16(__p0) __extension__ ({ \\\n" |
5997 | " float16_t __s0 = __p0; \\\n" |
5998 | " uint32_t __ret; \\\n" |
5999 | " __ret = (uint32_t) __builtin_neon_vcvth_u32_f16(__s0); \\\n" |
6000 | " __ret; \\\n" |
6001 | "})\n" |
6002 | "#else\n" |
6003 | "#define vcvth_u32_f16(__p0) __extension__ ({ \\\n" |
6004 | " float16_t __s0 = __p0; \\\n" |
6005 | " uint32_t __ret; \\\n" |
6006 | " __ret = (uint32_t) __builtin_neon_vcvth_u32_f16(__s0); \\\n" |
6007 | " __ret; \\\n" |
6008 | "})\n" |
6009 | "#endif\n" |
6010 | "\n" |
6011 | "#ifdef __LITTLE_ENDIAN__\n" |
6012 | "#define vcvth_u64_f16(__p0) __extension__ ({ \\\n" |
6013 | " float16_t __s0 = __p0; \\\n" |
6014 | " uint64_t __ret; \\\n" |
6015 | " __ret = (uint64_t) __builtin_neon_vcvth_u64_f16(__s0); \\\n" |
6016 | " __ret; \\\n" |
6017 | "})\n" |
6018 | "#else\n" |
6019 | "#define vcvth_u64_f16(__p0) __extension__ ({ \\\n" |
6020 | " float16_t __s0 = __p0; \\\n" |
6021 | " uint64_t __ret; \\\n" |
6022 | " __ret = (uint64_t) __builtin_neon_vcvth_u64_f16(__s0); \\\n" |
6023 | " __ret; \\\n" |
6024 | "})\n" |
6025 | "#endif\n" |
6026 | "\n" |
6027 | "#ifdef __LITTLE_ENDIAN__\n" |
6028 | "#define vcvtah_s16_f16(__p0) __extension__ ({ \\\n" |
6029 | " float16_t __s0 = __p0; \\\n" |
6030 | " int16_t __ret; \\\n" |
6031 | " __ret = (int16_t) __builtin_neon_vcvtah_s16_f16(__s0); \\\n" |
6032 | " __ret; \\\n" |
6033 | "})\n" |
6034 | "#else\n" |
6035 | "#define vcvtah_s16_f16(__p0) __extension__ ({ \\\n" |
6036 | " float16_t __s0 = __p0; \\\n" |
6037 | " int16_t __ret; \\\n" |
6038 | " __ret = (int16_t) __builtin_neon_vcvtah_s16_f16(__s0); \\\n" |
6039 | " __ret; \\\n" |
6040 | "})\n" |
6041 | "#endif\n" |
6042 | "\n" |
6043 | "#ifdef __LITTLE_ENDIAN__\n" |
6044 | "#define vcvtah_s32_f16(__p0) __extension__ ({ \\\n" |
6045 | " float16_t __s0 = __p0; \\\n" |
6046 | " int32_t __ret; \\\n" |
6047 | " __ret = (int32_t) __builtin_neon_vcvtah_s32_f16(__s0); \\\n" |
6048 | " __ret; \\\n" |
6049 | "})\n" |
6050 | "#else\n" |
6051 | "#define vcvtah_s32_f16(__p0) __extension__ ({ \\\n" |
6052 | " float16_t __s0 = __p0; \\\n" |
6053 | " int32_t __ret; \\\n" |
6054 | " __ret = (int32_t) __builtin_neon_vcvtah_s32_f16(__s0); \\\n" |
6055 | " __ret; \\\n" |
6056 | "})\n" |
6057 | "#endif\n" |
6058 | "\n" |
6059 | "#ifdef __LITTLE_ENDIAN__\n" |
6060 | "#define vcvtah_s64_f16(__p0) __extension__ ({ \\\n" |
6061 | " float16_t __s0 = __p0; \\\n" |
6062 | " int64_t __ret; \\\n" |
6063 | " __ret = (int64_t) __builtin_neon_vcvtah_s64_f16(__s0); \\\n" |
6064 | " __ret; \\\n" |
6065 | "})\n" |
6066 | "#else\n" |
6067 | "#define vcvtah_s64_f16(__p0) __extension__ ({ \\\n" |
6068 | " float16_t __s0 = __p0; \\\n" |
6069 | " int64_t __ret; \\\n" |
6070 | " __ret = (int64_t) __builtin_neon_vcvtah_s64_f16(__s0); \\\n" |
6071 | " __ret; \\\n" |
6072 | "})\n" |
6073 | "#endif\n" |
6074 | "\n" |
6075 | "#ifdef __LITTLE_ENDIAN__\n" |
6076 | "#define vcvtah_u16_f16(__p0) __extension__ ({ \\\n" |
6077 | " float16_t __s0 = __p0; \\\n" |
6078 | " uint16_t __ret; \\\n" |
6079 | " __ret = (uint16_t) __builtin_neon_vcvtah_u16_f16(__s0); \\\n" |
6080 | " __ret; \\\n" |
6081 | "})\n" |
6082 | "#else\n" |
6083 | "#define vcvtah_u16_f16(__p0) __extension__ ({ \\\n" |
6084 | " float16_t __s0 = __p0; \\\n" |
6085 | " uint16_t __ret; \\\n" |
6086 | " __ret = (uint16_t) __builtin_neon_vcvtah_u16_f16(__s0); \\\n" |
6087 | " __ret; \\\n" |
6088 | "})\n" |
6089 | "#endif\n" |
6090 | "\n" |
6091 | "#ifdef __LITTLE_ENDIAN__\n" |
6092 | "#define vcvtah_u32_f16(__p0) __extension__ ({ \\\n" |
6093 | " float16_t __s0 = __p0; \\\n" |
6094 | " uint32_t __ret; \\\n" |
6095 | " __ret = (uint32_t) __builtin_neon_vcvtah_u32_f16(__s0); \\\n" |
6096 | " __ret; \\\n" |
6097 | "})\n" |
6098 | "#else\n" |
6099 | "#define vcvtah_u32_f16(__p0) __extension__ ({ \\\n" |
6100 | " float16_t __s0 = __p0; \\\n" |
6101 | " uint32_t __ret; \\\n" |
6102 | " __ret = (uint32_t) __builtin_neon_vcvtah_u32_f16(__s0); \\\n" |
6103 | " __ret; \\\n" |
6104 | "})\n" |
6105 | "#endif\n" |
6106 | "\n" |
6107 | "#ifdef __LITTLE_ENDIAN__\n" |
6108 | "#define vcvtah_u64_f16(__p0) __extension__ ({ \\\n" |
6109 | " float16_t __s0 = __p0; \\\n" |
6110 | " uint64_t __ret; \\\n" |
6111 | " __ret = (uint64_t) __builtin_neon_vcvtah_u64_f16(__s0); \\\n" |
6112 | " __ret; \\\n" |
6113 | "})\n" |
6114 | "#else\n" |
6115 | "#define vcvtah_u64_f16(__p0) __extension__ ({ \\\n" |
6116 | " float16_t __s0 = __p0; \\\n" |
6117 | " uint64_t __ret; \\\n" |
6118 | " __ret = (uint64_t) __builtin_neon_vcvtah_u64_f16(__s0); \\\n" |
6119 | " __ret; \\\n" |
6120 | "})\n" |
6121 | "#endif\n" |
6122 | "\n" |
6123 | "#ifdef __LITTLE_ENDIAN__\n" |
6124 | "__ai float16_t vcvth_f16_u32(uint32_t __p0) {\n" |
6125 | " float16_t __ret;\n" |
6126 | " __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__p0);\n" |
6127 | " return __ret;\n" |
6128 | "}\n" |
6129 | "#else\n" |
6130 | "__ai float16_t vcvth_f16_u32(uint32_t __p0) {\n" |
6131 | " float16_t __ret;\n" |
6132 | " __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__p0);\n" |
6133 | " return __ret;\n" |
6134 | "}\n" |
6135 | "#endif\n" |
6136 | "\n" |
6137 | "#ifdef __LITTLE_ENDIAN__\n" |
6138 | "__ai float16_t vcvth_f16_u64(uint64_t __p0) {\n" |
6139 | " float16_t __ret;\n" |
6140 | " __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__p0);\n" |
6141 | " return __ret;\n" |
6142 | "}\n" |
6143 | "#else\n" |
6144 | "__ai float16_t vcvth_f16_u64(uint64_t __p0) {\n" |
6145 | " float16_t __ret;\n" |
6146 | " __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__p0);\n" |
6147 | " return __ret;\n" |
6148 | "}\n" |
6149 | "#endif\n" |
6150 | "\n" |
6151 | "#ifdef __LITTLE_ENDIAN__\n" |
6152 | "__ai float16_t vcvth_f16_u16(uint16_t __p0) {\n" |
6153 | " float16_t __ret;\n" |
6154 | " __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__p0);\n" |
6155 | " return __ret;\n" |
6156 | "}\n" |
6157 | "#else\n" |
6158 | "__ai float16_t vcvth_f16_u16(uint16_t __p0) {\n" |
6159 | " float16_t __ret;\n" |
6160 | " __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__p0);\n" |
6161 | " return __ret;\n" |
6162 | "}\n" |
6163 | "#endif\n" |
6164 | "\n" |
6165 | "#ifdef __LITTLE_ENDIAN__\n" |
6166 | "__ai float16_t vcvth_f16_s32(int32_t __p0) {\n" |
6167 | " float16_t __ret;\n" |
6168 | " __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__p0);\n" |
6169 | " return __ret;\n" |
6170 | "}\n" |
6171 | "#else\n" |
6172 | "__ai float16_t vcvth_f16_s32(int32_t __p0) {\n" |
6173 | " float16_t __ret;\n" |
6174 | " __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__p0);\n" |
6175 | " return __ret;\n" |
6176 | "}\n" |
6177 | "#endif\n" |
6178 | "\n" |
6179 | "#ifdef __LITTLE_ENDIAN__\n" |
6180 | "__ai float16_t vcvth_f16_s64(int64_t __p0) {\n" |
6181 | " float16_t __ret;\n" |
6182 | " __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__p0);\n" |
6183 | " return __ret;\n" |
6184 | "}\n" |
6185 | "#else\n" |
6186 | "__ai float16_t vcvth_f16_s64(int64_t __p0) {\n" |
6187 | " float16_t __ret;\n" |
6188 | " __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__p0);\n" |
6189 | " return __ret;\n" |
6190 | "}\n" |
6191 | "#endif\n" |
6192 | "\n" |
6193 | "#ifdef __LITTLE_ENDIAN__\n" |
6194 | "__ai float16_t vcvth_f16_s16(int16_t __p0) {\n" |
6195 | " float16_t __ret;\n" |
6196 | " __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__p0);\n" |
6197 | " return __ret;\n" |
6198 | "}\n" |
6199 | "#else\n" |
6200 | "__ai float16_t vcvth_f16_s16(int16_t __p0) {\n" |
6201 | " float16_t __ret;\n" |
6202 | " __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__p0);\n" |
6203 | " return __ret;\n" |
6204 | "}\n" |
6205 | "#endif\n" |
6206 | "\n" |
6207 | "#ifdef __LITTLE_ENDIAN__\n" |
6208 | "#define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \\\n" |
6209 | " uint32_t __s0 = __p0; \\\n" |
6210 | " float16_t __ret; \\\n" |
6211 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_u32(__s0, __p1); \\\n" |
6212 | " __ret; \\\n" |
6213 | "})\n" |
6214 | "#else\n" |
6215 | "#define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \\\n" |
6216 | " uint32_t __s0 = __p0; \\\n" |
6217 | " float16_t __ret; \\\n" |
6218 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_u32(__s0, __p1); \\\n" |
6219 | " __ret; \\\n" |
6220 | "})\n" |
6221 | "#endif\n" |
6222 | "\n" |
6223 | "#ifdef __LITTLE_ENDIAN__\n" |
6224 | "#define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \\\n" |
6225 | " uint64_t __s0 = __p0; \\\n" |
6226 | " float16_t __ret; \\\n" |
6227 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \\\n" |
6228 | " __ret; \\\n" |
6229 | "})\n" |
6230 | "#else\n" |
6231 | "#define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \\\n" |
6232 | " uint64_t __s0 = __p0; \\\n" |
6233 | " float16_t __ret; \\\n" |
6234 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \\\n" |
6235 | " __ret; \\\n" |
6236 | "})\n" |
6237 | "#endif\n" |
6238 | "\n" |
6239 | "#ifdef __LITTLE_ENDIAN__\n" |
6240 | "#define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \\\n" |
6241 | " uint16_t __s0 = __p0; \\\n" |
6242 | " float16_t __ret; \\\n" |
6243 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \\\n" |
6244 | " __ret; \\\n" |
6245 | "})\n" |
6246 | "#else\n" |
6247 | "#define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \\\n" |
6248 | " uint16_t __s0 = __p0; \\\n" |
6249 | " float16_t __ret; \\\n" |
6250 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \\\n" |
6251 | " __ret; \\\n" |
6252 | "})\n" |
6253 | "#endif\n" |
6254 | "\n" |
6255 | "#ifdef __LITTLE_ENDIAN__\n" |
6256 | "#define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \\\n" |
6257 | " int32_t __s0 = __p0; \\\n" |
6258 | " float16_t __ret; \\\n" |
6259 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_s32(__s0, __p1); \\\n" |
6260 | " __ret; \\\n" |
6261 | "})\n" |
6262 | "#else\n" |
6263 | "#define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \\\n" |
6264 | " int32_t __s0 = __p0; \\\n" |
6265 | " float16_t __ret; \\\n" |
6266 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_s32(__s0, __p1); \\\n" |
6267 | " __ret; \\\n" |
6268 | "})\n" |
6269 | "#endif\n" |
6270 | "\n" |
6271 | "#ifdef __LITTLE_ENDIAN__\n" |
6272 | "#define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \\\n" |
6273 | " int64_t __s0 = __p0; \\\n" |
6274 | " float16_t __ret; \\\n" |
6275 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_s64(__s0, __p1); \\\n" |
6276 | " __ret; \\\n" |
6277 | "})\n" |
6278 | "#else\n" |
6279 | "#define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \\\n" |
6280 | " int64_t __s0 = __p0; \\\n" |
6281 | " float16_t __ret; \\\n" |
6282 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_s64(__s0, __p1); \\\n" |
6283 | " __ret; \\\n" |
6284 | "})\n" |
6285 | "#endif\n" |
6286 | "\n" |
6287 | "#ifdef __LITTLE_ENDIAN__\n" |
6288 | "#define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \\\n" |
6289 | " int16_t __s0 = __p0; \\\n" |
6290 | " float16_t __ret; \\\n" |
6291 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_s16(__s0, __p1); \\\n" |
6292 | " __ret; \\\n" |
6293 | "})\n" |
6294 | "#else\n" |
6295 | "#define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \\\n" |
6296 | " int16_t __s0 = __p0; \\\n" |
6297 | " float16_t __ret; \\\n" |
6298 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_s16(__s0, __p1); \\\n" |
6299 | " __ret; \\\n" |
6300 | "})\n" |
6301 | "#endif\n" |
6302 | "\n" |
6303 | "#ifdef __LITTLE_ENDIAN__\n" |
6304 | "#define vcvtmh_s16_f16(__p0) __extension__ ({ \\\n" |
6305 | " float16_t __s0 = __p0; \\\n" |
6306 | " int16_t __ret; \\\n" |
6307 | " __ret = (int16_t) __builtin_neon_vcvtmh_s16_f16(__s0); \\\n" |
6308 | " __ret; \\\n" |
6309 | "})\n" |
6310 | "#else\n" |
6311 | "#define vcvtmh_s16_f16(__p0) __extension__ ({ \\\n" |
6312 | " float16_t __s0 = __p0; \\\n" |
6313 | " int16_t __ret; \\\n" |
6314 | " __ret = (int16_t) __builtin_neon_vcvtmh_s16_f16(__s0); \\\n" |
6315 | " __ret; \\\n" |
6316 | "})\n" |
6317 | "#endif\n" |
6318 | "\n" |
6319 | "#ifdef __LITTLE_ENDIAN__\n" |
6320 | "#define vcvtmh_s32_f16(__p0) __extension__ ({ \\\n" |
6321 | " float16_t __s0 = __p0; \\\n" |
6322 | " int32_t __ret; \\\n" |
6323 | " __ret = (int32_t) __builtin_neon_vcvtmh_s32_f16(__s0); \\\n" |
6324 | " __ret; \\\n" |
6325 | "})\n" |
6326 | "#else\n" |
6327 | "#define vcvtmh_s32_f16(__p0) __extension__ ({ \\\n" |
6328 | " float16_t __s0 = __p0; \\\n" |
6329 | " int32_t __ret; \\\n" |
6330 | " __ret = (int32_t) __builtin_neon_vcvtmh_s32_f16(__s0); \\\n" |
6331 | " __ret; \\\n" |
6332 | "})\n" |
6333 | "#endif\n" |
6334 | "\n" |
6335 | "#ifdef __LITTLE_ENDIAN__\n" |
6336 | "#define vcvtmh_s64_f16(__p0) __extension__ ({ \\\n" |
6337 | " float16_t __s0 = __p0; \\\n" |
6338 | " int64_t __ret; \\\n" |
6339 | " __ret = (int64_t) __builtin_neon_vcvtmh_s64_f16(__s0); \\\n" |
6340 | " __ret; \\\n" |
6341 | "})\n" |
6342 | "#else\n" |
6343 | "#define vcvtmh_s64_f16(__p0) __extension__ ({ \\\n" |
6344 | " float16_t __s0 = __p0; \\\n" |
6345 | " int64_t __ret; \\\n" |
6346 | " __ret = (int64_t) __builtin_neon_vcvtmh_s64_f16(__s0); \\\n" |
6347 | " __ret; \\\n" |
6348 | "})\n" |
6349 | "#endif\n" |
6350 | "\n" |
6351 | "#ifdef __LITTLE_ENDIAN__\n" |
6352 | "#define vcvtmh_u16_f16(__p0) __extension__ ({ \\\n" |
6353 | " float16_t __s0 = __p0; \\\n" |
6354 | " uint16_t __ret; \\\n" |
6355 | " __ret = (uint16_t) __builtin_neon_vcvtmh_u16_f16(__s0); \\\n" |
6356 | " __ret; \\\n" |
6357 | "})\n" |
6358 | "#else\n" |
6359 | "#define vcvtmh_u16_f16(__p0) __extension__ ({ \\\n" |
6360 | " float16_t __s0 = __p0; \\\n" |
6361 | " uint16_t __ret; \\\n" |
6362 | " __ret = (uint16_t) __builtin_neon_vcvtmh_u16_f16(__s0); \\\n" |
6363 | " __ret; \\\n" |
6364 | "})\n" |
6365 | "#endif\n" |
6366 | "\n" |
6367 | "#ifdef __LITTLE_ENDIAN__\n" |
6368 | "#define vcvtmh_u32_f16(__p0) __extension__ ({ \\\n" |
6369 | " float16_t __s0 = __p0; \\\n" |
6370 | " uint32_t __ret; \\\n" |
6371 | " __ret = (uint32_t) __builtin_neon_vcvtmh_u32_f16(__s0); \\\n" |
6372 | " __ret; \\\n" |
6373 | "})\n" |
6374 | "#else\n" |
6375 | "#define vcvtmh_u32_f16(__p0) __extension__ ({ \\\n" |
6376 | " float16_t __s0 = __p0; \\\n" |
6377 | " uint32_t __ret; \\\n" |
6378 | " __ret = (uint32_t) __builtin_neon_vcvtmh_u32_f16(__s0); \\\n" |
6379 | " __ret; \\\n" |
6380 | "})\n" |
6381 | "#endif\n" |
6382 | "\n" |
6383 | "#ifdef __LITTLE_ENDIAN__\n" |
6384 | "#define vcvtmh_u64_f16(__p0) __extension__ ({ \\\n" |
6385 | " float16_t __s0 = __p0; \\\n" |
6386 | " uint64_t __ret; \\\n" |
6387 | " __ret = (uint64_t) __builtin_neon_vcvtmh_u64_f16(__s0); \\\n" |
6388 | " __ret; \\\n" |
6389 | "})\n" |
6390 | "#else\n" |
6391 | "#define vcvtmh_u64_f16(__p0) __extension__ ({ \\\n" |
6392 | " float16_t __s0 = __p0; \\\n" |
6393 | " uint64_t __ret; \\\n" |
6394 | " __ret = (uint64_t) __builtin_neon_vcvtmh_u64_f16(__s0); \\\n" |
6395 | " __ret; \\\n" |
6396 | "})\n" |
6397 | "#endif\n" |
6398 | "\n" |
6399 | "#ifdef __LITTLE_ENDIAN__\n" |
6400 | "#define vcvtnh_s16_f16(__p0) __extension__ ({ \\\n" |
6401 | " float16_t __s0 = __p0; \\\n" |
6402 | " int16_t __ret; \\\n" |
6403 | " __ret = (int16_t) __builtin_neon_vcvtnh_s16_f16(__s0); \\\n" |
6404 | " __ret; \\\n" |
6405 | "})\n" |
6406 | "#else\n" |
6407 | "#define vcvtnh_s16_f16(__p0) __extension__ ({ \\\n" |
6408 | " float16_t __s0 = __p0; \\\n" |
6409 | " int16_t __ret; \\\n" |
6410 | " __ret = (int16_t) __builtin_neon_vcvtnh_s16_f16(__s0); \\\n" |
6411 | " __ret; \\\n" |
6412 | "})\n" |
6413 | "#endif\n" |
6414 | "\n" |
6415 | "#ifdef __LITTLE_ENDIAN__\n" |
6416 | "#define vcvtnh_s32_f16(__p0) __extension__ ({ \\\n" |
6417 | " float16_t __s0 = __p0; \\\n" |
6418 | " int32_t __ret; \\\n" |
6419 | " __ret = (int32_t) __builtin_neon_vcvtnh_s32_f16(__s0); \\\n" |
6420 | " __ret; \\\n" |
6421 | "})\n" |
6422 | "#else\n" |
6423 | "#define vcvtnh_s32_f16(__p0) __extension__ ({ \\\n" |
6424 | " float16_t __s0 = __p0; \\\n" |
6425 | " int32_t __ret; \\\n" |
6426 | " __ret = (int32_t) __builtin_neon_vcvtnh_s32_f16(__s0); \\\n" |
6427 | " __ret; \\\n" |
6428 | "})\n" |
6429 | "#endif\n" |
6430 | "\n" |
6431 | "#ifdef __LITTLE_ENDIAN__\n" |
6432 | "#define vcvtnh_s64_f16(__p0) __extension__ ({ \\\n" |
6433 | " float16_t __s0 = __p0; \\\n" |
6434 | " int64_t __ret; \\\n" |
6435 | " __ret = (int64_t) __builtin_neon_vcvtnh_s64_f16(__s0); \\\n" |
6436 | " __ret; \\\n" |
6437 | "})\n" |
6438 | "#else\n" |
6439 | "#define vcvtnh_s64_f16(__p0) __extension__ ({ \\\n" |
6440 | " float16_t __s0 = __p0; \\\n" |
6441 | " int64_t __ret; \\\n" |
6442 | " __ret = (int64_t) __builtin_neon_vcvtnh_s64_f16(__s0); \\\n" |
6443 | " __ret; \\\n" |
6444 | "})\n" |
6445 | "#endif\n" |
6446 | "\n" |
6447 | "#ifdef __LITTLE_ENDIAN__\n" |
6448 | "#define vcvtnh_u16_f16(__p0) __extension__ ({ \\\n" |
6449 | " float16_t __s0 = __p0; \\\n" |
6450 | " uint16_t __ret; \\\n" |
6451 | " __ret = (uint16_t) __builtin_neon_vcvtnh_u16_f16(__s0); \\\n" |
6452 | " __ret; \\\n" |
6453 | "})\n" |
6454 | "#else\n" |
6455 | "#define vcvtnh_u16_f16(__p0) __extension__ ({ \\\n" |
6456 | " float16_t __s0 = __p0; \\\n" |
6457 | " uint16_t __ret; \\\n" |
6458 | " __ret = (uint16_t) __builtin_neon_vcvtnh_u16_f16(__s0); \\\n" |
6459 | " __ret; \\\n" |
6460 | "})\n" |
6461 | "#endif\n" |
6462 | "\n" |
6463 | "#ifdef __LITTLE_ENDIAN__\n" |
6464 | "#define vcvtnh_u32_f16(__p0) __extension__ ({ \\\n" |
6465 | " float16_t __s0 = __p0; \\\n" |
6466 | " uint32_t __ret; \\\n" |
6467 | " __ret = (uint32_t) __builtin_neon_vcvtnh_u32_f16(__s0); \\\n" |
6468 | " __ret; \\\n" |
6469 | "})\n" |
6470 | "#else\n" |
6471 | "#define vcvtnh_u32_f16(__p0) __extension__ ({ \\\n" |
6472 | " float16_t __s0 = __p0; \\\n" |
6473 | " uint32_t __ret; \\\n" |
6474 | " __ret = (uint32_t) __builtin_neon_vcvtnh_u32_f16(__s0); \\\n" |
6475 | " __ret; \\\n" |
6476 | "})\n" |
6477 | "#endif\n" |
6478 | "\n" |
6479 | "#ifdef __LITTLE_ENDIAN__\n" |
6480 | "#define vcvtnh_u64_f16(__p0) __extension__ ({ \\\n" |
6481 | " float16_t __s0 = __p0; \\\n" |
6482 | " uint64_t __ret; \\\n" |
6483 | " __ret = (uint64_t) __builtin_neon_vcvtnh_u64_f16(__s0); \\\n" |
6484 | " __ret; \\\n" |
6485 | "})\n" |
6486 | "#else\n" |
6487 | "#define vcvtnh_u64_f16(__p0) __extension__ ({ \\\n" |
6488 | " float16_t __s0 = __p0; \\\n" |
6489 | " uint64_t __ret; \\\n" |
6490 | " __ret = (uint64_t) __builtin_neon_vcvtnh_u64_f16(__s0); \\\n" |
6491 | " __ret; \\\n" |
6492 | "})\n" |
6493 | "#endif\n" |
6494 | "\n" |
6495 | "#ifdef __LITTLE_ENDIAN__\n" |
6496 | "#define vcvtph_s16_f16(__p0) __extension__ ({ \\\n" |
6497 | " float16_t __s0 = __p0; \\\n" |
6498 | " int16_t __ret; \\\n" |
6499 | " __ret = (int16_t) __builtin_neon_vcvtph_s16_f16(__s0); \\\n" |
6500 | " __ret; \\\n" |
6501 | "})\n" |
6502 | "#else\n" |
6503 | "#define vcvtph_s16_f16(__p0) __extension__ ({ \\\n" |
6504 | " float16_t __s0 = __p0; \\\n" |
6505 | " int16_t __ret; \\\n" |
6506 | " __ret = (int16_t) __builtin_neon_vcvtph_s16_f16(__s0); \\\n" |
6507 | " __ret; \\\n" |
6508 | "})\n" |
6509 | "#endif\n" |
6510 | "\n" |
6511 | "#ifdef __LITTLE_ENDIAN__\n" |
6512 | "#define vcvtph_s32_f16(__p0) __extension__ ({ \\\n" |
6513 | " float16_t __s0 = __p0; \\\n" |
6514 | " int32_t __ret; \\\n" |
6515 | " __ret = (int32_t) __builtin_neon_vcvtph_s32_f16(__s0); \\\n" |
6516 | " __ret; \\\n" |
6517 | "})\n" |
6518 | "#else\n" |
6519 | "#define vcvtph_s32_f16(__p0) __extension__ ({ \\\n" |
6520 | " float16_t __s0 = __p0; \\\n" |
6521 | " int32_t __ret; \\\n" |
6522 | " __ret = (int32_t) __builtin_neon_vcvtph_s32_f16(__s0); \\\n" |
6523 | " __ret; \\\n" |
6524 | "})\n" |
6525 | "#endif\n" |
6526 | "\n" |
6527 | "#ifdef __LITTLE_ENDIAN__\n" |
6528 | "#define vcvtph_s64_f16(__p0) __extension__ ({ \\\n" |
6529 | " float16_t __s0 = __p0; \\\n" |
6530 | " int64_t __ret; \\\n" |
6531 | " __ret = (int64_t) __builtin_neon_vcvtph_s64_f16(__s0); \\\n" |
6532 | " __ret; \\\n" |
6533 | "})\n" |
6534 | "#else\n" |
6535 | "#define vcvtph_s64_f16(__p0) __extension__ ({ \\\n" |
6536 | " float16_t __s0 = __p0; \\\n" |
6537 | " int64_t __ret; \\\n" |
6538 | " __ret = (int64_t) __builtin_neon_vcvtph_s64_f16(__s0); \\\n" |
6539 | " __ret; \\\n" |
6540 | "})\n" |
6541 | "#endif\n" |
6542 | "\n" |
6543 | "#ifdef __LITTLE_ENDIAN__\n" |
6544 | "#define vcvtph_u16_f16(__p0) __extension__ ({ \\\n" |
6545 | " float16_t __s0 = __p0; \\\n" |
6546 | " uint16_t __ret; \\\n" |
6547 | " __ret = (uint16_t) __builtin_neon_vcvtph_u16_f16(__s0); \\\n" |
6548 | " __ret; \\\n" |
6549 | "})\n" |
6550 | "#else\n" |
6551 | "#define vcvtph_u16_f16(__p0) __extension__ ({ \\\n" |
6552 | " float16_t __s0 = __p0; \\\n" |
6553 | " uint16_t __ret; \\\n" |
6554 | " __ret = (uint16_t) __builtin_neon_vcvtph_u16_f16(__s0); \\\n" |
6555 | " __ret; \\\n" |
6556 | "})\n" |
6557 | "#endif\n" |
6558 | "\n" |
6559 | "#ifdef __LITTLE_ENDIAN__\n" |
6560 | "#define vcvtph_u32_f16(__p0) __extension__ ({ \\\n" |
6561 | " float16_t __s0 = __p0; \\\n" |
6562 | " uint32_t __ret; \\\n" |
6563 | " __ret = (uint32_t) __builtin_neon_vcvtph_u32_f16(__s0); \\\n" |
6564 | " __ret; \\\n" |
6565 | "})\n" |
6566 | "#else\n" |
6567 | "#define vcvtph_u32_f16(__p0) __extension__ ({ \\\n" |
6568 | " float16_t __s0 = __p0; \\\n" |
6569 | " uint32_t __ret; \\\n" |
6570 | " __ret = (uint32_t) __builtin_neon_vcvtph_u32_f16(__s0); \\\n" |
6571 | " __ret; \\\n" |
6572 | "})\n" |
6573 | "#endif\n" |
6574 | "\n" |
6575 | "#ifdef __LITTLE_ENDIAN__\n" |
6576 | "#define vcvtph_u64_f16(__p0) __extension__ ({ \\\n" |
6577 | " float16_t __s0 = __p0; \\\n" |
6578 | " uint64_t __ret; \\\n" |
6579 | " __ret = (uint64_t) __builtin_neon_vcvtph_u64_f16(__s0); \\\n" |
6580 | " __ret; \\\n" |
6581 | "})\n" |
6582 | "#else\n" |
6583 | "#define vcvtph_u64_f16(__p0) __extension__ ({ \\\n" |
6584 | " float16_t __s0 = __p0; \\\n" |
6585 | " uint64_t __ret; \\\n" |
6586 | " __ret = (uint64_t) __builtin_neon_vcvtph_u64_f16(__s0); \\\n" |
6587 | " __ret; \\\n" |
6588 | "})\n" |
6589 | "#endif\n" |
6590 | "\n" |
6591 | "#ifdef __LITTLE_ENDIAN__\n" |
6592 | "#define vdivh_f16(__p0, __p1) __extension__ ({ \\\n" |
6593 | " float16_t __s0 = __p0; \\\n" |
6594 | " float16_t __s1 = __p1; \\\n" |
6595 | " float16_t __ret; \\\n" |
6596 | " __ret = (float16_t) __builtin_neon_vdivh_f16(__s0, __s1); \\\n" |
6597 | " __ret; \\\n" |
6598 | "})\n" |
6599 | "#else\n" |
6600 | "#define vdivh_f16(__p0, __p1) __extension__ ({ \\\n" |
6601 | " float16_t __s0 = __p0; \\\n" |
6602 | " float16_t __s1 = __p1; \\\n" |
6603 | " float16_t __ret; \\\n" |
6604 | " __ret = (float16_t) __builtin_neon_vdivh_f16(__s0, __s1); \\\n" |
6605 | " __ret; \\\n" |
6606 | "})\n" |
6607 | "#endif\n" |
6608 | "\n" |
6609 | "#ifdef __LITTLE_ENDIAN__\n" |
6610 | "#define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \\\n" |
6611 | " float16_t __s0 = __p0; \\\n" |
6612 | " float16_t __s1 = __p1; \\\n" |
6613 | " float16_t __s2 = __p2; \\\n" |
6614 | " float16_t __ret; \\\n" |
6615 | " __ret = (float16_t) __builtin_neon_vfmah_f16(__s0, __s1, __s2); \\\n" |
6616 | " __ret; \\\n" |
6617 | "})\n" |
6618 | "#else\n" |
6619 | "#define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \\\n" |
6620 | " float16_t __s0 = __p0; \\\n" |
6621 | " float16_t __s1 = __p1; \\\n" |
6622 | " float16_t __s2 = __p2; \\\n" |
6623 | " float16_t __ret; \\\n" |
6624 | " __ret = (float16_t) __builtin_neon_vfmah_f16(__s0, __s1, __s2); \\\n" |
6625 | " __ret; \\\n" |
6626 | "})\n" |
6627 | "#endif\n" |
6628 | "\n" |
6629 | "#ifdef __LITTLE_ENDIAN__\n" |
6630 | "#define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \\\n" |
6631 | " float16_t __s0 = __p0; \\\n" |
6632 | " float16_t __s1 = __p1; \\\n" |
6633 | " float16_t __s2 = __p2; \\\n" |
6634 | " float16_t __ret; \\\n" |
6635 | " __ret = (float16_t) __builtin_neon_vfmsh_f16(__s0, __s1, __s2); \\\n" |
6636 | " __ret; \\\n" |
6637 | "})\n" |
6638 | "#else\n" |
6639 | "#define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \\\n" |
6640 | " float16_t __s0 = __p0; \\\n" |
6641 | " float16_t __s1 = __p1; \\\n" |
6642 | " float16_t __s2 = __p2; \\\n" |
6643 | " float16_t __ret; \\\n" |
6644 | " __ret = (float16_t) __builtin_neon_vfmsh_f16(__s0, __s1, __s2); \\\n" |
6645 | " __ret; \\\n" |
6646 | "})\n" |
6647 | "#endif\n" |
6648 | "\n" |
6649 | "#ifdef __LITTLE_ENDIAN__\n" |
6650 | "#define vmaxh_f16(__p0, __p1) __extension__ ({ \\\n" |
6651 | " float16_t __s0 = __p0; \\\n" |
6652 | " float16_t __s1 = __p1; \\\n" |
6653 | " float16_t __ret; \\\n" |
6654 | " __ret = (float16_t) __builtin_neon_vmaxh_f16(__s0, __s1); \\\n" |
6655 | " __ret; \\\n" |
6656 | "})\n" |
6657 | "#else\n" |
6658 | "#define vmaxh_f16(__p0, __p1) __extension__ ({ \\\n" |
6659 | " float16_t __s0 = __p0; \\\n" |
6660 | " float16_t __s1 = __p1; \\\n" |
6661 | " float16_t __ret; \\\n" |
6662 | " __ret = (float16_t) __builtin_neon_vmaxh_f16(__s0, __s1); \\\n" |
6663 | " __ret; \\\n" |
6664 | "})\n" |
6665 | "#endif\n" |
6666 | "\n" |
6667 | "#ifdef __LITTLE_ENDIAN__\n" |
6668 | "#define vmaxnmh_f16(__p0, __p1) __extension__ ({ \\\n" |
6669 | " float16_t __s0 = __p0; \\\n" |
6670 | " float16_t __s1 = __p1; \\\n" |
6671 | " float16_t __ret; \\\n" |
6672 | " __ret = (float16_t) __builtin_neon_vmaxnmh_f16(__s0, __s1); \\\n" |
6673 | " __ret; \\\n" |
6674 | "})\n" |
6675 | "#else\n" |
6676 | "#define vmaxnmh_f16(__p0, __p1) __extension__ ({ \\\n" |
6677 | " float16_t __s0 = __p0; \\\n" |
6678 | " float16_t __s1 = __p1; \\\n" |
6679 | " float16_t __ret; \\\n" |
6680 | " __ret = (float16_t) __builtin_neon_vmaxnmh_f16(__s0, __s1); \\\n" |
6681 | " __ret; \\\n" |
6682 | "})\n" |
6683 | "#endif\n" |
6684 | "\n" |
6685 | "#ifdef __LITTLE_ENDIAN__\n" |
6686 | "#define vminh_f16(__p0, __p1) __extension__ ({ \\\n" |
6687 | " float16_t __s0 = __p0; \\\n" |
6688 | " float16_t __s1 = __p1; \\\n" |
6689 | " float16_t __ret; \\\n" |
6690 | " __ret = (float16_t) __builtin_neon_vminh_f16(__s0, __s1); \\\n" |
6691 | " __ret; \\\n" |
6692 | "})\n" |
6693 | "#else\n" |
6694 | "#define vminh_f16(__p0, __p1) __extension__ ({ \\\n" |
6695 | " float16_t __s0 = __p0; \\\n" |
6696 | " float16_t __s1 = __p1; \\\n" |
6697 | " float16_t __ret; \\\n" |
6698 | " __ret = (float16_t) __builtin_neon_vminh_f16(__s0, __s1); \\\n" |
6699 | " __ret; \\\n" |
6700 | "})\n" |
6701 | "#endif\n" |
6702 | "\n" |
6703 | "#ifdef __LITTLE_ENDIAN__\n" |
6704 | "#define vminnmh_f16(__p0, __p1) __extension__ ({ \\\n" |
6705 | " float16_t __s0 = __p0; \\\n" |
6706 | " float16_t __s1 = __p1; \\\n" |
6707 | " float16_t __ret; \\\n" |
6708 | " __ret = (float16_t) __builtin_neon_vminnmh_f16(__s0, __s1); \\\n" |
6709 | " __ret; \\\n" |
6710 | "})\n" |
6711 | "#else\n" |
6712 | "#define vminnmh_f16(__p0, __p1) __extension__ ({ \\\n" |
6713 | " float16_t __s0 = __p0; \\\n" |
6714 | " float16_t __s1 = __p1; \\\n" |
6715 | " float16_t __ret; \\\n" |
6716 | " __ret = (float16_t) __builtin_neon_vminnmh_f16(__s0, __s1); \\\n" |
6717 | " __ret; \\\n" |
6718 | "})\n" |
6719 | "#endif\n" |
6720 | "\n" |
6721 | "#ifdef __LITTLE_ENDIAN__\n" |
6722 | "#define vmulh_f16(__p0, __p1) __extension__ ({ \\\n" |
6723 | " float16_t __s0 = __p0; \\\n" |
6724 | " float16_t __s1 = __p1; \\\n" |
6725 | " float16_t __ret; \\\n" |
6726 | " __ret = (float16_t) __builtin_neon_vmulh_f16(__s0, __s1); \\\n" |
6727 | " __ret; \\\n" |
6728 | "})\n" |
6729 | "#else\n" |
6730 | "#define vmulh_f16(__p0, __p1) __extension__ ({ \\\n" |
6731 | " float16_t __s0 = __p0; \\\n" |
6732 | " float16_t __s1 = __p1; \\\n" |
6733 | " float16_t __ret; \\\n" |
6734 | " __ret = (float16_t) __builtin_neon_vmulh_f16(__s0, __s1); \\\n" |
6735 | " __ret; \\\n" |
6736 | "})\n" |
6737 | "#endif\n" |
6738 | "\n" |
6739 | "#ifdef __LITTLE_ENDIAN__\n" |
6740 | "#define vmulxh_f16(__p0, __p1) __extension__ ({ \\\n" |
6741 | " float16_t __s0 = __p0; \\\n" |
6742 | " float16_t __s1 = __p1; \\\n" |
6743 | " float16_t __ret; \\\n" |
6744 | " __ret = (float16_t) __builtin_neon_vmulxh_f16(__s0, __s1); \\\n" |
6745 | " __ret; \\\n" |
6746 | "})\n" |
6747 | "#else\n" |
6748 | "#define vmulxh_f16(__p0, __p1) __extension__ ({ \\\n" |
6749 | " float16_t __s0 = __p0; \\\n" |
6750 | " float16_t __s1 = __p1; \\\n" |
6751 | " float16_t __ret; \\\n" |
6752 | " __ret = (float16_t) __builtin_neon_vmulxh_f16(__s0, __s1); \\\n" |
6753 | " __ret; \\\n" |
6754 | "})\n" |
6755 | "#endif\n" |
6756 | "\n" |
6757 | "#ifdef __LITTLE_ENDIAN__\n" |
6758 | "#define vnegh_f16(__p0) __extension__ ({ \\\n" |
6759 | " float16_t __s0 = __p0; \\\n" |
6760 | " float16_t __ret; \\\n" |
6761 | " __ret = (float16_t) __builtin_neon_vnegh_f16(__s0); \\\n" |
6762 | " __ret; \\\n" |
6763 | "})\n" |
6764 | "#else\n" |
6765 | "#define vnegh_f16(__p0) __extension__ ({ \\\n" |
6766 | " float16_t __s0 = __p0; \\\n" |
6767 | " float16_t __ret; \\\n" |
6768 | " __ret = (float16_t) __builtin_neon_vnegh_f16(__s0); \\\n" |
6769 | " __ret; \\\n" |
6770 | "})\n" |
6771 | "#endif\n" |
6772 | "\n" |
6773 | "#ifdef __LITTLE_ENDIAN__\n" |
6774 | "#define vrecpeh_f16(__p0) __extension__ ({ \\\n" |
6775 | " float16_t __s0 = __p0; \\\n" |
6776 | " float16_t __ret; \\\n" |
6777 | " __ret = (float16_t) __builtin_neon_vrecpeh_f16(__s0); \\\n" |
6778 | " __ret; \\\n" |
6779 | "})\n" |
6780 | "#else\n" |
6781 | "#define vrecpeh_f16(__p0) __extension__ ({ \\\n" |
6782 | " float16_t __s0 = __p0; \\\n" |
6783 | " float16_t __ret; \\\n" |
6784 | " __ret = (float16_t) __builtin_neon_vrecpeh_f16(__s0); \\\n" |
6785 | " __ret; \\\n" |
6786 | "})\n" |
6787 | "#endif\n" |
6788 | "\n" |
6789 | "#ifdef __LITTLE_ENDIAN__\n" |
6790 | "#define vrecpsh_f16(__p0, __p1) __extension__ ({ \\\n" |
6791 | " float16_t __s0 = __p0; \\\n" |
6792 | " float16_t __s1 = __p1; \\\n" |
6793 | " float16_t __ret; \\\n" |
6794 | " __ret = (float16_t) __builtin_neon_vrecpsh_f16(__s0, __s1); \\\n" |
6795 | " __ret; \\\n" |
6796 | "})\n" |
6797 | "#else\n" |
6798 | "#define vrecpsh_f16(__p0, __p1) __extension__ ({ \\\n" |
6799 | " float16_t __s0 = __p0; \\\n" |
6800 | " float16_t __s1 = __p1; \\\n" |
6801 | " float16_t __ret; \\\n" |
6802 | " __ret = (float16_t) __builtin_neon_vrecpsh_f16(__s0, __s1); \\\n" |
6803 | " __ret; \\\n" |
6804 | "})\n" |
6805 | "#endif\n" |
6806 | "\n" |
6807 | "#ifdef __LITTLE_ENDIAN__\n" |
6808 | "#define vrecpxh_f16(__p0) __extension__ ({ \\\n" |
6809 | " float16_t __s0 = __p0; \\\n" |
6810 | " float16_t __ret; \\\n" |
6811 | " __ret = (float16_t) __builtin_neon_vrecpxh_f16(__s0); \\\n" |
6812 | " __ret; \\\n" |
6813 | "})\n" |
6814 | "#else\n" |
6815 | "#define vrecpxh_f16(__p0) __extension__ ({ \\\n" |
6816 | " float16_t __s0 = __p0; \\\n" |
6817 | " float16_t __ret; \\\n" |
6818 | " __ret = (float16_t) __builtin_neon_vrecpxh_f16(__s0); \\\n" |
6819 | " __ret; \\\n" |
6820 | "})\n" |
6821 | "#endif\n" |
6822 | "\n" |
6823 | "#ifdef __LITTLE_ENDIAN__\n" |
6824 | "#define vrndh_f16(__p0) __extension__ ({ \\\n" |
6825 | " float16_t __s0 = __p0; \\\n" |
6826 | " float16_t __ret; \\\n" |
6827 | " __ret = (float16_t) __builtin_neon_vrndh_f16(__s0); \\\n" |
6828 | " __ret; \\\n" |
6829 | "})\n" |
6830 | "#else\n" |
6831 | "#define vrndh_f16(__p0) __extension__ ({ \\\n" |
6832 | " float16_t __s0 = __p0; \\\n" |
6833 | " float16_t __ret; \\\n" |
6834 | " __ret = (float16_t) __builtin_neon_vrndh_f16(__s0); \\\n" |
6835 | " __ret; \\\n" |
6836 | "})\n" |
6837 | "#endif\n" |
6838 | "\n" |
6839 | "#ifdef __LITTLE_ENDIAN__\n" |
6840 | "#define vrndah_f16(__p0) __extension__ ({ \\\n" |
6841 | " float16_t __s0 = __p0; \\\n" |
6842 | " float16_t __ret; \\\n" |
6843 | " __ret = (float16_t) __builtin_neon_vrndah_f16(__s0); \\\n" |
6844 | " __ret; \\\n" |
6845 | "})\n" |
6846 | "#else\n" |
6847 | "#define vrndah_f16(__p0) __extension__ ({ \\\n" |
6848 | " float16_t __s0 = __p0; \\\n" |
6849 | " float16_t __ret; \\\n" |
6850 | " __ret = (float16_t) __builtin_neon_vrndah_f16(__s0); \\\n" |
6851 | " __ret; \\\n" |
6852 | "})\n" |
6853 | "#endif\n" |
6854 | "\n" |
6855 | "#ifdef __LITTLE_ENDIAN__\n" |
6856 | "#define vrndih_f16(__p0) __extension__ ({ \\\n" |
6857 | " float16_t __s0 = __p0; \\\n" |
6858 | " float16_t __ret; \\\n" |
6859 | " __ret = (float16_t) __builtin_neon_vrndih_f16(__s0); \\\n" |
6860 | " __ret; \\\n" |
6861 | "})\n" |
6862 | "#else\n" |
6863 | "#define vrndih_f16(__p0) __extension__ ({ \\\n" |
6864 | " float16_t __s0 = __p0; \\\n" |
6865 | " float16_t __ret; \\\n" |
6866 | " __ret = (float16_t) __builtin_neon_vrndih_f16(__s0); \\\n" |
6867 | " __ret; \\\n" |
6868 | "})\n" |
6869 | "#endif\n" |
6870 | "\n" |
6871 | "#ifdef __LITTLE_ENDIAN__\n" |
6872 | "#define vrndmh_f16(__p0) __extension__ ({ \\\n" |
6873 | " float16_t __s0 = __p0; \\\n" |
6874 | " float16_t __ret; \\\n" |
6875 | " __ret = (float16_t) __builtin_neon_vrndmh_f16(__s0); \\\n" |
6876 | " __ret; \\\n" |
6877 | "})\n" |
6878 | "#else\n" |
6879 | "#define vrndmh_f16(__p0) __extension__ ({ \\\n" |
6880 | " float16_t __s0 = __p0; \\\n" |
6881 | " float16_t __ret; \\\n" |
6882 | " __ret = (float16_t) __builtin_neon_vrndmh_f16(__s0); \\\n" |
6883 | " __ret; \\\n" |
6884 | "})\n" |
6885 | "#endif\n" |
6886 | "\n" |
6887 | "#ifdef __LITTLE_ENDIAN__\n" |
6888 | "#define vrndnh_f16(__p0) __extension__ ({ \\\n" |
6889 | " float16_t __s0 = __p0; \\\n" |
6890 | " float16_t __ret; \\\n" |
6891 | " __ret = (float16_t) __builtin_neon_vrndnh_f16(__s0); \\\n" |
6892 | " __ret; \\\n" |
6893 | "})\n" |
6894 | "#else\n" |
6895 | "#define vrndnh_f16(__p0) __extension__ ({ \\\n" |
6896 | " float16_t __s0 = __p0; \\\n" |
6897 | " float16_t __ret; \\\n" |
6898 | " __ret = (float16_t) __builtin_neon_vrndnh_f16(__s0); \\\n" |
6899 | " __ret; \\\n" |
6900 | "})\n" |
6901 | "#endif\n" |
6902 | "\n" |
6903 | "#ifdef __LITTLE_ENDIAN__\n" |
6904 | "#define vrndph_f16(__p0) __extension__ ({ \\\n" |
6905 | " float16_t __s0 = __p0; \\\n" |
6906 | " float16_t __ret; \\\n" |
6907 | " __ret = (float16_t) __builtin_neon_vrndph_f16(__s0); \\\n" |
6908 | " __ret; \\\n" |
6909 | "})\n" |
6910 | "#else\n" |
6911 | "#define vrndph_f16(__p0) __extension__ ({ \\\n" |
6912 | " float16_t __s0 = __p0; \\\n" |
6913 | " float16_t __ret; \\\n" |
6914 | " __ret = (float16_t) __builtin_neon_vrndph_f16(__s0); \\\n" |
6915 | " __ret; \\\n" |
6916 | "})\n" |
6917 | "#endif\n" |
6918 | "\n" |
6919 | "#ifdef __LITTLE_ENDIAN__\n" |
6920 | "#define vrndxh_f16(__p0) __extension__ ({ \\\n" |
6921 | " float16_t __s0 = __p0; \\\n" |
6922 | " float16_t __ret; \\\n" |
6923 | " __ret = (float16_t) __builtin_neon_vrndxh_f16(__s0); \\\n" |
6924 | " __ret; \\\n" |
6925 | "})\n" |
6926 | "#else\n" |
6927 | "#define vrndxh_f16(__p0) __extension__ ({ \\\n" |
6928 | " float16_t __s0 = __p0; \\\n" |
6929 | " float16_t __ret; \\\n" |
6930 | " __ret = (float16_t) __builtin_neon_vrndxh_f16(__s0); \\\n" |
6931 | " __ret; \\\n" |
6932 | "})\n" |
6933 | "#endif\n" |
6934 | "\n" |
6935 | "#ifdef __LITTLE_ENDIAN__\n" |
6936 | "#define vrsqrteh_f16(__p0) __extension__ ({ \\\n" |
6937 | " float16_t __s0 = __p0; \\\n" |
6938 | " float16_t __ret; \\\n" |
6939 | " __ret = (float16_t) __builtin_neon_vrsqrteh_f16(__s0); \\\n" |
6940 | " __ret; \\\n" |
6941 | "})\n" |
6942 | "#else\n" |
6943 | "#define vrsqrteh_f16(__p0) __extension__ ({ \\\n" |
6944 | " float16_t __s0 = __p0; \\\n" |
6945 | " float16_t __ret; \\\n" |
6946 | " __ret = (float16_t) __builtin_neon_vrsqrteh_f16(__s0); \\\n" |
6947 | " __ret; \\\n" |
6948 | "})\n" |
6949 | "#endif\n" |
6950 | "\n" |
6951 | "#ifdef __LITTLE_ENDIAN__\n" |
6952 | "#define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \\\n" |
6953 | " float16_t __s0 = __p0; \\\n" |
6954 | " float16_t __s1 = __p1; \\\n" |
6955 | " float16_t __ret; \\\n" |
6956 | " __ret = (float16_t) __builtin_neon_vrsqrtsh_f16(__s0, __s1); \\\n" |
6957 | " __ret; \\\n" |
6958 | "})\n" |
6959 | "#else\n" |
6960 | "#define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \\\n" |
6961 | " float16_t __s0 = __p0; \\\n" |
6962 | " float16_t __s1 = __p1; \\\n" |
6963 | " float16_t __ret; \\\n" |
6964 | " __ret = (float16_t) __builtin_neon_vrsqrtsh_f16(__s0, __s1); \\\n" |
6965 | " __ret; \\\n" |
6966 | "})\n" |
6967 | "#endif\n" |
6968 | "\n" |
6969 | "#ifdef __LITTLE_ENDIAN__\n" |
6970 | "#define vsqrth_f16(__p0) __extension__ ({ \\\n" |
6971 | " float16_t __s0 = __p0; \\\n" |
6972 | " float16_t __ret; \\\n" |
6973 | " __ret = (float16_t) __builtin_neon_vsqrth_f16(__s0); \\\n" |
6974 | " __ret; \\\n" |
6975 | "})\n" |
6976 | "#else\n" |
6977 | "#define vsqrth_f16(__p0) __extension__ ({ \\\n" |
6978 | " float16_t __s0 = __p0; \\\n" |
6979 | " float16_t __ret; \\\n" |
6980 | " __ret = (float16_t) __builtin_neon_vsqrth_f16(__s0); \\\n" |
6981 | " __ret; \\\n" |
6982 | "})\n" |
6983 | "#endif\n" |
6984 | "\n" |
6985 | "#ifdef __LITTLE_ENDIAN__\n" |
6986 | "#define vsubh_f16(__p0, __p1) __extension__ ({ \\\n" |
6987 | " float16_t __s0 = __p0; \\\n" |
6988 | " float16_t __s1 = __p1; \\\n" |
6989 | " float16_t __ret; \\\n" |
6990 | " __ret = (float16_t) __builtin_neon_vsubh_f16(__s0, __s1); \\\n" |
6991 | " __ret; \\\n" |
6992 | "})\n" |
6993 | "#else\n" |
6994 | "#define vsubh_f16(__p0, __p1) __extension__ ({ \\\n" |
6995 | " float16_t __s0 = __p0; \\\n" |
6996 | " float16_t __s1 = __p1; \\\n" |
6997 | " float16_t __ret; \\\n" |
6998 | " __ret = (float16_t) __builtin_neon_vsubh_f16(__s0, __s1); \\\n" |
6999 | " __ret; \\\n" |
7000 | "})\n" |
7001 | "#endif\n" |
7002 | "\n" |
7003 | "#endif\n" |
7004 | "\n" |
7005 | "#undef __ai\n" |
7006 | "\n" |
7007 | "#endif /* __ARM_FP16_H */\n" |
7008 | "" } , |
7009 | { "/builtins/armintr.h" , "/*===---- armintr.h - ARM Windows intrinsics -------------------------------===\n" |
7010 | " *\n" |
7011 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
7012 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
7013 | " * in the Software without restriction, including without limitation the rights\n" |
7014 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
7015 | " * copies of the Software, and to permit persons to whom the Software is\n" |
7016 | " * furnished to do so, subject to the following conditions:\n" |
7017 | " *\n" |
7018 | " * The above copyright notice and this permission notice shall be included in\n" |
7019 | " * all copies or substantial portions of the Software.\n" |
7020 | " *\n" |
7021 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
7022 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
7023 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
7024 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
7025 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
7026 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
7027 | " * THE SOFTWARE.\n" |
7028 | " *\n" |
7029 | " *===-----------------------------------------------------------------------===\n" |
7030 | " */\n" |
7031 | "\n" |
7032 | "/* Only include this if we're compiling for the windows platform. */\n" |
7033 | "#ifndef _MSC_VER\n" |
7034 | "#include_next <armintr.h>\n" |
7035 | "#else\n" |
7036 | "\n" |
7037 | "#ifndef __ARMINTR_H\n" |
7038 | "#define __ARMINTR_H\n" |
7039 | "\n" |
7040 | "typedef enum\n" |
7041 | "{\n" |
7042 | " _ARM_BARRIER_SY = 0xF,\n" |
7043 | " _ARM_BARRIER_ST = 0xE,\n" |
7044 | " _ARM_BARRIER_ISH = 0xB,\n" |
7045 | " _ARM_BARRIER_ISHST = 0xA,\n" |
7046 | " _ARM_BARRIER_NSH = 0x7,\n" |
7047 | " _ARM_BARRIER_NSHST = 0x6,\n" |
7048 | " _ARM_BARRIER_OSH = 0x3,\n" |
7049 | " _ARM_BARRIER_OSHST = 0x2\n" |
7050 | "} _ARMINTR_BARRIER_TYPE;\n" |
7051 | "\n" |
7052 | "#endif /* __ARMINTR_H */\n" |
7053 | "#endif /* _MSC_VER */\n" |
7054 | "" } , |
7055 | { "/builtins/avx2intrin.h" , "/*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------===\n" |
7056 | " *\n" |
7057 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
7058 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
7059 | " * in the Software without restriction, including without limitation the rights\n" |
7060 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
7061 | " * copies of the Software, and to permit persons to whom the Software is\n" |
7062 | " * furnished to do so, subject to the following conditions:\n" |
7063 | " *\n" |
7064 | " * The above copyright notice and this permission notice shall be included in\n" |
7065 | " * all copies or substantial portions of the Software.\n" |
7066 | " *\n" |
7067 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
7068 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
7069 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
7070 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
7071 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
7072 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
7073 | " * THE SOFTWARE.\n" |
7074 | " *\n" |
7075 | " *===-----------------------------------------------------------------------===\n" |
7076 | " */\n" |
7077 | "\n" |
7078 | "#ifndef __IMMINTRIN_H\n" |
7079 | "#error \"Never use <avx2intrin.h> directly; include <immintrin.h> instead.\"\n" |
7080 | "#endif\n" |
7081 | "\n" |
7082 | "#ifndef __AVX2INTRIN_H\n" |
7083 | "#define __AVX2INTRIN_H\n" |
7084 | "\n" |
7085 | "/* Define the default attributes for the functions in this file. */\n" |
7086 | "#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"avx2\"), __min_vector_width__(256)))\n" |
7087 | "#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"avx2\"), __min_vector_width__(128)))\n" |
7088 | "\n" |
7089 | "/* SSE4 Multiple Packed Sums of Absolute Difference. */\n" |
7090 | "#define _mm256_mpsadbw_epu8(X, Y, M) \\\n" |
7091 | " (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \\\n" |
7092 | " (__v32qi)(__m256i)(Y), (int)(M))\n" |
7093 | "\n" |
7094 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7095 | "_mm256_abs_epi8(__m256i __a)\n" |
7096 | "{\n" |
7097 | " return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);\n" |
7098 | "}\n" |
7099 | "\n" |
7100 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7101 | "_mm256_abs_epi16(__m256i __a)\n" |
7102 | "{\n" |
7103 | " return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);\n" |
7104 | "}\n" |
7105 | "\n" |
7106 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7107 | "_mm256_abs_epi32(__m256i __a)\n" |
7108 | "{\n" |
7109 | " return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);\n" |
7110 | "}\n" |
7111 | "\n" |
7112 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7113 | "_mm256_packs_epi16(__m256i __a, __m256i __b)\n" |
7114 | "{\n" |
7115 | " return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);\n" |
7116 | "}\n" |
7117 | "\n" |
7118 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7119 | "_mm256_packs_epi32(__m256i __a, __m256i __b)\n" |
7120 | "{\n" |
7121 | " return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);\n" |
7122 | "}\n" |
7123 | "\n" |
7124 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7125 | "_mm256_packus_epi16(__m256i __a, __m256i __b)\n" |
7126 | "{\n" |
7127 | " return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);\n" |
7128 | "}\n" |
7129 | "\n" |
7130 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7131 | "_mm256_packus_epi32(__m256i __V1, __m256i __V2)\n" |
7132 | "{\n" |
7133 | " return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);\n" |
7134 | "}\n" |
7135 | "\n" |
7136 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7137 | "_mm256_add_epi8(__m256i __a, __m256i __b)\n" |
7138 | "{\n" |
7139 | " return (__m256i)((__v32qu)__a + (__v32qu)__b);\n" |
7140 | "}\n" |
7141 | "\n" |
7142 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7143 | "_mm256_add_epi16(__m256i __a, __m256i __b)\n" |
7144 | "{\n" |
7145 | " return (__m256i)((__v16hu)__a + (__v16hu)__b);\n" |
7146 | "}\n" |
7147 | "\n" |
7148 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7149 | "_mm256_add_epi32(__m256i __a, __m256i __b)\n" |
7150 | "{\n" |
7151 | " return (__m256i)((__v8su)__a + (__v8su)__b);\n" |
7152 | "}\n" |
7153 | "\n" |
7154 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7155 | "_mm256_add_epi64(__m256i __a, __m256i __b)\n" |
7156 | "{\n" |
7157 | " return (__m256i)((__v4du)__a + (__v4du)__b);\n" |
7158 | "}\n" |
7159 | "\n" |
7160 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7161 | "_mm256_adds_epi8(__m256i __a, __m256i __b)\n" |
7162 | "{\n" |
7163 | " return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);\n" |
7164 | "}\n" |
7165 | "\n" |
7166 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7167 | "_mm256_adds_epi16(__m256i __a, __m256i __b)\n" |
7168 | "{\n" |
7169 | " return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);\n" |
7170 | "}\n" |
7171 | "\n" |
7172 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7173 | "_mm256_adds_epu8(__m256i __a, __m256i __b)\n" |
7174 | "{\n" |
7175 | " return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);\n" |
7176 | "}\n" |
7177 | "\n" |
7178 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7179 | "_mm256_adds_epu16(__m256i __a, __m256i __b)\n" |
7180 | "{\n" |
7181 | " return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);\n" |
7182 | "}\n" |
7183 | "\n" |
7184 | "#define _mm256_alignr_epi8(a, b, n) \\\n" |
7185 | " (__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \\\n" |
7186 | " (__v32qi)(__m256i)(b), (n))\n" |
7187 | "\n" |
7188 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7189 | "_mm256_and_si256(__m256i __a, __m256i __b)\n" |
7190 | "{\n" |
7191 | " return (__m256i)((__v4du)__a & (__v4du)__b);\n" |
7192 | "}\n" |
7193 | "\n" |
7194 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7195 | "_mm256_andnot_si256(__m256i __a, __m256i __b)\n" |
7196 | "{\n" |
7197 | " return (__m256i)(~(__v4du)__a & (__v4du)__b);\n" |
7198 | "}\n" |
7199 | "\n" |
7200 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7201 | "_mm256_avg_epu8(__m256i __a, __m256i __b)\n" |
7202 | "{\n" |
7203 | " typedef unsigned short __v32hu __attribute__((__vector_size__(64)));\n" |
7204 | " return (__m256i)__builtin_convertvector(\n" |
7205 | " ((__builtin_convertvector((__v32qu)__a, __v32hu) +\n" |
7206 | " __builtin_convertvector((__v32qu)__b, __v32hu)) + 1)\n" |
7207 | " >> 1, __v32qu);\n" |
7208 | "}\n" |
7209 | "\n" |
7210 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7211 | "_mm256_avg_epu16(__m256i __a, __m256i __b)\n" |
7212 | "{\n" |
7213 | " typedef unsigned int __v16su __attribute__((__vector_size__(64)));\n" |
7214 | " return (__m256i)__builtin_convertvector(\n" |
7215 | " ((__builtin_convertvector((__v16hu)__a, __v16su) +\n" |
7216 | " __builtin_convertvector((__v16hu)__b, __v16su)) + 1)\n" |
7217 | " >> 1, __v16hu);\n" |
7218 | "}\n" |
7219 | "\n" |
7220 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7221 | "_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)\n" |
7222 | "{\n" |
7223 | " return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,\n" |
7224 | " (__v32qi)__M);\n" |
7225 | "}\n" |
7226 | "\n" |
7227 | "#define _mm256_blend_epi16(V1, V2, M) \\\n" |
7228 | " (__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \\\n" |
7229 | " (__v16hi)(__m256i)(V2), (int)(M))\n" |
7230 | "\n" |
7231 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7232 | "_mm256_cmpeq_epi8(__m256i __a, __m256i __b)\n" |
7233 | "{\n" |
7234 | " return (__m256i)((__v32qi)__a == (__v32qi)__b);\n" |
7235 | "}\n" |
7236 | "\n" |
7237 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7238 | "_mm256_cmpeq_epi16(__m256i __a, __m256i __b)\n" |
7239 | "{\n" |
7240 | " return (__m256i)((__v16hi)__a == (__v16hi)__b);\n" |
7241 | "}\n" |
7242 | "\n" |
7243 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7244 | "_mm256_cmpeq_epi32(__m256i __a, __m256i __b)\n" |
7245 | "{\n" |
7246 | " return (__m256i)((__v8si)__a == (__v8si)__b);\n" |
7247 | "}\n" |
7248 | "\n" |
7249 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7250 | "_mm256_cmpeq_epi64(__m256i __a, __m256i __b)\n" |
7251 | "{\n" |
7252 | " return (__m256i)((__v4di)__a == (__v4di)__b);\n" |
7253 | "}\n" |
7254 | "\n" |
7255 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7256 | "_mm256_cmpgt_epi8(__m256i __a, __m256i __b)\n" |
7257 | "{\n" |
7258 | " /* This function always performs a signed comparison, but __v32qi is a char\n" |
7259 | " which may be signed or unsigned, so use __v32qs. */\n" |
7260 | " return (__m256i)((__v32qs)__a > (__v32qs)__b);\n" |
7261 | "}\n" |
7262 | "\n" |
7263 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7264 | "_mm256_cmpgt_epi16(__m256i __a, __m256i __b)\n" |
7265 | "{\n" |
7266 | " return (__m256i)((__v16hi)__a > (__v16hi)__b);\n" |
7267 | "}\n" |
7268 | "\n" |
7269 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7270 | "_mm256_cmpgt_epi32(__m256i __a, __m256i __b)\n" |
7271 | "{\n" |
7272 | " return (__m256i)((__v8si)__a > (__v8si)__b);\n" |
7273 | "}\n" |
7274 | "\n" |
7275 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7276 | "_mm256_cmpgt_epi64(__m256i __a, __m256i __b)\n" |
7277 | "{\n" |
7278 | " return (__m256i)((__v4di)__a > (__v4di)__b);\n" |
7279 | "}\n" |
7280 | "\n" |
7281 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7282 | "_mm256_hadd_epi16(__m256i __a, __m256i __b)\n" |
7283 | "{\n" |
7284 | " return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);\n" |
7285 | "}\n" |
7286 | "\n" |
7287 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7288 | "_mm256_hadd_epi32(__m256i __a, __m256i __b)\n" |
7289 | "{\n" |
7290 | " return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);\n" |
7291 | "}\n" |
7292 | "\n" |
7293 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7294 | "_mm256_hadds_epi16(__m256i __a, __m256i __b)\n" |
7295 | "{\n" |
7296 | " return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);\n" |
7297 | "}\n" |
7298 | "\n" |
7299 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7300 | "_mm256_hsub_epi16(__m256i __a, __m256i __b)\n" |
7301 | "{\n" |
7302 | " return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);\n" |
7303 | "}\n" |
7304 | "\n" |
7305 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7306 | "_mm256_hsub_epi32(__m256i __a, __m256i __b)\n" |
7307 | "{\n" |
7308 | " return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);\n" |
7309 | "}\n" |
7310 | "\n" |
7311 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7312 | "_mm256_hsubs_epi16(__m256i __a, __m256i __b)\n" |
7313 | "{\n" |
7314 | " return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);\n" |
7315 | "}\n" |
7316 | "\n" |
7317 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7318 | "_mm256_maddubs_epi16(__m256i __a, __m256i __b)\n" |
7319 | "{\n" |
7320 | " return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);\n" |
7321 | "}\n" |
7322 | "\n" |
7323 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7324 | "_mm256_madd_epi16(__m256i __a, __m256i __b)\n" |
7325 | "{\n" |
7326 | " return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);\n" |
7327 | "}\n" |
7328 | "\n" |
7329 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7330 | "_mm256_max_epi8(__m256i __a, __m256i __b)\n" |
7331 | "{\n" |
7332 | " return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);\n" |
7333 | "}\n" |
7334 | "\n" |
7335 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7336 | "_mm256_max_epi16(__m256i __a, __m256i __b)\n" |
7337 | "{\n" |
7338 | " return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);\n" |
7339 | "}\n" |
7340 | "\n" |
7341 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7342 | "_mm256_max_epi32(__m256i __a, __m256i __b)\n" |
7343 | "{\n" |
7344 | " return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);\n" |
7345 | "}\n" |
7346 | "\n" |
7347 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7348 | "_mm256_max_epu8(__m256i __a, __m256i __b)\n" |
7349 | "{\n" |
7350 | " return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);\n" |
7351 | "}\n" |
7352 | "\n" |
7353 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7354 | "_mm256_max_epu16(__m256i __a, __m256i __b)\n" |
7355 | "{\n" |
7356 | " return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);\n" |
7357 | "}\n" |
7358 | "\n" |
7359 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7360 | "_mm256_max_epu32(__m256i __a, __m256i __b)\n" |
7361 | "{\n" |
7362 | " return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);\n" |
7363 | "}\n" |
7364 | "\n" |
7365 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7366 | "_mm256_min_epi8(__m256i __a, __m256i __b)\n" |
7367 | "{\n" |
7368 | " return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);\n" |
7369 | "}\n" |
7370 | "\n" |
7371 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7372 | "_mm256_min_epi16(__m256i __a, __m256i __b)\n" |
7373 | "{\n" |
7374 | " return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);\n" |
7375 | "}\n" |
7376 | "\n" |
7377 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7378 | "_mm256_min_epi32(__m256i __a, __m256i __b)\n" |
7379 | "{\n" |
7380 | " return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);\n" |
7381 | "}\n" |
7382 | "\n" |
7383 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7384 | "_mm256_min_epu8(__m256i __a, __m256i __b)\n" |
7385 | "{\n" |
7386 | " return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);\n" |
7387 | "}\n" |
7388 | "\n" |
7389 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7390 | "_mm256_min_epu16(__m256i __a, __m256i __b)\n" |
7391 | "{\n" |
7392 | " return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);\n" |
7393 | "}\n" |
7394 | "\n" |
7395 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7396 | "_mm256_min_epu32(__m256i __a, __m256i __b)\n" |
7397 | "{\n" |
7398 | " return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);\n" |
7399 | "}\n" |
7400 | "\n" |
7401 | "static __inline__ int __DEFAULT_FN_ATTRS256\n" |
7402 | "_mm256_movemask_epi8(__m256i __a)\n" |
7403 | "{\n" |
7404 | " return __builtin_ia32_pmovmskb256((__v32qi)__a);\n" |
7405 | "}\n" |
7406 | "\n" |
7407 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7408 | "_mm256_cvtepi8_epi16(__m128i __V)\n" |
7409 | "{\n" |
7410 | " /* This function always performs a signed extension, but __v16qi is a char\n" |
7411 | " which may be signed or unsigned, so use __v16qs. */\n" |
7412 | " return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi);\n" |
7413 | "}\n" |
7414 | "\n" |
7415 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7416 | "_mm256_cvtepi8_epi32(__m128i __V)\n" |
7417 | "{\n" |
7418 | " /* This function always performs a signed extension, but __v16qi is a char\n" |
7419 | " which may be signed or unsigned, so use __v16qs. */\n" |
7420 | " return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);\n" |
7421 | "}\n" |
7422 | "\n" |
7423 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7424 | "_mm256_cvtepi8_epi64(__m128i __V)\n" |
7425 | "{\n" |
7426 | " /* This function always performs a signed extension, but __v16qi is a char\n" |
7427 | " which may be signed or unsigned, so use __v16qs. */\n" |
7428 | " return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di);\n" |
7429 | "}\n" |
7430 | "\n" |
7431 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7432 | "_mm256_cvtepi16_epi32(__m128i __V)\n" |
7433 | "{\n" |
7434 | " return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si);\n" |
7435 | "}\n" |
7436 | "\n" |
7437 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7438 | "_mm256_cvtepi16_epi64(__m128i __V)\n" |
7439 | "{\n" |
7440 | " return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di);\n" |
7441 | "}\n" |
7442 | "\n" |
7443 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7444 | "_mm256_cvtepi32_epi64(__m128i __V)\n" |
7445 | "{\n" |
7446 | " return (__m256i)__builtin_convertvector((__v4si)__V, __v4di);\n" |
7447 | "}\n" |
7448 | "\n" |
7449 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7450 | "_mm256_cvtepu8_epi16(__m128i __V)\n" |
7451 | "{\n" |
7452 | " return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi);\n" |
7453 | "}\n" |
7454 | "\n" |
7455 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7456 | "_mm256_cvtepu8_epi32(__m128i __V)\n" |
7457 | "{\n" |
7458 | " return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);\n" |
7459 | "}\n" |
7460 | "\n" |
7461 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7462 | "_mm256_cvtepu8_epi64(__m128i __V)\n" |
7463 | "{\n" |
7464 | " return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di);\n" |
7465 | "}\n" |
7466 | "\n" |
7467 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7468 | "_mm256_cvtepu16_epi32(__m128i __V)\n" |
7469 | "{\n" |
7470 | " return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si);\n" |
7471 | "}\n" |
7472 | "\n" |
7473 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7474 | "_mm256_cvtepu16_epi64(__m128i __V)\n" |
7475 | "{\n" |
7476 | " return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di);\n" |
7477 | "}\n" |
7478 | "\n" |
7479 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7480 | "_mm256_cvtepu32_epi64(__m128i __V)\n" |
7481 | "{\n" |
7482 | " return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);\n" |
7483 | "}\n" |
7484 | "\n" |
7485 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7486 | "_mm256_mul_epi32(__m256i __a, __m256i __b)\n" |
7487 | "{\n" |
7488 | " return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);\n" |
7489 | "}\n" |
7490 | "\n" |
7491 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7492 | "_mm256_mulhrs_epi16(__m256i __a, __m256i __b)\n" |
7493 | "{\n" |
7494 | " return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);\n" |
7495 | "}\n" |
7496 | "\n" |
7497 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7498 | "_mm256_mulhi_epu16(__m256i __a, __m256i __b)\n" |
7499 | "{\n" |
7500 | " return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);\n" |
7501 | "}\n" |
7502 | "\n" |
7503 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7504 | "_mm256_mulhi_epi16(__m256i __a, __m256i __b)\n" |
7505 | "{\n" |
7506 | " return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);\n" |
7507 | "}\n" |
7508 | "\n" |
7509 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7510 | "_mm256_mullo_epi16(__m256i __a, __m256i __b)\n" |
7511 | "{\n" |
7512 | " return (__m256i)((__v16hu)__a * (__v16hu)__b);\n" |
7513 | "}\n" |
7514 | "\n" |
7515 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7516 | "_mm256_mullo_epi32 (__m256i __a, __m256i __b)\n" |
7517 | "{\n" |
7518 | " return (__m256i)((__v8su)__a * (__v8su)__b);\n" |
7519 | "}\n" |
7520 | "\n" |
7521 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7522 | "_mm256_mul_epu32(__m256i __a, __m256i __b)\n" |
7523 | "{\n" |
7524 | " return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);\n" |
7525 | "}\n" |
7526 | "\n" |
7527 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7528 | "_mm256_or_si256(__m256i __a, __m256i __b)\n" |
7529 | "{\n" |
7530 | " return (__m256i)((__v4du)__a | (__v4du)__b);\n" |
7531 | "}\n" |
7532 | "\n" |
7533 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7534 | "_mm256_sad_epu8(__m256i __a, __m256i __b)\n" |
7535 | "{\n" |
7536 | " return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);\n" |
7537 | "}\n" |
7538 | "\n" |
7539 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7540 | "_mm256_shuffle_epi8(__m256i __a, __m256i __b)\n" |
7541 | "{\n" |
7542 | " return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);\n" |
7543 | "}\n" |
7544 | "\n" |
7545 | "#define _mm256_shuffle_epi32(a, imm) \\\n" |
7546 | " (__m256i)__builtin_ia32_pshufd256((__v8si)(__m256i)(a), (int)(imm))\n" |
7547 | "\n" |
7548 | "#define _mm256_shufflehi_epi16(a, imm) \\\n" |
7549 | " (__m256i)__builtin_ia32_pshufhw256((__v16hi)(__m256i)(a), (int)(imm))\n" |
7550 | "\n" |
7551 | "#define _mm256_shufflelo_epi16(a, imm) \\\n" |
7552 | " (__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm))\n" |
7553 | "\n" |
7554 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7555 | "_mm256_sign_epi8(__m256i __a, __m256i __b)\n" |
7556 | "{\n" |
7557 | " return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);\n" |
7558 | "}\n" |
7559 | "\n" |
7560 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7561 | "_mm256_sign_epi16(__m256i __a, __m256i __b)\n" |
7562 | "{\n" |
7563 | " return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);\n" |
7564 | "}\n" |
7565 | "\n" |
7566 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7567 | "_mm256_sign_epi32(__m256i __a, __m256i __b)\n" |
7568 | "{\n" |
7569 | " return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);\n" |
7570 | "}\n" |
7571 | "\n" |
7572 | "#define _mm256_slli_si256(a, imm) \\\n" |
7573 | " (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))\n" |
7574 | "\n" |
7575 | "#define _mm256_bslli_epi128(a, imm) \\\n" |
7576 | " (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))\n" |
7577 | "\n" |
7578 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7579 | "_mm256_slli_epi16(__m256i __a, int __count)\n" |
7580 | "{\n" |
7581 | " return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);\n" |
7582 | "}\n" |
7583 | "\n" |
7584 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7585 | "_mm256_sll_epi16(__m256i __a, __m128i __count)\n" |
7586 | "{\n" |
7587 | " return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);\n" |
7588 | "}\n" |
7589 | "\n" |
7590 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7591 | "_mm256_slli_epi32(__m256i __a, int __count)\n" |
7592 | "{\n" |
7593 | " return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);\n" |
7594 | "}\n" |
7595 | "\n" |
7596 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7597 | "_mm256_sll_epi32(__m256i __a, __m128i __count)\n" |
7598 | "{\n" |
7599 | " return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);\n" |
7600 | "}\n" |
7601 | "\n" |
7602 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7603 | "_mm256_slli_epi64(__m256i __a, int __count)\n" |
7604 | "{\n" |
7605 | " return __builtin_ia32_psllqi256((__v4di)__a, __count);\n" |
7606 | "}\n" |
7607 | "\n" |
7608 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7609 | "_mm256_sll_epi64(__m256i __a, __m128i __count)\n" |
7610 | "{\n" |
7611 | " return __builtin_ia32_psllq256((__v4di)__a, __count);\n" |
7612 | "}\n" |
7613 | "\n" |
7614 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7615 | "_mm256_srai_epi16(__m256i __a, int __count)\n" |
7616 | "{\n" |
7617 | " return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);\n" |
7618 | "}\n" |
7619 | "\n" |
7620 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7621 | "_mm256_sra_epi16(__m256i __a, __m128i __count)\n" |
7622 | "{\n" |
7623 | " return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);\n" |
7624 | "}\n" |
7625 | "\n" |
7626 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7627 | "_mm256_srai_epi32(__m256i __a, int __count)\n" |
7628 | "{\n" |
7629 | " return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);\n" |
7630 | "}\n" |
7631 | "\n" |
7632 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7633 | "_mm256_sra_epi32(__m256i __a, __m128i __count)\n" |
7634 | "{\n" |
7635 | " return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);\n" |
7636 | "}\n" |
7637 | "\n" |
7638 | "#define _mm256_srli_si256(a, imm) \\\n" |
7639 | " (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))\n" |
7640 | "\n" |
7641 | "#define _mm256_bsrli_epi128(a, imm) \\\n" |
7642 | " (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))\n" |
7643 | "\n" |
7644 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7645 | "_mm256_srli_epi16(__m256i __a, int __count)\n" |
7646 | "{\n" |
7647 | " return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);\n" |
7648 | "}\n" |
7649 | "\n" |
7650 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7651 | "_mm256_srl_epi16(__m256i __a, __m128i __count)\n" |
7652 | "{\n" |
7653 | " return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);\n" |
7654 | "}\n" |
7655 | "\n" |
7656 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7657 | "_mm256_srli_epi32(__m256i __a, int __count)\n" |
7658 | "{\n" |
7659 | " return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);\n" |
7660 | "}\n" |
7661 | "\n" |
7662 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7663 | "_mm256_srl_epi32(__m256i __a, __m128i __count)\n" |
7664 | "{\n" |
7665 | " return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);\n" |
7666 | "}\n" |
7667 | "\n" |
7668 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7669 | "_mm256_srli_epi64(__m256i __a, int __count)\n" |
7670 | "{\n" |
7671 | " return __builtin_ia32_psrlqi256((__v4di)__a, __count);\n" |
7672 | "}\n" |
7673 | "\n" |
7674 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7675 | "_mm256_srl_epi64(__m256i __a, __m128i __count)\n" |
7676 | "{\n" |
7677 | " return __builtin_ia32_psrlq256((__v4di)__a, __count);\n" |
7678 | "}\n" |
7679 | "\n" |
7680 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7681 | "_mm256_sub_epi8(__m256i __a, __m256i __b)\n" |
7682 | "{\n" |
7683 | " return (__m256i)((__v32qu)__a - (__v32qu)__b);\n" |
7684 | "}\n" |
7685 | "\n" |
7686 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7687 | "_mm256_sub_epi16(__m256i __a, __m256i __b)\n" |
7688 | "{\n" |
7689 | " return (__m256i)((__v16hu)__a - (__v16hu)__b);\n" |
7690 | "}\n" |
7691 | "\n" |
7692 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7693 | "_mm256_sub_epi32(__m256i __a, __m256i __b)\n" |
7694 | "{\n" |
7695 | " return (__m256i)((__v8su)__a - (__v8su)__b);\n" |
7696 | "}\n" |
7697 | "\n" |
7698 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7699 | "_mm256_sub_epi64(__m256i __a, __m256i __b)\n" |
7700 | "{\n" |
7701 | " return (__m256i)((__v4du)__a - (__v4du)__b);\n" |
7702 | "}\n" |
7703 | "\n" |
7704 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7705 | "_mm256_subs_epi8(__m256i __a, __m256i __b)\n" |
7706 | "{\n" |
7707 | " return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);\n" |
7708 | "}\n" |
7709 | "\n" |
7710 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7711 | "_mm256_subs_epi16(__m256i __a, __m256i __b)\n" |
7712 | "{\n" |
7713 | " return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);\n" |
7714 | "}\n" |
7715 | "\n" |
7716 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7717 | "_mm256_subs_epu8(__m256i __a, __m256i __b)\n" |
7718 | "{\n" |
7719 | " return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);\n" |
7720 | "}\n" |
7721 | "\n" |
7722 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7723 | "_mm256_subs_epu16(__m256i __a, __m256i __b)\n" |
7724 | "{\n" |
7725 | " return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);\n" |
7726 | "}\n" |
7727 | "\n" |
7728 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7729 | "_mm256_unpackhi_epi8(__m256i __a, __m256i __b)\n" |
7730 | "{\n" |
7731 | " return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);\n" |
7732 | "}\n" |
7733 | "\n" |
7734 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7735 | "_mm256_unpackhi_epi16(__m256i __a, __m256i __b)\n" |
7736 | "{\n" |
7737 | " return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);\n" |
7738 | "}\n" |
7739 | "\n" |
7740 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7741 | "_mm256_unpackhi_epi32(__m256i __a, __m256i __b)\n" |
7742 | "{\n" |
7743 | " return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);\n" |
7744 | "}\n" |
7745 | "\n" |
7746 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7747 | "_mm256_unpackhi_epi64(__m256i __a, __m256i __b)\n" |
7748 | "{\n" |
7749 | " return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3);\n" |
7750 | "}\n" |
7751 | "\n" |
7752 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7753 | "_mm256_unpacklo_epi8(__m256i __a, __m256i __b)\n" |
7754 | "{\n" |
7755 | " return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);\n" |
7756 | "}\n" |
7757 | "\n" |
7758 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7759 | "_mm256_unpacklo_epi16(__m256i __a, __m256i __b)\n" |
7760 | "{\n" |
7761 | " return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);\n" |
7762 | "}\n" |
7763 | "\n" |
7764 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7765 | "_mm256_unpacklo_epi32(__m256i __a, __m256i __b)\n" |
7766 | "{\n" |
7767 | " return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);\n" |
7768 | "}\n" |
7769 | "\n" |
7770 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7771 | "_mm256_unpacklo_epi64(__m256i __a, __m256i __b)\n" |
7772 | "{\n" |
7773 | " return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2);\n" |
7774 | "}\n" |
7775 | "\n" |
7776 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7777 | "_mm256_xor_si256(__m256i __a, __m256i __b)\n" |
7778 | "{\n" |
7779 | " return (__m256i)((__v4du)__a ^ (__v4du)__b);\n" |
7780 | "}\n" |
7781 | "\n" |
7782 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7783 | "_mm256_stream_load_si256(__m256i const *__V)\n" |
7784 | "{\n" |
7785 | " typedef __v4di __v4di_aligned __attribute__((aligned(32)));\n" |
7786 | " return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V);\n" |
7787 | "}\n" |
7788 | "\n" |
7789 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
7790 | "_mm_broadcastss_ps(__m128 __X)\n" |
7791 | "{\n" |
7792 | " return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0);\n" |
7793 | "}\n" |
7794 | "\n" |
7795 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
7796 | "_mm_broadcastsd_pd(__m128d __a)\n" |
7797 | "{\n" |
7798 | " return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);\n" |
7799 | "}\n" |
7800 | "\n" |
7801 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
7802 | "_mm256_broadcastss_ps(__m128 __X)\n" |
7803 | "{\n" |
7804 | " return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0);\n" |
7805 | "}\n" |
7806 | "\n" |
7807 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
7808 | "_mm256_broadcastsd_pd(__m128d __X)\n" |
7809 | "{\n" |
7810 | " return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0);\n" |
7811 | "}\n" |
7812 | "\n" |
7813 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7814 | "_mm256_broadcastsi128_si256(__m128i __X)\n" |
7815 | "{\n" |
7816 | " return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1);\n" |
7817 | "}\n" |
7818 | "\n" |
7819 | "#define _mm_blend_epi32(V1, V2, M) \\\n" |
7820 | " (__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \\\n" |
7821 | " (__v4si)(__m128i)(V2), (int)(M))\n" |
7822 | "\n" |
7823 | "#define _mm256_blend_epi32(V1, V2, M) \\\n" |
7824 | " (__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \\\n" |
7825 | " (__v8si)(__m256i)(V2), (int)(M))\n" |
7826 | "\n" |
7827 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7828 | "_mm256_broadcastb_epi8(__m128i __X)\n" |
7829 | "{\n" |
7830 | " return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\n" |
7831 | "}\n" |
7832 | "\n" |
7833 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7834 | "_mm256_broadcastw_epi16(__m128i __X)\n" |
7835 | "{\n" |
7836 | " return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\n" |
7837 | "}\n" |
7838 | "\n" |
7839 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7840 | "_mm256_broadcastd_epi32(__m128i __X)\n" |
7841 | "{\n" |
7842 | " return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0);\n" |
7843 | "}\n" |
7844 | "\n" |
7845 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7846 | "_mm256_broadcastq_epi64(__m128i __X)\n" |
7847 | "{\n" |
7848 | " return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0);\n" |
7849 | "}\n" |
7850 | "\n" |
7851 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
7852 | "_mm_broadcastb_epi8(__m128i __X)\n" |
7853 | "{\n" |
7854 | " return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\n" |
7855 | "}\n" |
7856 | "\n" |
7857 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
7858 | "_mm_broadcastw_epi16(__m128i __X)\n" |
7859 | "{\n" |
7860 | " return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0);\n" |
7861 | "}\n" |
7862 | "\n" |
7863 | "\n" |
7864 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
7865 | "_mm_broadcastd_epi32(__m128i __X)\n" |
7866 | "{\n" |
7867 | " return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0);\n" |
7868 | "}\n" |
7869 | "\n" |
7870 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
7871 | "_mm_broadcastq_epi64(__m128i __X)\n" |
7872 | "{\n" |
7873 | " return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0);\n" |
7874 | "}\n" |
7875 | "\n" |
7876 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7877 | "_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)\n" |
7878 | "{\n" |
7879 | " return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);\n" |
7880 | "}\n" |
7881 | "\n" |
7882 | "#define _mm256_permute4x64_pd(V, M) \\\n" |
7883 | " (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M))\n" |
7884 | "\n" |
7885 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
7886 | "_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)\n" |
7887 | "{\n" |
7888 | " return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b);\n" |
7889 | "}\n" |
7890 | "\n" |
7891 | "#define _mm256_permute4x64_epi64(V, M) \\\n" |
7892 | " (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M))\n" |
7893 | "\n" |
7894 | "#define _mm256_permute2x128_si256(V1, V2, M) \\\n" |
7895 | " (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (int)(M))\n" |
7896 | "\n" |
7897 | "#define _mm256_extracti128_si256(V, M) \\\n" |
7898 | " (__m128i)__builtin_ia32_extract128i256((__v4di)(__m256i)(V), (int)(M))\n" |
7899 | "\n" |
7900 | "#define _mm256_inserti128_si256(V1, V2, M) \\\n" |
7901 | " (__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \\\n" |
7902 | " (__v2di)(__m128i)(V2), (int)(M))\n" |
7903 | "\n" |
7904 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7905 | "_mm256_maskload_epi32(int const *__X, __m256i __M)\n" |
7906 | "{\n" |
7907 | " return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M);\n" |
7908 | "}\n" |
7909 | "\n" |
7910 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7911 | "_mm256_maskload_epi64(long long const *__X, __m256i __M)\n" |
7912 | "{\n" |
7913 | " return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, (__v4di)__M);\n" |
7914 | "}\n" |
7915 | "\n" |
7916 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
7917 | "_mm_maskload_epi32(int const *__X, __m128i __M)\n" |
7918 | "{\n" |
7919 | " return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M);\n" |
7920 | "}\n" |
7921 | "\n" |
7922 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
7923 | "_mm_maskload_epi64(long long const *__X, __m128i __M)\n" |
7924 | "{\n" |
7925 | " return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M);\n" |
7926 | "}\n" |
7927 | "\n" |
7928 | "static __inline__ void __DEFAULT_FN_ATTRS256\n" |
7929 | "_mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y)\n" |
7930 | "{\n" |
7931 | " __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y);\n" |
7932 | "}\n" |
7933 | "\n" |
7934 | "static __inline__ void __DEFAULT_FN_ATTRS256\n" |
7935 | "_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y)\n" |
7936 | "{\n" |
7937 | " __builtin_ia32_maskstoreq256((__v4di *)__X, (__v4di)__M, (__v4di)__Y);\n" |
7938 | "}\n" |
7939 | "\n" |
7940 | "static __inline__ void __DEFAULT_FN_ATTRS128\n" |
7941 | "_mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y)\n" |
7942 | "{\n" |
7943 | " __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y);\n" |
7944 | "}\n" |
7945 | "\n" |
7946 | "static __inline__ void __DEFAULT_FN_ATTRS128\n" |
7947 | "_mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)\n" |
7948 | "{\n" |
7949 | " __builtin_ia32_maskstoreq(( __v2di *)__X, (__v2di)__M, (__v2di)__Y);\n" |
7950 | "}\n" |
7951 | "\n" |
7952 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7953 | "_mm256_sllv_epi32(__m256i __X, __m256i __Y)\n" |
7954 | "{\n" |
7955 | " return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);\n" |
7956 | "}\n" |
7957 | "\n" |
7958 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
7959 | "_mm_sllv_epi32(__m128i __X, __m128i __Y)\n" |
7960 | "{\n" |
7961 | " return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);\n" |
7962 | "}\n" |
7963 | "\n" |
7964 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7965 | "_mm256_sllv_epi64(__m256i __X, __m256i __Y)\n" |
7966 | "{\n" |
7967 | " return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y);\n" |
7968 | "}\n" |
7969 | "\n" |
7970 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
7971 | "_mm_sllv_epi64(__m128i __X, __m128i __Y)\n" |
7972 | "{\n" |
7973 | " return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y);\n" |
7974 | "}\n" |
7975 | "\n" |
7976 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7977 | "_mm256_srav_epi32(__m256i __X, __m256i __Y)\n" |
7978 | "{\n" |
7979 | " return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);\n" |
7980 | "}\n" |
7981 | "\n" |
7982 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
7983 | "_mm_srav_epi32(__m128i __X, __m128i __Y)\n" |
7984 | "{\n" |
7985 | " return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);\n" |
7986 | "}\n" |
7987 | "\n" |
7988 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
7989 | "_mm256_srlv_epi32(__m256i __X, __m256i __Y)\n" |
7990 | "{\n" |
7991 | " return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);\n" |
7992 | "}\n" |
7993 | "\n" |
7994 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
7995 | "_mm_srlv_epi32(__m128i __X, __m128i __Y)\n" |
7996 | "{\n" |
7997 | " return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);\n" |
7998 | "}\n" |
7999 | "\n" |
8000 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
8001 | "_mm256_srlv_epi64(__m256i __X, __m256i __Y)\n" |
8002 | "{\n" |
8003 | " return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y);\n" |
8004 | "}\n" |
8005 | "\n" |
8006 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
8007 | "_mm_srlv_epi64(__m128i __X, __m128i __Y)\n" |
8008 | "{\n" |
8009 | " return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y);\n" |
8010 | "}\n" |
8011 | "\n" |
8012 | "#define _mm_mask_i32gather_pd(a, m, i, mask, s) \\\n" |
8013 | " (__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128i)(a), \\\n" |
8014 | " (double const *)(m), \\\n" |
8015 | " (__v4si)(__m128i)(i), \\\n" |
8016 | " (__v2df)(__m128d)(mask), (s))\n" |
8017 | "\n" |
8018 | "#define _mm256_mask_i32gather_pd(a, m, i, mask, s) \\\n" |
8019 | " (__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \\\n" |
8020 | " (double const *)(m), \\\n" |
8021 | " (__v4si)(__m128i)(i), \\\n" |
8022 | " (__v4df)(__m256d)(mask), (s))\n" |
8023 | "\n" |
8024 | "#define _mm_mask_i64gather_pd(a, m, i, mask, s) \\\n" |
8025 | " (__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \\\n" |
8026 | " (double const *)(m), \\\n" |
8027 | " (__v2di)(__m128i)(i), \\\n" |
8028 | " (__v2df)(__m128d)(mask), (s))\n" |
8029 | "\n" |
8030 | "#define _mm256_mask_i64gather_pd(a, m, i, mask, s) \\\n" |
8031 | " (__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \\\n" |
8032 | " (double const *)(m), \\\n" |
8033 | " (__v4di)(__m256i)(i), \\\n" |
8034 | " (__v4df)(__m256d)(mask), (s))\n" |
8035 | "\n" |
8036 | "#define _mm_mask_i32gather_ps(a, m, i, mask, s) \\\n" |
8037 | " (__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \\\n" |
8038 | " (float const *)(m), \\\n" |
8039 | " (__v4si)(__m128i)(i), \\\n" |
8040 | " (__v4sf)(__m128)(mask), (s))\n" |
8041 | "\n" |
8042 | "#define _mm256_mask_i32gather_ps(a, m, i, mask, s) \\\n" |
8043 | " (__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \\\n" |
8044 | " (float const *)(m), \\\n" |
8045 | " (__v8si)(__m256i)(i), \\\n" |
8046 | " (__v8sf)(__m256)(mask), (s))\n" |
8047 | "\n" |
8048 | "#define _mm_mask_i64gather_ps(a, m, i, mask, s) \\\n" |
8049 | " (__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \\\n" |
8050 | " (float const *)(m), \\\n" |
8051 | " (__v2di)(__m128i)(i), \\\n" |
8052 | " (__v4sf)(__m128)(mask), (s))\n" |
8053 | "\n" |
8054 | "#define _mm256_mask_i64gather_ps(a, m, i, mask, s) \\\n" |
8055 | " (__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \\\n" |
8056 | " (float const *)(m), \\\n" |
8057 | " (__v4di)(__m256i)(i), \\\n" |
8058 | " (__v4sf)(__m128)(mask), (s))\n" |
8059 | "\n" |
8060 | "#define _mm_mask_i32gather_epi32(a, m, i, mask, s) \\\n" |
8061 | " (__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \\\n" |
8062 | " (int const *)(m), \\\n" |
8063 | " (__v4si)(__m128i)(i), \\\n" |
8064 | " (__v4si)(__m128i)(mask), (s))\n" |
8065 | "\n" |
8066 | "#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) \\\n" |
8067 | " (__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \\\n" |
8068 | " (int const *)(m), \\\n" |
8069 | " (__v8si)(__m256i)(i), \\\n" |
8070 | " (__v8si)(__m256i)(mask), (s))\n" |
8071 | "\n" |
8072 | "#define _mm_mask_i64gather_epi32(a, m, i, mask, s) \\\n" |
8073 | " (__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \\\n" |
8074 | " (int const *)(m), \\\n" |
8075 | " (__v2di)(__m128i)(i), \\\n" |
8076 | " (__v4si)(__m128i)(mask), (s))\n" |
8077 | "\n" |
8078 | "#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) \\\n" |
8079 | " (__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \\\n" |
8080 | " (int const *)(m), \\\n" |
8081 | " (__v4di)(__m256i)(i), \\\n" |
8082 | " (__v4si)(__m128i)(mask), (s))\n" |
8083 | "\n" |
8084 | "#define _mm_mask_i32gather_epi64(a, m, i, mask, s) \\\n" |
8085 | " (__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \\\n" |
8086 | " (long long const *)(m), \\\n" |
8087 | " (__v4si)(__m128i)(i), \\\n" |
8088 | " (__v2di)(__m128i)(mask), (s))\n" |
8089 | "\n" |
8090 | "#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) \\\n" |
8091 | " (__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \\\n" |
8092 | " (long long const *)(m), \\\n" |
8093 | " (__v4si)(__m128i)(i), \\\n" |
8094 | " (__v4di)(__m256i)(mask), (s))\n" |
8095 | "\n" |
8096 | "#define _mm_mask_i64gather_epi64(a, m, i, mask, s) \\\n" |
8097 | " (__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \\\n" |
8098 | " (long long const *)(m), \\\n" |
8099 | " (__v2di)(__m128i)(i), \\\n" |
8100 | " (__v2di)(__m128i)(mask), (s))\n" |
8101 | "\n" |
8102 | "#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) \\\n" |
8103 | " (__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \\\n" |
8104 | " (long long const *)(m), \\\n" |
8105 | " (__v4di)(__m256i)(i), \\\n" |
8106 | " (__v4di)(__m256i)(mask), (s))\n" |
8107 | "\n" |
8108 | "#define _mm_i32gather_pd(m, i, s) \\\n" |
8109 | " (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \\\n" |
8110 | " (double const *)(m), \\\n" |
8111 | " (__v4si)(__m128i)(i), \\\n" |
8112 | " (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \\\n" |
8113 | " _mm_setzero_pd()), \\\n" |
8114 | " (s))\n" |
8115 | "\n" |
8116 | "#define _mm256_i32gather_pd(m, i, s) \\\n" |
8117 | " (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \\\n" |
8118 | " (double const *)(m), \\\n" |
8119 | " (__v4si)(__m128i)(i), \\\n" |
8120 | " (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \\\n" |
8121 | " _mm256_setzero_pd(), \\\n" |
8122 | " _CMP_EQ_OQ), \\\n" |
8123 | " (s))\n" |
8124 | "\n" |
8125 | "#define _mm_i64gather_pd(m, i, s) \\\n" |
8126 | " (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \\\n" |
8127 | " (double const *)(m), \\\n" |
8128 | " (__v2di)(__m128i)(i), \\\n" |
8129 | " (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \\\n" |
8130 | " _mm_setzero_pd()), \\\n" |
8131 | " (s))\n" |
8132 | "\n" |
8133 | "#define _mm256_i64gather_pd(m, i, s) \\\n" |
8134 | " (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \\\n" |
8135 | " (double const *)(m), \\\n" |
8136 | " (__v4di)(__m256i)(i), \\\n" |
8137 | " (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \\\n" |
8138 | " _mm256_setzero_pd(), \\\n" |
8139 | " _CMP_EQ_OQ), \\\n" |
8140 | " (s))\n" |
8141 | "\n" |
8142 | "#define _mm_i32gather_ps(m, i, s) \\\n" |
8143 | " (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \\\n" |
8144 | " (float const *)(m), \\\n" |
8145 | " (__v4si)(__m128i)(i), \\\n" |
8146 | " (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \\\n" |
8147 | " _mm_setzero_ps()), \\\n" |
8148 | " (s))\n" |
8149 | "\n" |
8150 | "#define _mm256_i32gather_ps(m, i, s) \\\n" |
8151 | " (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \\\n" |
8152 | " (float const *)(m), \\\n" |
8153 | " (__v8si)(__m256i)(i), \\\n" |
8154 | " (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \\\n" |
8155 | " _mm256_setzero_ps(), \\\n" |
8156 | " _CMP_EQ_OQ), \\\n" |
8157 | " (s))\n" |
8158 | "\n" |
8159 | "#define _mm_i64gather_ps(m, i, s) \\\n" |
8160 | " (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \\\n" |
8161 | " (float const *)(m), \\\n" |
8162 | " (__v2di)(__m128i)(i), \\\n" |
8163 | " (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \\\n" |
8164 | " _mm_setzero_ps()), \\\n" |
8165 | " (s))\n" |
8166 | "\n" |
8167 | "#define _mm256_i64gather_ps(m, i, s) \\\n" |
8168 | " (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \\\n" |
8169 | " (float const *)(m), \\\n" |
8170 | " (__v4di)(__m256i)(i), \\\n" |
8171 | " (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \\\n" |
8172 | " _mm_setzero_ps()), \\\n" |
8173 | " (s))\n" |
8174 | "\n" |
8175 | "#define _mm_i32gather_epi32(m, i, s) \\\n" |
8176 | " (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \\\n" |
8177 | " (int const *)(m), (__v4si)(__m128i)(i), \\\n" |
8178 | " (__v4si)_mm_set1_epi32(-1), (s))\n" |
8179 | "\n" |
8180 | "#define _mm256_i32gather_epi32(m, i, s) \\\n" |
8181 | " (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \\\n" |
8182 | " (int const *)(m), (__v8si)(__m256i)(i), \\\n" |
8183 | " (__v8si)_mm256_set1_epi32(-1), (s))\n" |
8184 | "\n" |
8185 | "#define _mm_i64gather_epi32(m, i, s) \\\n" |
8186 | " (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \\\n" |
8187 | " (int const *)(m), (__v2di)(__m128i)(i), \\\n" |
8188 | " (__v4si)_mm_set1_epi32(-1), (s))\n" |
8189 | "\n" |
8190 | "#define _mm256_i64gather_epi32(m, i, s) \\\n" |
8191 | " (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \\\n" |
8192 | " (int const *)(m), (__v4di)(__m256i)(i), \\\n" |
8193 | " (__v4si)_mm_set1_epi32(-1), (s))\n" |
8194 | "\n" |
8195 | "#define _mm_i32gather_epi64(m, i, s) \\\n" |
8196 | " (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \\\n" |
8197 | " (long long const *)(m), \\\n" |
8198 | " (__v4si)(__m128i)(i), \\\n" |
8199 | " (__v2di)_mm_set1_epi64x(-1), (s))\n" |
8200 | "\n" |
8201 | "#define _mm256_i32gather_epi64(m, i, s) \\\n" |
8202 | " (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \\\n" |
8203 | " (long long const *)(m), \\\n" |
8204 | " (__v4si)(__m128i)(i), \\\n" |
8205 | " (__v4di)_mm256_set1_epi64x(-1), (s))\n" |
8206 | "\n" |
8207 | "#define _mm_i64gather_epi64(m, i, s) \\\n" |
8208 | " (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \\\n" |
8209 | " (long long const *)(m), \\\n" |
8210 | " (__v2di)(__m128i)(i), \\\n" |
8211 | " (__v2di)_mm_set1_epi64x(-1), (s))\n" |
8212 | "\n" |
8213 | "#define _mm256_i64gather_epi64(m, i, s) \\\n" |
8214 | " (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \\\n" |
8215 | " (long long const *)(m), \\\n" |
8216 | " (__v4di)(__m256i)(i), \\\n" |
8217 | " (__v4di)_mm256_set1_epi64x(-1), (s))\n" |
8218 | "\n" |
8219 | "#undef __DEFAULT_FN_ATTRS256\n" |
8220 | "#undef __DEFAULT_FN_ATTRS128\n" |
8221 | "\n" |
8222 | "#endif /* __AVX2INTRIN_H */\n" |
8223 | "" } , |
8224 | { "/builtins/avxintrin.h" , "/*===---- avxintrin.h - AVX intrinsics -------------------------------------===\n" |
8225 | " *\n" |
8226 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
8227 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
8228 | " * in the Software without restriction, including without limitation the rights\n" |
8229 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
8230 | " * copies of the Software, and to permit persons to whom the Software is\n" |
8231 | " * furnished to do so, subject to the following conditions:\n" |
8232 | " *\n" |
8233 | " * The above copyright notice and this permission notice shall be included in\n" |
8234 | " * all copies or substantial portions of the Software.\n" |
8235 | " *\n" |
8236 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
8237 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
8238 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
8239 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
8240 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
8241 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
8242 | " * THE SOFTWARE.\n" |
8243 | " *\n" |
8244 | " *===-----------------------------------------------------------------------===\n" |
8245 | " */\n" |
8246 | "\n" |
8247 | "#ifndef __IMMINTRIN_H\n" |
8248 | "#error \"Never use <avxintrin.h> directly; include <immintrin.h> instead.\"\n" |
8249 | "#endif\n" |
8250 | "\n" |
8251 | "#ifndef __AVXINTRIN_H\n" |
8252 | "#define __AVXINTRIN_H\n" |
8253 | "\n" |
8254 | "typedef double __v4df __attribute__ ((__vector_size__ (32)));\n" |
8255 | "typedef float __v8sf __attribute__ ((__vector_size__ (32)));\n" |
8256 | "typedef long long __v4di __attribute__ ((__vector_size__ (32)));\n" |
8257 | "typedef int __v8si __attribute__ ((__vector_size__ (32)));\n" |
8258 | "typedef short __v16hi __attribute__ ((__vector_size__ (32)));\n" |
8259 | "typedef char __v32qi __attribute__ ((__vector_size__ (32)));\n" |
8260 | "\n" |
8261 | "/* Unsigned types */\n" |
8262 | "typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32)));\n" |
8263 | "typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));\n" |
8264 | "typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));\n" |
8265 | "typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32)));\n" |
8266 | "\n" |
8267 | "/* We need an explicitly signed variant for char. Note that this shouldn't\n" |
8268 | " * appear in the interface though. */\n" |
8269 | "typedef signed char __v32qs __attribute__((__vector_size__(32)));\n" |
8270 | "\n" |
8271 | "typedef float __m256 __attribute__ ((__vector_size__ (32)));\n" |
8272 | "typedef double __m256d __attribute__((__vector_size__(32)));\n" |
8273 | "typedef long long __m256i __attribute__((__vector_size__(32)));\n" |
8274 | "\n" |
8275 | "/* Define the default attributes for the functions in this file. */\n" |
8276 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"avx\"), __min_vector_width__(256)))\n" |
8277 | "#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"avx\"), __min_vector_width__(128)))\n" |
8278 | "\n" |
8279 | "/* Arithmetic */\n" |
8280 | "/// Adds two 256-bit vectors of [4 x double].\n" |
8281 | "///\n" |
8282 | "/// \\headerfile <x86intrin.h>\n" |
8283 | "///\n" |
8284 | "/// This intrinsic corresponds to the <c> VADDPD </c> instruction.\n" |
8285 | "///\n" |
8286 | "/// \\param __a\n" |
8287 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
8288 | "/// \\param __b\n" |
8289 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
8290 | "/// \\returns A 256-bit vector of [4 x double] containing the sums of both\n" |
8291 | "/// operands.\n" |
8292 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
8293 | "_mm256_add_pd(__m256d __a, __m256d __b)\n" |
8294 | "{\n" |
8295 | " return (__m256d)((__v4df)__a+(__v4df)__b);\n" |
8296 | "}\n" |
8297 | "\n" |
8298 | "/// Adds two 256-bit vectors of [8 x float].\n" |
8299 | "///\n" |
8300 | "/// \\headerfile <x86intrin.h>\n" |
8301 | "///\n" |
8302 | "/// This intrinsic corresponds to the <c> VADDPS </c> instruction.\n" |
8303 | "///\n" |
8304 | "/// \\param __a\n" |
8305 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
8306 | "/// \\param __b\n" |
8307 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
8308 | "/// \\returns A 256-bit vector of [8 x float] containing the sums of both\n" |
8309 | "/// operands.\n" |
8310 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8311 | "_mm256_add_ps(__m256 __a, __m256 __b)\n" |
8312 | "{\n" |
8313 | " return (__m256)((__v8sf)__a+(__v8sf)__b);\n" |
8314 | "}\n" |
8315 | "\n" |
8316 | "/// Subtracts two 256-bit vectors of [4 x double].\n" |
8317 | "///\n" |
8318 | "/// \\headerfile <x86intrin.h>\n" |
8319 | "///\n" |
8320 | "/// This intrinsic corresponds to the <c> VSUBPD </c> instruction.\n" |
8321 | "///\n" |
8322 | "/// \\param __a\n" |
8323 | "/// A 256-bit vector of [4 x double] containing the minuend.\n" |
8324 | "/// \\param __b\n" |
8325 | "/// A 256-bit vector of [4 x double] containing the subtrahend.\n" |
8326 | "/// \\returns A 256-bit vector of [4 x double] containing the differences between\n" |
8327 | "/// both operands.\n" |
8328 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
8329 | "_mm256_sub_pd(__m256d __a, __m256d __b)\n" |
8330 | "{\n" |
8331 | " return (__m256d)((__v4df)__a-(__v4df)__b);\n" |
8332 | "}\n" |
8333 | "\n" |
8334 | "/// Subtracts two 256-bit vectors of [8 x float].\n" |
8335 | "///\n" |
8336 | "/// \\headerfile <x86intrin.h>\n" |
8337 | "///\n" |
8338 | "/// This intrinsic corresponds to the <c> VSUBPS </c> instruction.\n" |
8339 | "///\n" |
8340 | "/// \\param __a\n" |
8341 | "/// A 256-bit vector of [8 x float] containing the minuend.\n" |
8342 | "/// \\param __b\n" |
8343 | "/// A 256-bit vector of [8 x float] containing the subtrahend.\n" |
8344 | "/// \\returns A 256-bit vector of [8 x float] containing the differences between\n" |
8345 | "/// both operands.\n" |
8346 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8347 | "_mm256_sub_ps(__m256 __a, __m256 __b)\n" |
8348 | "{\n" |
8349 | " return (__m256)((__v8sf)__a-(__v8sf)__b);\n" |
8350 | "}\n" |
8351 | "\n" |
8352 | "/// Adds the even-indexed values and subtracts the odd-indexed values of\n" |
8353 | "/// two 256-bit vectors of [4 x double].\n" |
8354 | "///\n" |
8355 | "/// \\headerfile <x86intrin.h>\n" |
8356 | "///\n" |
8357 | "/// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.\n" |
8358 | "///\n" |
8359 | "/// \\param __a\n" |
8360 | "/// A 256-bit vector of [4 x double] containing the left source operand.\n" |
8361 | "/// \\param __b\n" |
8362 | "/// A 256-bit vector of [4 x double] containing the right source operand.\n" |
8363 | "/// \\returns A 256-bit vector of [4 x double] containing the alternating sums\n" |
8364 | "/// and differences between both operands.\n" |
8365 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
8366 | "_mm256_addsub_pd(__m256d __a, __m256d __b)\n" |
8367 | "{\n" |
8368 | " return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);\n" |
8369 | "}\n" |
8370 | "\n" |
8371 | "/// Adds the even-indexed values and subtracts the odd-indexed values of\n" |
8372 | "/// two 256-bit vectors of [8 x float].\n" |
8373 | "///\n" |
8374 | "/// \\headerfile <x86intrin.h>\n" |
8375 | "///\n" |
8376 | "/// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.\n" |
8377 | "///\n" |
8378 | "/// \\param __a\n" |
8379 | "/// A 256-bit vector of [8 x float] containing the left source operand.\n" |
8380 | "/// \\param __b\n" |
8381 | "/// A 256-bit vector of [8 x float] containing the right source operand.\n" |
8382 | "/// \\returns A 256-bit vector of [8 x float] containing the alternating sums and\n" |
8383 | "/// differences between both operands.\n" |
8384 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8385 | "_mm256_addsub_ps(__m256 __a, __m256 __b)\n" |
8386 | "{\n" |
8387 | " return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);\n" |
8388 | "}\n" |
8389 | "\n" |
8390 | "/// Divides two 256-bit vectors of [4 x double].\n" |
8391 | "///\n" |
8392 | "/// \\headerfile <x86intrin.h>\n" |
8393 | "///\n" |
8394 | "/// This intrinsic corresponds to the <c> VDIVPD </c> instruction.\n" |
8395 | "///\n" |
8396 | "/// \\param __a\n" |
8397 | "/// A 256-bit vector of [4 x double] containing the dividend.\n" |
8398 | "/// \\param __b\n" |
8399 | "/// A 256-bit vector of [4 x double] containing the divisor.\n" |
8400 | "/// \\returns A 256-bit vector of [4 x double] containing the quotients of both\n" |
8401 | "/// operands.\n" |
8402 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
8403 | "_mm256_div_pd(__m256d __a, __m256d __b)\n" |
8404 | "{\n" |
8405 | " return (__m256d)((__v4df)__a/(__v4df)__b);\n" |
8406 | "}\n" |
8407 | "\n" |
8408 | "/// Divides two 256-bit vectors of [8 x float].\n" |
8409 | "///\n" |
8410 | "/// \\headerfile <x86intrin.h>\n" |
8411 | "///\n" |
8412 | "/// This intrinsic corresponds to the <c> VDIVPS </c> instruction.\n" |
8413 | "///\n" |
8414 | "/// \\param __a\n" |
8415 | "/// A 256-bit vector of [8 x float] containing the dividend.\n" |
8416 | "/// \\param __b\n" |
8417 | "/// A 256-bit vector of [8 x float] containing the divisor.\n" |
8418 | "/// \\returns A 256-bit vector of [8 x float] containing the quotients of both\n" |
8419 | "/// operands.\n" |
8420 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8421 | "_mm256_div_ps(__m256 __a, __m256 __b)\n" |
8422 | "{\n" |
8423 | " return (__m256)((__v8sf)__a/(__v8sf)__b);\n" |
8424 | "}\n" |
8425 | "\n" |
8426 | "/// Compares two 256-bit vectors of [4 x double] and returns the greater\n" |
8427 | "/// of each pair of values.\n" |
8428 | "///\n" |
8429 | "/// \\headerfile <x86intrin.h>\n" |
8430 | "///\n" |
8431 | "/// This intrinsic corresponds to the <c> VMAXPD </c> instruction.\n" |
8432 | "///\n" |
8433 | "/// \\param __a\n" |
8434 | "/// A 256-bit vector of [4 x double] containing one of the operands.\n" |
8435 | "/// \\param __b\n" |
8436 | "/// A 256-bit vector of [4 x double] containing one of the operands.\n" |
8437 | "/// \\returns A 256-bit vector of [4 x double] containing the maximum values\n" |
8438 | "/// between both operands.\n" |
8439 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
8440 | "_mm256_max_pd(__m256d __a, __m256d __b)\n" |
8441 | "{\n" |
8442 | " return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);\n" |
8443 | "}\n" |
8444 | "\n" |
8445 | "/// Compares two 256-bit vectors of [8 x float] and returns the greater\n" |
8446 | "/// of each pair of values.\n" |
8447 | "///\n" |
8448 | "/// \\headerfile <x86intrin.h>\n" |
8449 | "///\n" |
8450 | "/// This intrinsic corresponds to the <c> VMAXPS </c> instruction.\n" |
8451 | "///\n" |
8452 | "/// \\param __a\n" |
8453 | "/// A 256-bit vector of [8 x float] containing one of the operands.\n" |
8454 | "/// \\param __b\n" |
8455 | "/// A 256-bit vector of [8 x float] containing one of the operands.\n" |
8456 | "/// \\returns A 256-bit vector of [8 x float] containing the maximum values\n" |
8457 | "/// between both operands.\n" |
8458 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8459 | "_mm256_max_ps(__m256 __a, __m256 __b)\n" |
8460 | "{\n" |
8461 | " return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);\n" |
8462 | "}\n" |
8463 | "\n" |
8464 | "/// Compares two 256-bit vectors of [4 x double] and returns the lesser\n" |
8465 | "/// of each pair of values.\n" |
8466 | "///\n" |
8467 | "/// \\headerfile <x86intrin.h>\n" |
8468 | "///\n" |
8469 | "/// This intrinsic corresponds to the <c> VMINPD </c> instruction.\n" |
8470 | "///\n" |
8471 | "/// \\param __a\n" |
8472 | "/// A 256-bit vector of [4 x double] containing one of the operands.\n" |
8473 | "/// \\param __b\n" |
8474 | "/// A 256-bit vector of [4 x double] containing one of the operands.\n" |
8475 | "/// \\returns A 256-bit vector of [4 x double] containing the minimum values\n" |
8476 | "/// between both operands.\n" |
8477 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
8478 | "_mm256_min_pd(__m256d __a, __m256d __b)\n" |
8479 | "{\n" |
8480 | " return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);\n" |
8481 | "}\n" |
8482 | "\n" |
8483 | "/// Compares two 256-bit vectors of [8 x float] and returns the lesser\n" |
8484 | "/// of each pair of values.\n" |
8485 | "///\n" |
8486 | "/// \\headerfile <x86intrin.h>\n" |
8487 | "///\n" |
8488 | "/// This intrinsic corresponds to the <c> VMINPS </c> instruction.\n" |
8489 | "///\n" |
8490 | "/// \\param __a\n" |
8491 | "/// A 256-bit vector of [8 x float] containing one of the operands.\n" |
8492 | "/// \\param __b\n" |
8493 | "/// A 256-bit vector of [8 x float] containing one of the operands.\n" |
8494 | "/// \\returns A 256-bit vector of [8 x float] containing the minimum values\n" |
8495 | "/// between both operands.\n" |
8496 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8497 | "_mm256_min_ps(__m256 __a, __m256 __b)\n" |
8498 | "{\n" |
8499 | " return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);\n" |
8500 | "}\n" |
8501 | "\n" |
8502 | "/// Multiplies two 256-bit vectors of [4 x double].\n" |
8503 | "///\n" |
8504 | "/// \\headerfile <x86intrin.h>\n" |
8505 | "///\n" |
8506 | "/// This intrinsic corresponds to the <c> VMULPD </c> instruction.\n" |
8507 | "///\n" |
8508 | "/// \\param __a\n" |
8509 | "/// A 256-bit vector of [4 x double] containing one of the operands.\n" |
8510 | "/// \\param __b\n" |
8511 | "/// A 256-bit vector of [4 x double] containing one of the operands.\n" |
8512 | "/// \\returns A 256-bit vector of [4 x double] containing the products of both\n" |
8513 | "/// operands.\n" |
8514 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
8515 | "_mm256_mul_pd(__m256d __a, __m256d __b)\n" |
8516 | "{\n" |
8517 | " return (__m256d)((__v4df)__a * (__v4df)__b);\n" |
8518 | "}\n" |
8519 | "\n" |
8520 | "/// Multiplies two 256-bit vectors of [8 x float].\n" |
8521 | "///\n" |
8522 | "/// \\headerfile <x86intrin.h>\n" |
8523 | "///\n" |
8524 | "/// This intrinsic corresponds to the <c> VMULPS </c> instruction.\n" |
8525 | "///\n" |
8526 | "/// \\param __a\n" |
8527 | "/// A 256-bit vector of [8 x float] containing one of the operands.\n" |
8528 | "/// \\param __b\n" |
8529 | "/// A 256-bit vector of [8 x float] containing one of the operands.\n" |
8530 | "/// \\returns A 256-bit vector of [8 x float] containing the products of both\n" |
8531 | "/// operands.\n" |
8532 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8533 | "_mm256_mul_ps(__m256 __a, __m256 __b)\n" |
8534 | "{\n" |
8535 | " return (__m256)((__v8sf)__a * (__v8sf)__b);\n" |
8536 | "}\n" |
8537 | "\n" |
8538 | "/// Calculates the square roots of the values in a 256-bit vector of\n" |
8539 | "/// [4 x double].\n" |
8540 | "///\n" |
8541 | "/// \\headerfile <x86intrin.h>\n" |
8542 | "///\n" |
8543 | "/// This intrinsic corresponds to the <c> VSQRTPD </c> instruction.\n" |
8544 | "///\n" |
8545 | "/// \\param __a\n" |
8546 | "/// A 256-bit vector of [4 x double].\n" |
8547 | "/// \\returns A 256-bit vector of [4 x double] containing the square roots of the\n" |
8548 | "/// values in the operand.\n" |
8549 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
8550 | "_mm256_sqrt_pd(__m256d __a)\n" |
8551 | "{\n" |
8552 | " return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);\n" |
8553 | "}\n" |
8554 | "\n" |
8555 | "/// Calculates the square roots of the values in a 256-bit vector of\n" |
8556 | "/// [8 x float].\n" |
8557 | "///\n" |
8558 | "/// \\headerfile <x86intrin.h>\n" |
8559 | "///\n" |
8560 | "/// This intrinsic corresponds to the <c> VSQRTPS </c> instruction.\n" |
8561 | "///\n" |
8562 | "/// \\param __a\n" |
8563 | "/// A 256-bit vector of [8 x float].\n" |
8564 | "/// \\returns A 256-bit vector of [8 x float] containing the square roots of the\n" |
8565 | "/// values in the operand.\n" |
8566 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8567 | "_mm256_sqrt_ps(__m256 __a)\n" |
8568 | "{\n" |
8569 | " return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);\n" |
8570 | "}\n" |
8571 | "\n" |
8572 | "/// Calculates the reciprocal square roots of the values in a 256-bit\n" |
8573 | "/// vector of [8 x float].\n" |
8574 | "///\n" |
8575 | "/// \\headerfile <x86intrin.h>\n" |
8576 | "///\n" |
8577 | "/// This intrinsic corresponds to the <c> VRSQRTPS </c> instruction.\n" |
8578 | "///\n" |
8579 | "/// \\param __a\n" |
8580 | "/// A 256-bit vector of [8 x float].\n" |
8581 | "/// \\returns A 256-bit vector of [8 x float] containing the reciprocal square\n" |
8582 | "/// roots of the values in the operand.\n" |
8583 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8584 | "_mm256_rsqrt_ps(__m256 __a)\n" |
8585 | "{\n" |
8586 | " return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);\n" |
8587 | "}\n" |
8588 | "\n" |
8589 | "/// Calculates the reciprocals of the values in a 256-bit vector of\n" |
8590 | "/// [8 x float].\n" |
8591 | "///\n" |
8592 | "/// \\headerfile <x86intrin.h>\n" |
8593 | "///\n" |
8594 | "/// This intrinsic corresponds to the <c> VRCPPS </c> instruction.\n" |
8595 | "///\n" |
8596 | "/// \\param __a\n" |
8597 | "/// A 256-bit vector of [8 x float].\n" |
8598 | "/// \\returns A 256-bit vector of [8 x float] containing the reciprocals of the\n" |
8599 | "/// values in the operand.\n" |
8600 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8601 | "_mm256_rcp_ps(__m256 __a)\n" |
8602 | "{\n" |
8603 | " return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);\n" |
8604 | "}\n" |
8605 | "\n" |
8606 | "/// Rounds the values in a 256-bit vector of [4 x double] as specified\n" |
8607 | "/// by the byte operand. The source values are rounded to integer values and\n" |
8608 | "/// returned as 64-bit double-precision floating-point values.\n" |
8609 | "///\n" |
8610 | "/// \\headerfile <x86intrin.h>\n" |
8611 | "///\n" |
8612 | "/// \\code\n" |
8613 | "/// __m256d _mm256_round_pd(__m256d V, const int M);\n" |
8614 | "/// \\endcode\n" |
8615 | "///\n" |
8616 | "/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.\n" |
8617 | "///\n" |
8618 | "/// \\param V\n" |
8619 | "/// A 256-bit vector of [4 x double].\n" |
8620 | "/// \\param M\n" |
8621 | "/// An integer value that specifies the rounding operation. \\n\n" |
8622 | "/// Bits [7:4] are reserved. \\n\n" |
8623 | "/// Bit [3] is a precision exception value: \\n\n" |
8624 | "/// 0: A normal PE exception is used. \\n\n" |
8625 | "/// 1: The PE field is not updated. \\n\n" |
8626 | "/// Bit [2] is the rounding control source: \\n\n" |
8627 | "/// 0: Use bits [1:0] of \\a M. \\n\n" |
8628 | "/// 1: Use the current MXCSR setting. \\n\n" |
8629 | "/// Bits [1:0] contain the rounding control definition: \\n\n" |
8630 | "/// 00: Nearest. \\n\n" |
8631 | "/// 01: Downward (toward negative infinity). \\n\n" |
8632 | "/// 10: Upward (toward positive infinity). \\n\n" |
8633 | "/// 11: Truncated.\n" |
8634 | "/// \\returns A 256-bit vector of [4 x double] containing the rounded values.\n" |
8635 | "#define _mm256_round_pd(V, M) \\\n" |
8636 | " (__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M))\n" |
8637 | "\n" |
8638 | "/// Rounds the values stored in a 256-bit vector of [8 x float] as\n" |
8639 | "/// specified by the byte operand. The source values are rounded to integer\n" |
8640 | "/// values and returned as floating-point values.\n" |
8641 | "///\n" |
8642 | "/// \\headerfile <x86intrin.h>\n" |
8643 | "///\n" |
8644 | "/// \\code\n" |
8645 | "/// __m256 _mm256_round_ps(__m256 V, const int M);\n" |
8646 | "/// \\endcode\n" |
8647 | "///\n" |
8648 | "/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.\n" |
8649 | "///\n" |
8650 | "/// \\param V\n" |
8651 | "/// A 256-bit vector of [8 x float].\n" |
8652 | "/// \\param M\n" |
8653 | "/// An integer value that specifies the rounding operation. \\n\n" |
8654 | "/// Bits [7:4] are reserved. \\n\n" |
8655 | "/// Bit [3] is a precision exception value: \\n\n" |
8656 | "/// 0: A normal PE exception is used. \\n\n" |
8657 | "/// 1: The PE field is not updated. \\n\n" |
8658 | "/// Bit [2] is the rounding control source: \\n\n" |
8659 | "/// 0: Use bits [1:0] of \\a M. \\n\n" |
8660 | "/// 1: Use the current MXCSR setting. \\n\n" |
8661 | "/// Bits [1:0] contain the rounding control definition: \\n\n" |
8662 | "/// 00: Nearest. \\n\n" |
8663 | "/// 01: Downward (toward negative infinity). \\n\n" |
8664 | "/// 10: Upward (toward positive infinity). \\n\n" |
8665 | "/// 11: Truncated.\n" |
8666 | "/// \\returns A 256-bit vector of [8 x float] containing the rounded values.\n" |
8667 | "#define _mm256_round_ps(V, M) \\\n" |
8668 | " (__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M))\n" |
8669 | "\n" |
8670 | "/// Rounds up the values stored in a 256-bit vector of [4 x double]. The\n" |
8671 | "/// source values are rounded up to integer values and returned as 64-bit\n" |
8672 | "/// double-precision floating-point values.\n" |
8673 | "///\n" |
8674 | "/// \\headerfile <x86intrin.h>\n" |
8675 | "///\n" |
8676 | "/// \\code\n" |
8677 | "/// __m256d _mm256_ceil_pd(__m256d V);\n" |
8678 | "/// \\endcode\n" |
8679 | "///\n" |
8680 | "/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.\n" |
8681 | "///\n" |
8682 | "/// \\param V\n" |
8683 | "/// A 256-bit vector of [4 x double].\n" |
8684 | "/// \\returns A 256-bit vector of [4 x double] containing the rounded up values.\n" |
8685 | "#define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL)\n" |
8686 | "\n" |
8687 | "/// Rounds down the values stored in a 256-bit vector of [4 x double].\n" |
8688 | "/// The source values are rounded down to integer values and returned as\n" |
8689 | "/// 64-bit double-precision floating-point values.\n" |
8690 | "///\n" |
8691 | "/// \\headerfile <x86intrin.h>\n" |
8692 | "///\n" |
8693 | "/// \\code\n" |
8694 | "/// __m256d _mm256_floor_pd(__m256d V);\n" |
8695 | "/// \\endcode\n" |
8696 | "///\n" |
8697 | "/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.\n" |
8698 | "///\n" |
8699 | "/// \\param V\n" |
8700 | "/// A 256-bit vector of [4 x double].\n" |
8701 | "/// \\returns A 256-bit vector of [4 x double] containing the rounded down\n" |
8702 | "/// values.\n" |
8703 | "#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)\n" |
8704 | "\n" |
8705 | "/// Rounds up the values stored in a 256-bit vector of [8 x float]. The\n" |
8706 | "/// source values are rounded up to integer values and returned as\n" |
8707 | "/// floating-point values.\n" |
8708 | "///\n" |
8709 | "/// \\headerfile <x86intrin.h>\n" |
8710 | "///\n" |
8711 | "/// \\code\n" |
8712 | "/// __m256 _mm256_ceil_ps(__m256 V);\n" |
8713 | "/// \\endcode\n" |
8714 | "///\n" |
8715 | "/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.\n" |
8716 | "///\n" |
8717 | "/// \\param V\n" |
8718 | "/// A 256-bit vector of [8 x float].\n" |
8719 | "/// \\returns A 256-bit vector of [8 x float] containing the rounded up values.\n" |
8720 | "#define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL)\n" |
8721 | "\n" |
8722 | "/// Rounds down the values stored in a 256-bit vector of [8 x float]. The\n" |
8723 | "/// source values are rounded down to integer values and returned as\n" |
8724 | "/// floating-point values.\n" |
8725 | "///\n" |
8726 | "/// \\headerfile <x86intrin.h>\n" |
8727 | "///\n" |
8728 | "/// \\code\n" |
8729 | "/// __m256 _mm256_floor_ps(__m256 V);\n" |
8730 | "/// \\endcode\n" |
8731 | "///\n" |
8732 | "/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.\n" |
8733 | "///\n" |
8734 | "/// \\param V\n" |
8735 | "/// A 256-bit vector of [8 x float].\n" |
8736 | "/// \\returns A 256-bit vector of [8 x float] containing the rounded down values.\n" |
8737 | "#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)\n" |
8738 | "\n" |
8739 | "/* Logical */\n" |
8740 | "/// Performs a bitwise AND of two 256-bit vectors of [4 x double].\n" |
8741 | "///\n" |
8742 | "/// \\headerfile <x86intrin.h>\n" |
8743 | "///\n" |
8744 | "/// This intrinsic corresponds to the <c> VANDPD </c> instruction.\n" |
8745 | "///\n" |
8746 | "/// \\param __a\n" |
8747 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
8748 | "/// \\param __b\n" |
8749 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
8750 | "/// \\returns A 256-bit vector of [4 x double] containing the bitwise AND of the\n" |
8751 | "/// values between both operands.\n" |
8752 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
8753 | "_mm256_and_pd(__m256d __a, __m256d __b)\n" |
8754 | "{\n" |
8755 | " return (__m256d)((__v4du)__a & (__v4du)__b);\n" |
8756 | "}\n" |
8757 | "\n" |
8758 | "/// Performs a bitwise AND of two 256-bit vectors of [8 x float].\n" |
8759 | "///\n" |
8760 | "/// \\headerfile <x86intrin.h>\n" |
8761 | "///\n" |
8762 | "/// This intrinsic corresponds to the <c> VANDPS </c> instruction.\n" |
8763 | "///\n" |
8764 | "/// \\param __a\n" |
8765 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
8766 | "/// \\param __b\n" |
8767 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
8768 | "/// \\returns A 256-bit vector of [8 x float] containing the bitwise AND of the\n" |
8769 | "/// values between both operands.\n" |
8770 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8771 | "_mm256_and_ps(__m256 __a, __m256 __b)\n" |
8772 | "{\n" |
8773 | " return (__m256)((__v8su)__a & (__v8su)__b);\n" |
8774 | "}\n" |
8775 | "\n" |
8776 | "/// Performs a bitwise AND of two 256-bit vectors of [4 x double], using\n" |
8777 | "/// the one's complement of the values contained in the first source operand.\n" |
8778 | "///\n" |
8779 | "/// \\headerfile <x86intrin.h>\n" |
8780 | "///\n" |
8781 | "/// This intrinsic corresponds to the <c> VANDNPD </c> instruction.\n" |
8782 | "///\n" |
8783 | "/// \\param __a\n" |
8784 | "/// A 256-bit vector of [4 x double] containing the left source operand. The\n" |
8785 | "/// one's complement of this value is used in the bitwise AND.\n" |
8786 | "/// \\param __b\n" |
8787 | "/// A 256-bit vector of [4 x double] containing the right source operand.\n" |
8788 | "/// \\returns A 256-bit vector of [4 x double] containing the bitwise AND of the\n" |
8789 | "/// values of the second operand and the one's complement of the first\n" |
8790 | "/// operand.\n" |
8791 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
8792 | "_mm256_andnot_pd(__m256d __a, __m256d __b)\n" |
8793 | "{\n" |
8794 | " return (__m256d)(~(__v4du)__a & (__v4du)__b);\n" |
8795 | "}\n" |
8796 | "\n" |
8797 | "/// Performs a bitwise AND of two 256-bit vectors of [8 x float], using\n" |
8798 | "/// the one's complement of the values contained in the first source operand.\n" |
8799 | "///\n" |
8800 | "/// \\headerfile <x86intrin.h>\n" |
8801 | "///\n" |
8802 | "/// This intrinsic corresponds to the <c> VANDNPS </c> instruction.\n" |
8803 | "///\n" |
8804 | "/// \\param __a\n" |
8805 | "/// A 256-bit vector of [8 x float] containing the left source operand. The\n" |
8806 | "/// one's complement of this value is used in the bitwise AND.\n" |
8807 | "/// \\param __b\n" |
8808 | "/// A 256-bit vector of [8 x float] containing the right source operand.\n" |
8809 | "/// \\returns A 256-bit vector of [8 x float] containing the bitwise AND of the\n" |
8810 | "/// values of the second operand and the one's complement of the first\n" |
8811 | "/// operand.\n" |
8812 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8813 | "_mm256_andnot_ps(__m256 __a, __m256 __b)\n" |
8814 | "{\n" |
8815 | " return (__m256)(~(__v8su)__a & (__v8su)__b);\n" |
8816 | "}\n" |
8817 | "\n" |
8818 | "/// Performs a bitwise OR of two 256-bit vectors of [4 x double].\n" |
8819 | "///\n" |
8820 | "/// \\headerfile <x86intrin.h>\n" |
8821 | "///\n" |
8822 | "/// This intrinsic corresponds to the <c> VORPD </c> instruction.\n" |
8823 | "///\n" |
8824 | "/// \\param __a\n" |
8825 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
8826 | "/// \\param __b\n" |
8827 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
8828 | "/// \\returns A 256-bit vector of [4 x double] containing the bitwise OR of the\n" |
8829 | "/// values between both operands.\n" |
8830 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
8831 | "_mm256_or_pd(__m256d __a, __m256d __b)\n" |
8832 | "{\n" |
8833 | " return (__m256d)((__v4du)__a | (__v4du)__b);\n" |
8834 | "}\n" |
8835 | "\n" |
8836 | "/// Performs a bitwise OR of two 256-bit vectors of [8 x float].\n" |
8837 | "///\n" |
8838 | "/// \\headerfile <x86intrin.h>\n" |
8839 | "///\n" |
8840 | "/// This intrinsic corresponds to the <c> VORPS </c> instruction.\n" |
8841 | "///\n" |
8842 | "/// \\param __a\n" |
8843 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
8844 | "/// \\param __b\n" |
8845 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
8846 | "/// \\returns A 256-bit vector of [8 x float] containing the bitwise OR of the\n" |
8847 | "/// values between both operands.\n" |
8848 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8849 | "_mm256_or_ps(__m256 __a, __m256 __b)\n" |
8850 | "{\n" |
8851 | " return (__m256)((__v8su)__a | (__v8su)__b);\n" |
8852 | "}\n" |
8853 | "\n" |
8854 | "/// Performs a bitwise XOR of two 256-bit vectors of [4 x double].\n" |
8855 | "///\n" |
8856 | "/// \\headerfile <x86intrin.h>\n" |
8857 | "///\n" |
8858 | "/// This intrinsic corresponds to the <c> VXORPD </c> instruction.\n" |
8859 | "///\n" |
8860 | "/// \\param __a\n" |
8861 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
8862 | "/// \\param __b\n" |
8863 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
8864 | "/// \\returns A 256-bit vector of [4 x double] containing the bitwise XOR of the\n" |
8865 | "/// values between both operands.\n" |
8866 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
8867 | "_mm256_xor_pd(__m256d __a, __m256d __b)\n" |
8868 | "{\n" |
8869 | " return (__m256d)((__v4du)__a ^ (__v4du)__b);\n" |
8870 | "}\n" |
8871 | "\n" |
8872 | "/// Performs a bitwise XOR of two 256-bit vectors of [8 x float].\n" |
8873 | "///\n" |
8874 | "/// \\headerfile <x86intrin.h>\n" |
8875 | "///\n" |
8876 | "/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n" |
8877 | "///\n" |
8878 | "/// \\param __a\n" |
8879 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
8880 | "/// \\param __b\n" |
8881 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
8882 | "/// \\returns A 256-bit vector of [8 x float] containing the bitwise XOR of the\n" |
8883 | "/// values between both operands.\n" |
8884 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8885 | "_mm256_xor_ps(__m256 __a, __m256 __b)\n" |
8886 | "{\n" |
8887 | " return (__m256)((__v8su)__a ^ (__v8su)__b);\n" |
8888 | "}\n" |
8889 | "\n" |
8890 | "/* Horizontal arithmetic */\n" |
8891 | "/// Horizontally adds the adjacent pairs of values contained in two\n" |
8892 | "/// 256-bit vectors of [4 x double].\n" |
8893 | "///\n" |
8894 | "/// \\headerfile <x86intrin.h>\n" |
8895 | "///\n" |
8896 | "/// This intrinsic corresponds to the <c> VHADDPD </c> instruction.\n" |
8897 | "///\n" |
8898 | "/// \\param __a\n" |
8899 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
8900 | "/// The horizontal sums of the values are returned in the even-indexed\n" |
8901 | "/// elements of a vector of [4 x double].\n" |
8902 | "/// \\param __b\n" |
8903 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
8904 | "/// The horizontal sums of the values are returned in the odd-indexed\n" |
8905 | "/// elements of a vector of [4 x double].\n" |
8906 | "/// \\returns A 256-bit vector of [4 x double] containing the horizontal sums of\n" |
8907 | "/// both operands.\n" |
8908 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
8909 | "_mm256_hadd_pd(__m256d __a, __m256d __b)\n" |
8910 | "{\n" |
8911 | " return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);\n" |
8912 | "}\n" |
8913 | "\n" |
8914 | "/// Horizontally adds the adjacent pairs of values contained in two\n" |
8915 | "/// 256-bit vectors of [8 x float].\n" |
8916 | "///\n" |
8917 | "/// \\headerfile <x86intrin.h>\n" |
8918 | "///\n" |
8919 | "/// This intrinsic corresponds to the <c> VHADDPS </c> instruction.\n" |
8920 | "///\n" |
8921 | "/// \\param __a\n" |
8922 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
8923 | "/// The horizontal sums of the values are returned in the elements with\n" |
8924 | "/// index 0, 1, 4, 5 of a vector of [8 x float].\n" |
8925 | "/// \\param __b\n" |
8926 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
8927 | "/// The horizontal sums of the values are returned in the elements with\n" |
8928 | "/// index 2, 3, 6, 7 of a vector of [8 x float].\n" |
8929 | "/// \\returns A 256-bit vector of [8 x float] containing the horizontal sums of\n" |
8930 | "/// both operands.\n" |
8931 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8932 | "_mm256_hadd_ps(__m256 __a, __m256 __b)\n" |
8933 | "{\n" |
8934 | " return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);\n" |
8935 | "}\n" |
8936 | "\n" |
8937 | "/// Horizontally subtracts the adjacent pairs of values contained in two\n" |
8938 | "/// 256-bit vectors of [4 x double].\n" |
8939 | "///\n" |
8940 | "/// \\headerfile <x86intrin.h>\n" |
8941 | "///\n" |
8942 | "/// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.\n" |
8943 | "///\n" |
8944 | "/// \\param __a\n" |
8945 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
8946 | "/// The horizontal differences between the values are returned in the\n" |
8947 | "/// even-indexed elements of a vector of [4 x double].\n" |
8948 | "/// \\param __b\n" |
8949 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
8950 | "/// The horizontal differences between the values are returned in the\n" |
8951 | "/// odd-indexed elements of a vector of [4 x double].\n" |
8952 | "/// \\returns A 256-bit vector of [4 x double] containing the horizontal\n" |
8953 | "/// differences of both operands.\n" |
8954 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
8955 | "_mm256_hsub_pd(__m256d __a, __m256d __b)\n" |
8956 | "{\n" |
8957 | " return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);\n" |
8958 | "}\n" |
8959 | "\n" |
8960 | "/// Horizontally subtracts the adjacent pairs of values contained in two\n" |
8961 | "/// 256-bit vectors of [8 x float].\n" |
8962 | "///\n" |
8963 | "/// \\headerfile <x86intrin.h>\n" |
8964 | "///\n" |
8965 | "/// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.\n" |
8966 | "///\n" |
8967 | "/// \\param __a\n" |
8968 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
8969 | "/// The horizontal differences between the values are returned in the\n" |
8970 | "/// elements with index 0, 1, 4, 5 of a vector of [8 x float].\n" |
8971 | "/// \\param __b\n" |
8972 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
8973 | "/// The horizontal differences between the values are returned in the\n" |
8974 | "/// elements with index 2, 3, 6, 7 of a vector of [8 x float].\n" |
8975 | "/// \\returns A 256-bit vector of [8 x float] containing the horizontal\n" |
8976 | "/// differences of both operands.\n" |
8977 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
8978 | "_mm256_hsub_ps(__m256 __a, __m256 __b)\n" |
8979 | "{\n" |
8980 | " return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);\n" |
8981 | "}\n" |
8982 | "\n" |
8983 | "/* Vector permutations */\n" |
8984 | "/// Copies the values in a 128-bit vector of [2 x double] as specified\n" |
8985 | "/// by the 128-bit integer vector operand.\n" |
8986 | "///\n" |
8987 | "/// \\headerfile <x86intrin.h>\n" |
8988 | "///\n" |
8989 | "/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n" |
8990 | "///\n" |
8991 | "/// \\param __a\n" |
8992 | "/// A 128-bit vector of [2 x double].\n" |
8993 | "/// \\param __c\n" |
8994 | "/// A 128-bit integer vector operand specifying how the values are to be\n" |
8995 | "/// copied. \\n\n" |
8996 | "/// Bit [1]: \\n\n" |
8997 | "/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n" |
8998 | "/// vector. \\n\n" |
8999 | "/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n" |
9000 | "/// returned vector. \\n\n" |
9001 | "/// Bit [65]: \\n\n" |
9002 | "/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n" |
9003 | "/// returned vector. \\n\n" |
9004 | "/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n" |
9005 | "/// returned vector.\n" |
9006 | "/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n" |
9007 | "static __inline __m128d __DEFAULT_FN_ATTRS128\n" |
9008 | "_mm_permutevar_pd(__m128d __a, __m128i __c)\n" |
9009 | "{\n" |
9010 | " return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);\n" |
9011 | "}\n" |
9012 | "\n" |
9013 | "/// Copies the values in a 256-bit vector of [4 x double] as specified\n" |
9014 | "/// by the 256-bit integer vector operand.\n" |
9015 | "///\n" |
9016 | "/// \\headerfile <x86intrin.h>\n" |
9017 | "///\n" |
9018 | "/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n" |
9019 | "///\n" |
9020 | "/// \\param __a\n" |
9021 | "/// A 256-bit vector of [4 x double].\n" |
9022 | "/// \\param __c\n" |
9023 | "/// A 256-bit integer vector operand specifying how the values are to be\n" |
9024 | "/// copied. \\n\n" |
9025 | "/// Bit [1]: \\n\n" |
9026 | "/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n" |
9027 | "/// vector. \\n\n" |
9028 | "/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n" |
9029 | "/// returned vector. \\n\n" |
9030 | "/// Bit [65]: \\n\n" |
9031 | "/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n" |
9032 | "/// returned vector. \\n\n" |
9033 | "/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n" |
9034 | "/// returned vector. \\n\n" |
9035 | "/// Bit [129]: \\n\n" |
9036 | "/// 0: Bits [191:128] of the source are copied to bits [191:128] of the\n" |
9037 | "/// returned vector. \\n\n" |
9038 | "/// 1: Bits [255:192] of the source are copied to bits [191:128] of the\n" |
9039 | "/// returned vector. \\n\n" |
9040 | "/// Bit [193]: \\n\n" |
9041 | "/// 0: Bits [191:128] of the source are copied to bits [255:192] of the\n" |
9042 | "/// returned vector. \\n\n" |
9043 | "/// 1: Bits [255:192] of the source are copied to bits [255:192] of the\n" |
9044 | "/// returned vector.\n" |
9045 | "/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n" |
9046 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
9047 | "_mm256_permutevar_pd(__m256d __a, __m256i __c)\n" |
9048 | "{\n" |
9049 | " return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);\n" |
9050 | "}\n" |
9051 | "\n" |
9052 | "/// Copies the values stored in a 128-bit vector of [4 x float] as\n" |
9053 | "/// specified by the 128-bit integer vector operand.\n" |
9054 | "/// \\headerfile <x86intrin.h>\n" |
9055 | "///\n" |
9056 | "/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n" |
9057 | "///\n" |
9058 | "/// \\param __a\n" |
9059 | "/// A 128-bit vector of [4 x float].\n" |
9060 | "/// \\param __c\n" |
9061 | "/// A 128-bit integer vector operand specifying how the values are to be\n" |
9062 | "/// copied. \\n\n" |
9063 | "/// Bits [1:0]: \\n\n" |
9064 | "/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n" |
9065 | "/// returned vector. \\n\n" |
9066 | "/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n" |
9067 | "/// returned vector. \\n\n" |
9068 | "/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n" |
9069 | "/// returned vector. \\n\n" |
9070 | "/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n" |
9071 | "/// returned vector. \\n\n" |
9072 | "/// Bits [33:32]: \\n\n" |
9073 | "/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n" |
9074 | "/// returned vector. \\n\n" |
9075 | "/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n" |
9076 | "/// returned vector. \\n\n" |
9077 | "/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n" |
9078 | "/// returned vector. \\n\n" |
9079 | "/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n" |
9080 | "/// returned vector. \\n\n" |
9081 | "/// Bits [65:64]: \\n\n" |
9082 | "/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n" |
9083 | "/// returned vector. \\n\n" |
9084 | "/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n" |
9085 | "/// returned vector. \\n\n" |
9086 | "/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n" |
9087 | "/// returned vector. \\n\n" |
9088 | "/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n" |
9089 | "/// returned vector. \\n\n" |
9090 | "/// Bits [97:96]: \\n\n" |
9091 | "/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n" |
9092 | "/// returned vector. \\n\n" |
9093 | "/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n" |
9094 | "/// returned vector. \\n\n" |
9095 | "/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n" |
9096 | "/// returned vector. \\n\n" |
9097 | "/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n" |
9098 | "/// returned vector.\n" |
9099 | "/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n" |
9100 | "static __inline __m128 __DEFAULT_FN_ATTRS128\n" |
9101 | "_mm_permutevar_ps(__m128 __a, __m128i __c)\n" |
9102 | "{\n" |
9103 | " return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);\n" |
9104 | "}\n" |
9105 | "\n" |
9106 | "/// Copies the values stored in a 256-bit vector of [8 x float] as\n" |
9107 | "/// specified by the 256-bit integer vector operand.\n" |
9108 | "///\n" |
9109 | "/// \\headerfile <x86intrin.h>\n" |
9110 | "///\n" |
9111 | "/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n" |
9112 | "///\n" |
9113 | "/// \\param __a\n" |
9114 | "/// A 256-bit vector of [8 x float].\n" |
9115 | "/// \\param __c\n" |
9116 | "/// A 256-bit integer vector operand specifying how the values are to be\n" |
9117 | "/// copied. \\n\n" |
9118 | "/// Bits [1:0]: \\n\n" |
9119 | "/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n" |
9120 | "/// returned vector. \\n\n" |
9121 | "/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n" |
9122 | "/// returned vector. \\n\n" |
9123 | "/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n" |
9124 | "/// returned vector. \\n\n" |
9125 | "/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n" |
9126 | "/// returned vector. \\n\n" |
9127 | "/// Bits [33:32]: \\n\n" |
9128 | "/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n" |
9129 | "/// returned vector. \\n\n" |
9130 | "/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n" |
9131 | "/// returned vector. \\n\n" |
9132 | "/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n" |
9133 | "/// returned vector. \\n\n" |
9134 | "/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n" |
9135 | "/// returned vector. \\n\n" |
9136 | "/// Bits [65:64]: \\n\n" |
9137 | "/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n" |
9138 | "/// returned vector. \\n\n" |
9139 | "/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n" |
9140 | "/// returned vector. \\n\n" |
9141 | "/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n" |
9142 | "/// returned vector. \\n\n" |
9143 | "/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n" |
9144 | "/// returned vector. \\n\n" |
9145 | "/// Bits [97:96]: \\n\n" |
9146 | "/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n" |
9147 | "/// returned vector. \\n\n" |
9148 | "/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n" |
9149 | "/// returned vector. \\n\n" |
9150 | "/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n" |
9151 | "/// returned vector. \\n\n" |
9152 | "/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n" |
9153 | "/// returned vector. \\n\n" |
9154 | "/// Bits [129:128]: \\n\n" |
9155 | "/// 00: Bits [159:128] of the source are copied to bits [159:128] of the\n" |
9156 | "/// returned vector. \\n\n" |
9157 | "/// 01: Bits [191:160] of the source are copied to bits [159:128] of the\n" |
9158 | "/// returned vector. \\n\n" |
9159 | "/// 10: Bits [223:192] of the source are copied to bits [159:128] of the\n" |
9160 | "/// returned vector. \\n\n" |
9161 | "/// 11: Bits [255:224] of the source are copied to bits [159:128] of the\n" |
9162 | "/// returned vector. \\n\n" |
9163 | "/// Bits [161:160]: \\n\n" |
9164 | "/// 00: Bits [159:128] of the source are copied to bits [191:160] of the\n" |
9165 | "/// returned vector. \\n\n" |
9166 | "/// 01: Bits [191:160] of the source are copied to bits [191:160] of the\n" |
9167 | "/// returned vector. \\n\n" |
9168 | "/// 10: Bits [223:192] of the source are copied to bits [191:160] of the\n" |
9169 | "/// returned vector. \\n\n" |
9170 | "/// 11: Bits [255:224] of the source are copied to bits [191:160] of the\n" |
9171 | "/// returned vector. \\n\n" |
9172 | "/// Bits [193:192]: \\n\n" |
9173 | "/// 00: Bits [159:128] of the source are copied to bits [223:192] of the\n" |
9174 | "/// returned vector. \\n\n" |
9175 | "/// 01: Bits [191:160] of the source are copied to bits [223:192] of the\n" |
9176 | "/// returned vector. \\n\n" |
9177 | "/// 10: Bits [223:192] of the source are copied to bits [223:192] of the\n" |
9178 | "/// returned vector. \\n\n" |
9179 | "/// 11: Bits [255:224] of the source are copied to bits [223:192] of the\n" |
9180 | "/// returned vector. \\n\n" |
9181 | "/// Bits [225:224]: \\n\n" |
9182 | "/// 00: Bits [159:128] of the source are copied to bits [255:224] of the\n" |
9183 | "/// returned vector. \\n\n" |
9184 | "/// 01: Bits [191:160] of the source are copied to bits [255:224] of the\n" |
9185 | "/// returned vector. \\n\n" |
9186 | "/// 10: Bits [223:192] of the source are copied to bits [255:224] of the\n" |
9187 | "/// returned vector. \\n\n" |
9188 | "/// 11: Bits [255:224] of the source are copied to bits [255:224] of the\n" |
9189 | "/// returned vector.\n" |
9190 | "/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n" |
9191 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
9192 | "_mm256_permutevar_ps(__m256 __a, __m256i __c)\n" |
9193 | "{\n" |
9194 | " return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);\n" |
9195 | "}\n" |
9196 | "\n" |
9197 | "/// Copies the values in a 128-bit vector of [2 x double] as specified\n" |
9198 | "/// by the immediate integer operand.\n" |
9199 | "///\n" |
9200 | "/// \\headerfile <x86intrin.h>\n" |
9201 | "///\n" |
9202 | "/// \\code\n" |
9203 | "/// __m128d _mm_permute_pd(__m128d A, const int C);\n" |
9204 | "/// \\endcode\n" |
9205 | "///\n" |
9206 | "/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n" |
9207 | "///\n" |
9208 | "/// \\param A\n" |
9209 | "/// A 128-bit vector of [2 x double].\n" |
9210 | "/// \\param C\n" |
9211 | "/// An immediate integer operand specifying how the values are to be\n" |
9212 | "/// copied. \\n\n" |
9213 | "/// Bit [0]: \\n\n" |
9214 | "/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n" |
9215 | "/// vector. \\n\n" |
9216 | "/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n" |
9217 | "/// returned vector. \\n\n" |
9218 | "/// Bit [1]: \\n\n" |
9219 | "/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n" |
9220 | "/// returned vector. \\n\n" |
9221 | "/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n" |
9222 | "/// returned vector.\n" |
9223 | "/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n" |
9224 | "#define _mm_permute_pd(A, C) \\\n" |
9225 | " (__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C))\n" |
9226 | "\n" |
9227 | "/// Copies the values in a 256-bit vector of [4 x double] as specified by\n" |
9228 | "/// the immediate integer operand.\n" |
9229 | "///\n" |
9230 | "/// \\headerfile <x86intrin.h>\n" |
9231 | "///\n" |
9232 | "/// \\code\n" |
9233 | "/// __m256d _mm256_permute_pd(__m256d A, const int C);\n" |
9234 | "/// \\endcode\n" |
9235 | "///\n" |
9236 | "/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n" |
9237 | "///\n" |
9238 | "/// \\param A\n" |
9239 | "/// A 256-bit vector of [4 x double].\n" |
9240 | "/// \\param C\n" |
9241 | "/// An immediate integer operand specifying how the values are to be\n" |
9242 | "/// copied. \\n\n" |
9243 | "/// Bit [0]: \\n\n" |
9244 | "/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n" |
9245 | "/// vector. \\n\n" |
9246 | "/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n" |
9247 | "/// returned vector. \\n\n" |
9248 | "/// Bit [1]: \\n\n" |
9249 | "/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n" |
9250 | "/// returned vector. \\n\n" |
9251 | "/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n" |
9252 | "/// returned vector. \\n\n" |
9253 | "/// Bit [2]: \\n\n" |
9254 | "/// 0: Bits [191:128] of the source are copied to bits [191:128] of the\n" |
9255 | "/// returned vector. \\n\n" |
9256 | "/// 1: Bits [255:192] of the source are copied to bits [191:128] of the\n" |
9257 | "/// returned vector. \\n\n" |
9258 | "/// Bit [3]: \\n\n" |
9259 | "/// 0: Bits [191:128] of the source are copied to bits [255:192] of the\n" |
9260 | "/// returned vector. \\n\n" |
9261 | "/// 1: Bits [255:192] of the source are copied to bits [255:192] of the\n" |
9262 | "/// returned vector.\n" |
9263 | "/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n" |
9264 | "#define _mm256_permute_pd(A, C) \\\n" |
9265 | " (__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C))\n" |
9266 | "\n" |
9267 | "/// Copies the values in a 128-bit vector of [4 x float] as specified by\n" |
9268 | "/// the immediate integer operand.\n" |
9269 | "///\n" |
9270 | "/// \\headerfile <x86intrin.h>\n" |
9271 | "///\n" |
9272 | "/// \\code\n" |
9273 | "/// __m128 _mm_permute_ps(__m128 A, const int C);\n" |
9274 | "/// \\endcode\n" |
9275 | "///\n" |
9276 | "/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n" |
9277 | "///\n" |
9278 | "/// \\param A\n" |
9279 | "/// A 128-bit vector of [4 x float].\n" |
9280 | "/// \\param C\n" |
9281 | "/// An immediate integer operand specifying how the values are to be\n" |
9282 | "/// copied. \\n\n" |
9283 | "/// Bits [1:0]: \\n\n" |
9284 | "/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n" |
9285 | "/// returned vector. \\n\n" |
9286 | "/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n" |
9287 | "/// returned vector. \\n\n" |
9288 | "/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n" |
9289 | "/// returned vector. \\n\n" |
9290 | "/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n" |
9291 | "/// returned vector. \\n\n" |
9292 | "/// Bits [3:2]: \\n\n" |
9293 | "/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n" |
9294 | "/// returned vector. \\n\n" |
9295 | "/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n" |
9296 | "/// returned vector. \\n\n" |
9297 | "/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n" |
9298 | "/// returned vector. \\n\n" |
9299 | "/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n" |
9300 | "/// returned vector. \\n\n" |
9301 | "/// Bits [5:4]: \\n\n" |
9302 | "/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n" |
9303 | "/// returned vector. \\n\n" |
9304 | "/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n" |
9305 | "/// returned vector. \\n\n" |
9306 | "/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n" |
9307 | "/// returned vector. \\n\n" |
9308 | "/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n" |
9309 | "/// returned vector. \\n\n" |
9310 | "/// Bits [7:6]: \\n\n" |
9311 | "/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n" |
9312 | "/// returned vector. \\n\n" |
9313 | "/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n" |
9314 | "/// returned vector. \\n\n" |
9315 | "/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n" |
9316 | "/// returned vector. \\n\n" |
9317 | "/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n" |
9318 | "/// returned vector.\n" |
9319 | "/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n" |
9320 | "#define _mm_permute_ps(A, C) \\\n" |
9321 | " (__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C))\n" |
9322 | "\n" |
9323 | "/// Copies the values in a 256-bit vector of [8 x float] as specified by\n" |
9324 | "/// the immediate integer operand.\n" |
9325 | "///\n" |
9326 | "/// \\headerfile <x86intrin.h>\n" |
9327 | "///\n" |
9328 | "/// \\code\n" |
9329 | "/// __m256 _mm256_permute_ps(__m256 A, const int C);\n" |
9330 | "/// \\endcode\n" |
9331 | "///\n" |
9332 | "/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n" |
9333 | "///\n" |
9334 | "/// \\param A\n" |
9335 | "/// A 256-bit vector of [8 x float].\n" |
9336 | "/// \\param C\n" |
9337 | "/// An immediate integer operand specifying how the values are to be\n" |
9338 | "/// copied. \\n\n" |
9339 | "/// Bits [1:0]: \\n\n" |
9340 | "/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n" |
9341 | "/// returned vector. \\n\n" |
9342 | "/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n" |
9343 | "/// returned vector. \\n\n" |
9344 | "/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n" |
9345 | "/// returned vector. \\n\n" |
9346 | "/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n" |
9347 | "/// returned vector. \\n\n" |
9348 | "/// Bits [3:2]: \\n\n" |
9349 | "/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n" |
9350 | "/// returned vector. \\n\n" |
9351 | "/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n" |
9352 | "/// returned vector. \\n\n" |
9353 | "/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n" |
9354 | "/// returned vector. \\n\n" |
9355 | "/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n" |
9356 | "/// returned vector. \\n\n" |
9357 | "/// Bits [5:4]: \\n\n" |
9358 | "/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n" |
9359 | "/// returned vector. \\n\n" |
9360 | "/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n" |
9361 | "/// returned vector. \\n\n" |
9362 | "/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n" |
9363 | "/// returned vector. \\n\n" |
9364 | "/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n" |
9365 | "/// returned vector. \\n\n" |
9366 | "/// Bits [7:6]: \\n\n" |
9367 | "/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n" |
9368 | "/// returned vector. \\n\n" |
9369 | "/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n" |
9370 | "/// returned vector. \\n\n" |
9371 | "/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n" |
9372 | "/// returned vector. \\n\n" |
9373 | "/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n" |
9374 | "/// returned vector. \\n\n" |
9375 | "/// Bits [1:0]: \\n\n" |
9376 | "/// 00: Bits [159:128] of the source are copied to bits [159:128] of the\n" |
9377 | "/// returned vector. \\n\n" |
9378 | "/// 01: Bits [191:160] of the source are copied to bits [159:128] of the\n" |
9379 | "/// returned vector. \\n\n" |
9380 | "/// 10: Bits [223:192] of the source are copied to bits [159:128] of the\n" |
9381 | "/// returned vector. \\n\n" |
9382 | "/// 11: Bits [255:224] of the source are copied to bits [159:128] of the\n" |
9383 | "/// returned vector. \\n\n" |
9384 | "/// Bits [3:2]: \\n\n" |
9385 | "/// 00: Bits [159:128] of the source are copied to bits [191:160] of the\n" |
9386 | "/// returned vector. \\n\n" |
9387 | "/// 01: Bits [191:160] of the source are copied to bits [191:160] of the\n" |
9388 | "/// returned vector. \\n\n" |
9389 | "/// 10: Bits [223:192] of the source are copied to bits [191:160] of the\n" |
9390 | "/// returned vector. \\n\n" |
9391 | "/// 11: Bits [255:224] of the source are copied to bits [191:160] of the\n" |
9392 | "/// returned vector. \\n\n" |
9393 | "/// Bits [5:4]: \\n\n" |
9394 | "/// 00: Bits [159:128] of the source are copied to bits [223:192] of the\n" |
9395 | "/// returned vector. \\n\n" |
9396 | "/// 01: Bits [191:160] of the source are copied to bits [223:192] of the\n" |
9397 | "/// returned vector. \\n\n" |
9398 | "/// 10: Bits [223:192] of the source are copied to bits [223:192] of the\n" |
9399 | "/// returned vector. \\n\n" |
9400 | "/// 11: Bits [255:224] of the source are copied to bits [223:192] of the\n" |
9401 | "/// returned vector. \\n\n" |
9402 | "/// Bits [7:6]: \\n\n" |
9403 | "/// 00: Bits [159:128] of the source are copied to bits [255:224] of the\n" |
9404 | "/// returned vector. \\n\n" |
9405 | "/// 01: Bits [191:160] of the source are copied to bits [255:224] of the\n" |
9406 | "/// returned vector. \\n\n" |
9407 | "/// 10: Bits [223:192] of the source are copied to bits [255:224] of the\n" |
9408 | "/// returned vector. \\n\n" |
9409 | "/// 11: Bits [255:224] of the source are copied to bits [255:224] of the\n" |
9410 | "/// returned vector.\n" |
9411 | "/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n" |
9412 | "#define _mm256_permute_ps(A, C) \\\n" |
9413 | " (__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C))\n" |
9414 | "\n" |
9415 | "/// Permutes 128-bit data values stored in two 256-bit vectors of\n" |
9416 | "/// [4 x double], as specified by the immediate integer operand.\n" |
9417 | "///\n" |
9418 | "/// \\headerfile <x86intrin.h>\n" |
9419 | "///\n" |
9420 | "/// \\code\n" |
9421 | "/// __m256d _mm256_permute2f128_pd(__m256d V1, __m256d V2, const int M);\n" |
9422 | "/// \\endcode\n" |
9423 | "///\n" |
9424 | "/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.\n" |
9425 | "///\n" |
9426 | "/// \\param V1\n" |
9427 | "/// A 256-bit vector of [4 x double].\n" |
9428 | "/// \\param V2\n" |
9429 | "/// A 256-bit vector of [4 x double.\n" |
9430 | "/// \\param M\n" |
9431 | "/// An immediate integer operand specifying how the values are to be\n" |
9432 | "/// permuted. \\n\n" |
9433 | "/// Bits [1:0]: \\n\n" |
9434 | "/// 00: Bits [127:0] of operand \\a V1 are copied to bits [127:0] of the\n" |
9435 | "/// destination. \\n\n" |
9436 | "/// 01: Bits [255:128] of operand \\a V1 are copied to bits [127:0] of the\n" |
9437 | "/// destination. \\n\n" |
9438 | "/// 10: Bits [127:0] of operand \\a V2 are copied to bits [127:0] of the\n" |
9439 | "/// destination. \\n\n" |
9440 | "/// 11: Bits [255:128] of operand \\a V2 are copied to bits [127:0] of the\n" |
9441 | "/// destination. \\n\n" |
9442 | "/// Bits [5:4]: \\n\n" |
9443 | "/// 00: Bits [127:0] of operand \\a V1 are copied to bits [255:128] of the\n" |
9444 | "/// destination. \\n\n" |
9445 | "/// 01: Bits [255:128] of operand \\a V1 are copied to bits [255:128] of the\n" |
9446 | "/// destination. \\n\n" |
9447 | "/// 10: Bits [127:0] of operand \\a V2 are copied to bits [255:128] of the\n" |
9448 | "/// destination. \\n\n" |
9449 | "/// 11: Bits [255:128] of operand \\a V2 are copied to bits [255:128] of the\n" |
9450 | "/// destination.\n" |
9451 | "/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n" |
9452 | "#define _mm256_permute2f128_pd(V1, V2, M) \\\n" |
9453 | " (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \\\n" |
9454 | " (__v4df)(__m256d)(V2), (int)(M))\n" |
9455 | "\n" |
9456 | "/// Permutes 128-bit data values stored in two 256-bit vectors of\n" |
9457 | "/// [8 x float], as specified by the immediate integer operand.\n" |
9458 | "///\n" |
9459 | "/// \\headerfile <x86intrin.h>\n" |
9460 | "///\n" |
9461 | "/// \\code\n" |
9462 | "/// __m256 _mm256_permute2f128_ps(__m256 V1, __m256 V2, const int M);\n" |
9463 | "/// \\endcode\n" |
9464 | "///\n" |
9465 | "/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.\n" |
9466 | "///\n" |
9467 | "/// \\param V1\n" |
9468 | "/// A 256-bit vector of [8 x float].\n" |
9469 | "/// \\param V2\n" |
9470 | "/// A 256-bit vector of [8 x float].\n" |
9471 | "/// \\param M\n" |
9472 | "/// An immediate integer operand specifying how the values are to be\n" |
9473 | "/// permuted. \\n\n" |
9474 | "/// Bits [1:0]: \\n\n" |
9475 | "/// 00: Bits [127:0] of operand \\a V1 are copied to bits [127:0] of the\n" |
9476 | "/// destination. \\n\n" |
9477 | "/// 01: Bits [255:128] of operand \\a V1 are copied to bits [127:0] of the\n" |
9478 | "/// destination. \\n\n" |
9479 | "/// 10: Bits [127:0] of operand \\a V2 are copied to bits [127:0] of the\n" |
9480 | "/// destination. \\n\n" |
9481 | "/// 11: Bits [255:128] of operand \\a V2 are copied to bits [127:0] of the\n" |
9482 | "/// destination. \\n\n" |
9483 | "/// Bits [5:4]: \\n\n" |
9484 | "/// 00: Bits [127:0] of operand \\a V1 are copied to bits [255:128] of the\n" |
9485 | "/// destination. \\n\n" |
9486 | "/// 01: Bits [255:128] of operand \\a V1 are copied to bits [255:128] of the\n" |
9487 | "/// destination. \\n\n" |
9488 | "/// 10: Bits [127:0] of operand \\a V2 are copied to bits [255:128] of the\n" |
9489 | "/// destination. \\n\n" |
9490 | "/// 11: Bits [255:128] of operand \\a V2 are copied to bits [255:128] of the\n" |
9491 | "/// destination.\n" |
9492 | "/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n" |
9493 | "#define _mm256_permute2f128_ps(V1, V2, M) \\\n" |
9494 | " (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \\\n" |
9495 | " (__v8sf)(__m256)(V2), (int)(M))\n" |
9496 | "\n" |
9497 | "/// Permutes 128-bit data values stored in two 256-bit integer vectors,\n" |
9498 | "/// as specified by the immediate integer operand.\n" |
9499 | "///\n" |
9500 | "/// \\headerfile <x86intrin.h>\n" |
9501 | "///\n" |
9502 | "/// \\code\n" |
9503 | "/// __m256i _mm256_permute2f128_si256(__m256i V1, __m256i V2, const int M);\n" |
9504 | "/// \\endcode\n" |
9505 | "///\n" |
9506 | "/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.\n" |
9507 | "///\n" |
9508 | "/// \\param V1\n" |
9509 | "/// A 256-bit integer vector.\n" |
9510 | "/// \\param V2\n" |
9511 | "/// A 256-bit integer vector.\n" |
9512 | "/// \\param M\n" |
9513 | "/// An immediate integer operand specifying how the values are to be copied.\n" |
9514 | "/// Bits [1:0]: \\n\n" |
9515 | "/// 00: Bits [127:0] of operand \\a V1 are copied to bits [127:0] of the\n" |
9516 | "/// destination. \\n\n" |
9517 | "/// 01: Bits [255:128] of operand \\a V1 are copied to bits [127:0] of the\n" |
9518 | "/// destination. \\n\n" |
9519 | "/// 10: Bits [127:0] of operand \\a V2 are copied to bits [127:0] of the\n" |
9520 | "/// destination. \\n\n" |
9521 | "/// 11: Bits [255:128] of operand \\a V2 are copied to bits [127:0] of the\n" |
9522 | "/// destination. \\n\n" |
9523 | "/// Bits [5:4]: \\n\n" |
9524 | "/// 00: Bits [127:0] of operand \\a V1 are copied to bits [255:128] of the\n" |
9525 | "/// destination. \\n\n" |
9526 | "/// 01: Bits [255:128] of operand \\a V1 are copied to bits [255:128] of the\n" |
9527 | "/// destination. \\n\n" |
9528 | "/// 10: Bits [127:0] of operand \\a V2 are copied to bits [255:128] of the\n" |
9529 | "/// destination. \\n\n" |
9530 | "/// 11: Bits [255:128] of operand \\a V2 are copied to bits [255:128] of the\n" |
9531 | "/// destination.\n" |
9532 | "/// \\returns A 256-bit integer vector containing the copied values.\n" |
9533 | "#define _mm256_permute2f128_si256(V1, V2, M) \\\n" |
9534 | " (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \\\n" |
9535 | " (__v8si)(__m256i)(V2), (int)(M))\n" |
9536 | "\n" |
9537 | "/* Vector Blend */\n" |
9538 | "/// Merges 64-bit double-precision data values stored in either of the\n" |
9539 | "/// two 256-bit vectors of [4 x double], as specified by the immediate\n" |
9540 | "/// integer operand.\n" |
9541 | "///\n" |
9542 | "/// \\headerfile <x86intrin.h>\n" |
9543 | "///\n" |
9544 | "/// \\code\n" |
9545 | "/// __m256d _mm256_blend_pd(__m256d V1, __m256d V2, const int M);\n" |
9546 | "/// \\endcode\n" |
9547 | "///\n" |
9548 | "/// This intrinsic corresponds to the <c> VBLENDPD </c> instruction.\n" |
9549 | "///\n" |
9550 | "/// \\param V1\n" |
9551 | "/// A 256-bit vector of [4 x double].\n" |
9552 | "/// \\param V2\n" |
9553 | "/// A 256-bit vector of [4 x double].\n" |
9554 | "/// \\param M\n" |
9555 | "/// An immediate integer operand, with mask bits [3:0] specifying how the\n" |
9556 | "/// values are to be copied. The position of the mask bit corresponds to the\n" |
9557 | "/// index of a copied value. When a mask bit is 0, the corresponding 64-bit\n" |
9558 | "/// element in operand \\a V1 is copied to the same position in the\n" |
9559 | "/// destination. When a mask bit is 1, the corresponding 64-bit element in\n" |
9560 | "/// operand \\a V2 is copied to the same position in the destination.\n" |
9561 | "/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n" |
9562 | "#define _mm256_blend_pd(V1, V2, M) \\\n" |
9563 | " (__m256d)__builtin_ia32_blendpd256((__v4df)(__m256d)(V1), \\\n" |
9564 | " (__v4df)(__m256d)(V2), (int)(M))\n" |
9565 | "\n" |
9566 | "/// Merges 32-bit single-precision data values stored in either of the\n" |
9567 | "/// two 256-bit vectors of [8 x float], as specified by the immediate\n" |
9568 | "/// integer operand.\n" |
9569 | "///\n" |
9570 | "/// \\headerfile <x86intrin.h>\n" |
9571 | "///\n" |
9572 | "/// \\code\n" |
9573 | "/// __m256 _mm256_blend_ps(__m256 V1, __m256 V2, const int M);\n" |
9574 | "/// \\endcode\n" |
9575 | "///\n" |
9576 | "/// This intrinsic corresponds to the <c> VBLENDPS </c> instruction.\n" |
9577 | "///\n" |
9578 | "/// \\param V1\n" |
9579 | "/// A 256-bit vector of [8 x float].\n" |
9580 | "/// \\param V2\n" |
9581 | "/// A 256-bit vector of [8 x float].\n" |
9582 | "/// \\param M\n" |
9583 | "/// An immediate integer operand, with mask bits [7:0] specifying how the\n" |
9584 | "/// values are to be copied. The position of the mask bit corresponds to the\n" |
9585 | "/// index of a copied value. When a mask bit is 0, the corresponding 32-bit\n" |
9586 | "/// element in operand \\a V1 is copied to the same position in the\n" |
9587 | "/// destination. When a mask bit is 1, the corresponding 32-bit element in\n" |
9588 | "/// operand \\a V2 is copied to the same position in the destination.\n" |
9589 | "/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n" |
9590 | "#define _mm256_blend_ps(V1, V2, M) \\\n" |
9591 | " (__m256)__builtin_ia32_blendps256((__v8sf)(__m256)(V1), \\\n" |
9592 | " (__v8sf)(__m256)(V2), (int)(M))\n" |
9593 | "\n" |
9594 | "/// Merges 64-bit double-precision data values stored in either of the\n" |
9595 | "/// two 256-bit vectors of [4 x double], as specified by the 256-bit vector\n" |
9596 | "/// operand.\n" |
9597 | "///\n" |
9598 | "/// \\headerfile <x86intrin.h>\n" |
9599 | "///\n" |
9600 | "/// This intrinsic corresponds to the <c> VBLENDVPD </c> instruction.\n" |
9601 | "///\n" |
9602 | "/// \\param __a\n" |
9603 | "/// A 256-bit vector of [4 x double].\n" |
9604 | "/// \\param __b\n" |
9605 | "/// A 256-bit vector of [4 x double].\n" |
9606 | "/// \\param __c\n" |
9607 | "/// A 256-bit vector operand, with mask bits 255, 191, 127, and 63 specifying\n" |
9608 | "/// how the values are to be copied. The position of the mask bit corresponds\n" |
9609 | "/// to the most significant bit of a copied value. When a mask bit is 0, the\n" |
9610 | "/// corresponding 64-bit element in operand \\a __a is copied to the same\n" |
9611 | "/// position in the destination. When a mask bit is 1, the corresponding\n" |
9612 | "/// 64-bit element in operand \\a __b is copied to the same position in the\n" |
9613 | "/// destination.\n" |
9614 | "/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n" |
9615 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
9616 | "_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)\n" |
9617 | "{\n" |
9618 | " return (__m256d)__builtin_ia32_blendvpd256(\n" |
9619 | " (__v4df)__a, (__v4df)__b, (__v4df)__c);\n" |
9620 | "}\n" |
9621 | "\n" |
9622 | "/// Merges 32-bit single-precision data values stored in either of the\n" |
9623 | "/// two 256-bit vectors of [8 x float], as specified by the 256-bit vector\n" |
9624 | "/// operand.\n" |
9625 | "///\n" |
9626 | "/// \\headerfile <x86intrin.h>\n" |
9627 | "///\n" |
9628 | "/// This intrinsic corresponds to the <c> VBLENDVPS </c> instruction.\n" |
9629 | "///\n" |
9630 | "/// \\param __a\n" |
9631 | "/// A 256-bit vector of [8 x float].\n" |
9632 | "/// \\param __b\n" |
9633 | "/// A 256-bit vector of [8 x float].\n" |
9634 | "/// \\param __c\n" |
9635 | "/// A 256-bit vector operand, with mask bits 255, 223, 191, 159, 127, 95, 63,\n" |
9636 | "/// and 31 specifying how the values are to be copied. The position of the\n" |
9637 | "/// mask bit corresponds to the most significant bit of a copied value. When\n" |
9638 | "/// a mask bit is 0, the corresponding 32-bit element in operand \\a __a is\n" |
9639 | "/// copied to the same position in the destination. When a mask bit is 1, the\n" |
9640 | "/// corresponding 32-bit element in operand \\a __b is copied to the same\n" |
9641 | "/// position in the destination.\n" |
9642 | "/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n" |
9643 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
9644 | "_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)\n" |
9645 | "{\n" |
9646 | " return (__m256)__builtin_ia32_blendvps256(\n" |
9647 | " (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);\n" |
9648 | "}\n" |
9649 | "\n" |
9650 | "/* Vector Dot Product */\n" |
9651 | "/// Computes two dot products in parallel, using the lower and upper\n" |
9652 | "/// halves of two [8 x float] vectors as input to the two computations, and\n" |
9653 | "/// returning the two dot products in the lower and upper halves of the\n" |
9654 | "/// [8 x float] result.\n" |
9655 | "///\n" |
9656 | "/// The immediate integer operand controls which input elements will\n" |
9657 | "/// contribute to the dot product, and where the final results are returned.\n" |
9658 | "/// In general, for each dot product, the four corresponding elements of the\n" |
9659 | "/// input vectors are multiplied; the first two and second two products are\n" |
9660 | "/// summed, then the two sums are added to form the final result.\n" |
9661 | "///\n" |
9662 | "/// \\headerfile <x86intrin.h>\n" |
9663 | "///\n" |
9664 | "/// \\code\n" |
9665 | "/// __m256 _mm256_dp_ps(__m256 V1, __m256 V2, const int M);\n" |
9666 | "/// \\endcode\n" |
9667 | "///\n" |
9668 | "/// This intrinsic corresponds to the <c> VDPPS </c> instruction.\n" |
9669 | "///\n" |
9670 | "/// \\param V1\n" |
9671 | "/// A vector of [8 x float] values, treated as two [4 x float] vectors.\n" |
9672 | "/// \\param V2\n" |
9673 | "/// A vector of [8 x float] values, treated as two [4 x float] vectors.\n" |
9674 | "/// \\param M\n" |
9675 | "/// An immediate integer argument. Bits [7:4] determine which elements of\n" |
9676 | "/// the input vectors are used, with bit [4] corresponding to the lowest\n" |
9677 | "/// element and bit [7] corresponding to the highest element of each [4 x\n" |
9678 | "/// float] subvector. If a bit is set, the corresponding elements from the\n" |
9679 | "/// two input vectors are used as an input for dot product; otherwise that\n" |
9680 | "/// input is treated as zero. Bits [3:0] determine which elements of the\n" |
9681 | "/// result will receive a copy of the final dot product, with bit [0]\n" |
9682 | "/// corresponding to the lowest element and bit [3] corresponding to the\n" |
9683 | "/// highest element of each [4 x float] subvector. If a bit is set, the dot\n" |
9684 | "/// product is returned in the corresponding element; otherwise that element\n" |
9685 | "/// is set to zero. The bitmask is applied in the same way to each of the\n" |
9686 | "/// two parallel dot product computations.\n" |
9687 | "/// \\returns A 256-bit vector of [8 x float] containing the two dot products.\n" |
9688 | "#define _mm256_dp_ps(V1, V2, M) \\\n" |
9689 | " (__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \\\n" |
9690 | " (__v8sf)(__m256)(V2), (M))\n" |
9691 | "\n" |
9692 | "/* Vector shuffle */\n" |
9693 | "/// Selects 8 float values from the 256-bit operands of [8 x float], as\n" |
9694 | "/// specified by the immediate value operand.\n" |
9695 | "///\n" |
9696 | "/// The four selected elements in each operand are copied to the destination\n" |
9697 | "/// according to the bits specified in the immediate operand. The selected\n" |
9698 | "/// elements from the first 256-bit operand are copied to bits [63:0] and\n" |
9699 | "/// bits [191:128] of the destination, and the selected elements from the\n" |
9700 | "/// second 256-bit operand are copied to bits [127:64] and bits [255:192] of\n" |
9701 | "/// the destination. For example, if bits [7:0] of the immediate operand\n" |
9702 | "/// contain a value of 0xFF, the 256-bit destination vector would contain the\n" |
9703 | "/// following values: b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3].\n" |
9704 | "///\n" |
9705 | "/// \\headerfile <x86intrin.h>\n" |
9706 | "///\n" |
9707 | "/// \\code\n" |
9708 | "/// __m256 _mm256_shuffle_ps(__m256 a, __m256 b, const int mask);\n" |
9709 | "/// \\endcode\n" |
9710 | "///\n" |
9711 | "/// This intrinsic corresponds to the <c> VSHUFPS </c> instruction.\n" |
9712 | "///\n" |
9713 | "/// \\param a\n" |
9714 | "/// A 256-bit vector of [8 x float]. The four selected elements in this\n" |
9715 | "/// operand are copied to bits [63:0] and bits [191:128] in the destination,\n" |
9716 | "/// according to the bits specified in the immediate operand.\n" |
9717 | "/// \\param b\n" |
9718 | "/// A 256-bit vector of [8 x float]. The four selected elements in this\n" |
9719 | "/// operand are copied to bits [127:64] and bits [255:192] in the\n" |
9720 | "/// destination, according to the bits specified in the immediate operand.\n" |
9721 | "/// \\param mask\n" |
9722 | "/// An immediate value containing an 8-bit value specifying which elements to\n" |
9723 | "/// copy from \\a a and \\a b. \\n\n" |
9724 | "/// Bits [3:0] specify the values copied from operand \\a a. \\n\n" |
9725 | "/// Bits [7:4] specify the values copied from operand \\a b. \\n\n" |
9726 | "/// The destinations within the 256-bit destination are assigned values as\n" |
9727 | "/// follows, according to the bit value assignments described below: \\n\n" |
9728 | "/// Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the\n" |
9729 | "/// destination. \\n\n" |
9730 | "/// Bits [3:2] are used to assign values to bits [63:32] and [191:160] in the\n" |
9731 | "/// destination. \\n\n" |
9732 | "/// Bits [5:4] are used to assign values to bits [95:64] and [223:192] in the\n" |
9733 | "/// destination. \\n\n" |
9734 | "/// Bits [7:6] are used to assign values to bits [127:96] and [255:224] in\n" |
9735 | "/// the destination. \\n\n" |
9736 | "/// Bit value assignments: \\n\n" |
9737 | "/// 00: Bits [31:0] and [159:128] are copied from the selected operand. \\n\n" |
9738 | "/// 01: Bits [63:32] and [191:160] are copied from the selected operand. \\n\n" |
9739 | "/// 10: Bits [95:64] and [223:192] are copied from the selected operand. \\n\n" |
9740 | "/// 11: Bits [127:96] and [255:224] are copied from the selected operand.\n" |
9741 | "/// \\returns A 256-bit vector of [8 x float] containing the shuffled values.\n" |
9742 | "#define _mm256_shuffle_ps(a, b, mask) \\\n" |
9743 | " (__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \\\n" |
9744 | " (__v8sf)(__m256)(b), (int)(mask))\n" |
9745 | "\n" |
9746 | "/// Selects four double-precision values from the 256-bit operands of\n" |
9747 | "/// [4 x double], as specified by the immediate value operand.\n" |
9748 | "///\n" |
9749 | "/// The selected elements from the first 256-bit operand are copied to bits\n" |
9750 | "/// [63:0] and bits [191:128] in the destination, and the selected elements\n" |
9751 | "/// from the second 256-bit operand are copied to bits [127:64] and bits\n" |
9752 | "/// [255:192] in the destination. For example, if bits [3:0] of the immediate\n" |
9753 | "/// operand contain a value of 0xF, the 256-bit destination vector would\n" |
9754 | "/// contain the following values: b[3], a[3], b[1], a[1].\n" |
9755 | "///\n" |
9756 | "/// \\headerfile <x86intrin.h>\n" |
9757 | "///\n" |
9758 | "/// \\code\n" |
9759 | "/// __m256d _mm256_shuffle_pd(__m256d a, __m256d b, const int mask);\n" |
9760 | "/// \\endcode\n" |
9761 | "///\n" |
9762 | "/// This intrinsic corresponds to the <c> VSHUFPD </c> instruction.\n" |
9763 | "///\n" |
9764 | "/// \\param a\n" |
9765 | "/// A 256-bit vector of [4 x double].\n" |
9766 | "/// \\param b\n" |
9767 | "/// A 256-bit vector of [4 x double].\n" |
9768 | "/// \\param mask\n" |
9769 | "/// An immediate value containing 8-bit values specifying which elements to\n" |
9770 | "/// copy from \\a a and \\a b: \\n\n" |
9771 | "/// Bit [0]=0: Bits [63:0] are copied from \\a a to bits [63:0] of the\n" |
9772 | "/// destination. \\n\n" |
9773 | "/// Bit [0]=1: Bits [127:64] are copied from \\a a to bits [63:0] of the\n" |
9774 | "/// destination. \\n\n" |
9775 | "/// Bit [1]=0: Bits [63:0] are copied from \\a b to bits [127:64] of the\n" |
9776 | "/// destination. \\n\n" |
9777 | "/// Bit [1]=1: Bits [127:64] are copied from \\a b to bits [127:64] of the\n" |
9778 | "/// destination. \\n\n" |
9779 | "/// Bit [2]=0: Bits [191:128] are copied from \\a a to bits [191:128] of the\n" |
9780 | "/// destination. \\n\n" |
9781 | "/// Bit [2]=1: Bits [255:192] are copied from \\a a to bits [191:128] of the\n" |
9782 | "/// destination. \\n\n" |
9783 | "/// Bit [3]=0: Bits [191:128] are copied from \\a b to bits [255:192] of the\n" |
9784 | "/// destination. \\n\n" |
9785 | "/// Bit [3]=1: Bits [255:192] are copied from \\a b to bits [255:192] of the\n" |
9786 | "/// destination.\n" |
9787 | "/// \\returns A 256-bit vector of [4 x double] containing the shuffled values.\n" |
9788 | "#define _mm256_shuffle_pd(a, b, mask) \\\n" |
9789 | " (__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \\\n" |
9790 | " (__v4df)(__m256d)(b), (int)(mask))\n" |
9791 | "\n" |
9792 | "/* Compare */\n" |
9793 | "#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */\n" |
9794 | "#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */\n" |
9795 | "#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */\n" |
9796 | "#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */\n" |
9797 | "#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */\n" |
9798 | "#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */\n" |
9799 | "#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */\n" |
9800 | "#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */\n" |
9801 | "#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */\n" |
9802 | "#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */\n" |
9803 | "#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */\n" |
9804 | "#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */\n" |
9805 | "#define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */\n" |
9806 | "#define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */\n" |
9807 | "#define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */\n" |
9808 | "#define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */\n" |
9809 | "#define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */\n" |
9810 | "#define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */\n" |
9811 | "#define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */\n" |
9812 | "#define _CMP_UNORD_S 0x13 /* Unordered (signaling) */\n" |
9813 | "#define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */\n" |
9814 | "#define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */\n" |
9815 | "#define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unordered, non-signaling) */\n" |
9816 | "#define _CMP_ORD_S 0x17 /* Ordered (signaling) */\n" |
9817 | "#define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */\n" |
9818 | "#define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */\n" |
9819 | "#define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */\n" |
9820 | "#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */\n" |
9821 | "#define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */\n" |
9822 | "#define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */\n" |
9823 | "#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */\n" |
9824 | "#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */\n" |
9825 | "\n" |
9826 | "/// Compares each of the corresponding double-precision values of two\n" |
9827 | "/// 128-bit vectors of [2 x double], using the operation specified by the\n" |
9828 | "/// immediate integer operand.\n" |
9829 | "///\n" |
9830 | "/// Returns a [2 x double] vector consisting of two doubles corresponding to\n" |
9831 | "/// the two comparison results: zero if the comparison is false, and all 1's\n" |
9832 | "/// if the comparison is true.\n" |
9833 | "///\n" |
9834 | "/// \\headerfile <x86intrin.h>\n" |
9835 | "///\n" |
9836 | "/// \\code\n" |
9837 | "/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);\n" |
9838 | "/// \\endcode\n" |
9839 | "///\n" |
9840 | "/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.\n" |
9841 | "///\n" |
9842 | "/// \\param a\n" |
9843 | "/// A 128-bit vector of [2 x double].\n" |
9844 | "/// \\param b\n" |
9845 | "/// A 128-bit vector of [2 x double].\n" |
9846 | "/// \\param c\n" |
9847 | "/// An immediate integer operand, with bits [4:0] specifying which comparison\n" |
9848 | "/// operation to use: \\n\n" |
9849 | "/// 0x00: Equal (ordered, non-signaling) \\n\n" |
9850 | "/// 0x01: Less-than (ordered, signaling) \\n\n" |
9851 | "/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n" |
9852 | "/// 0x03: Unordered (non-signaling) \\n\n" |
9853 | "/// 0x04: Not-equal (unordered, non-signaling) \\n\n" |
9854 | "/// 0x05: Not-less-than (unordered, signaling) \\n\n" |
9855 | "/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n" |
9856 | "/// 0x07: Ordered (non-signaling) \\n\n" |
9857 | "/// 0x08: Equal (unordered, non-signaling) \\n\n" |
9858 | "/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n" |
9859 | "/// 0x0A: Not-greater-than (unordered, signaling) \\n\n" |
9860 | "/// 0x0B: False (ordered, non-signaling) \\n\n" |
9861 | "/// 0x0C: Not-equal (ordered, non-signaling) \\n\n" |
9862 | "/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n" |
9863 | "/// 0x0E: Greater-than (ordered, signaling) \\n\n" |
9864 | "/// 0x0F: True (unordered, non-signaling) \\n\n" |
9865 | "/// 0x10: Equal (ordered, signaling) \\n\n" |
9866 | "/// 0x11: Less-than (ordered, non-signaling) \\n\n" |
9867 | "/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n" |
9868 | "/// 0x13: Unordered (signaling) \\n\n" |
9869 | "/// 0x14: Not-equal (unordered, signaling) \\n\n" |
9870 | "/// 0x15: Not-less-than (unordered, non-signaling) \\n\n" |
9871 | "/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n" |
9872 | "/// 0x17: Ordered (signaling) \\n\n" |
9873 | "/// 0x18: Equal (unordered, signaling) \\n\n" |
9874 | "/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n" |
9875 | "/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n" |
9876 | "/// 0x1B: False (ordered, signaling) \\n\n" |
9877 | "/// 0x1C: Not-equal (ordered, signaling) \\n\n" |
9878 | "/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n" |
9879 | "/// 0x1E: Greater-than (ordered, non-signaling) \\n\n" |
9880 | "/// 0x1F: True (unordered, signaling)\n" |
9881 | "/// \\returns A 128-bit vector of [2 x double] containing the comparison results.\n" |
9882 | "#define _mm_cmp_pd(a, b, c) \\\n" |
9883 | " (__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \\\n" |
9884 | " (__v2df)(__m128d)(b), (c))\n" |
9885 | "\n" |
9886 | "/// Compares each of the corresponding values of two 128-bit vectors of\n" |
9887 | "/// [4 x float], using the operation specified by the immediate integer\n" |
9888 | "/// operand.\n" |
9889 | "///\n" |
9890 | "/// Returns a [4 x float] vector consisting of four floats corresponding to\n" |
9891 | "/// the four comparison results: zero if the comparison is false, and all 1's\n" |
9892 | "/// if the comparison is true.\n" |
9893 | "///\n" |
9894 | "/// \\headerfile <x86intrin.h>\n" |
9895 | "///\n" |
9896 | "/// \\code\n" |
9897 | "/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);\n" |
9898 | "/// \\endcode\n" |
9899 | "///\n" |
9900 | "/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.\n" |
9901 | "///\n" |
9902 | "/// \\param a\n" |
9903 | "/// A 128-bit vector of [4 x float].\n" |
9904 | "/// \\param b\n" |
9905 | "/// A 128-bit vector of [4 x float].\n" |
9906 | "/// \\param c\n" |
9907 | "/// An immediate integer operand, with bits [4:0] specifying which comparison\n" |
9908 | "/// operation to use: \\n\n" |
9909 | "/// 0x00: Equal (ordered, non-signaling) \\n\n" |
9910 | "/// 0x01: Less-than (ordered, signaling) \\n\n" |
9911 | "/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n" |
9912 | "/// 0x03: Unordered (non-signaling) \\n\n" |
9913 | "/// 0x04: Not-equal (unordered, non-signaling) \\n\n" |
9914 | "/// 0x05: Not-less-than (unordered, signaling) \\n\n" |
9915 | "/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n" |
9916 | "/// 0x07: Ordered (non-signaling) \\n\n" |
9917 | "/// 0x08: Equal (unordered, non-signaling) \\n\n" |
9918 | "/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n" |
9919 | "/// 0x0A: Not-greater-than (unordered, signaling) \\n\n" |
9920 | "/// 0x0B: False (ordered, non-signaling) \\n\n" |
9921 | "/// 0x0C: Not-equal (ordered, non-signaling) \\n\n" |
9922 | "/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n" |
9923 | "/// 0x0E: Greater-than (ordered, signaling) \\n\n" |
9924 | "/// 0x0F: True (unordered, non-signaling) \\n\n" |
9925 | "/// 0x10: Equal (ordered, signaling) \\n\n" |
9926 | "/// 0x11: Less-than (ordered, non-signaling) \\n\n" |
9927 | "/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n" |
9928 | "/// 0x13: Unordered (signaling) \\n\n" |
9929 | "/// 0x14: Not-equal (unordered, signaling) \\n\n" |
9930 | "/// 0x15: Not-less-than (unordered, non-signaling) \\n\n" |
9931 | "/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n" |
9932 | "/// 0x17: Ordered (signaling) \\n\n" |
9933 | "/// 0x18: Equal (unordered, signaling) \\n\n" |
9934 | "/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n" |
9935 | "/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n" |
9936 | "/// 0x1B: False (ordered, signaling) \\n\n" |
9937 | "/// 0x1C: Not-equal (ordered, signaling) \\n\n" |
9938 | "/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n" |
9939 | "/// 0x1E: Greater-than (ordered, non-signaling) \\n\n" |
9940 | "/// 0x1F: True (unordered, signaling)\n" |
9941 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
9942 | "#define _mm_cmp_ps(a, b, c) \\\n" |
9943 | " (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \\\n" |
9944 | " (__v4sf)(__m128)(b), (c))\n" |
9945 | "\n" |
9946 | "/// Compares each of the corresponding double-precision values of two\n" |
9947 | "/// 256-bit vectors of [4 x double], using the operation specified by the\n" |
9948 | "/// immediate integer operand.\n" |
9949 | "///\n" |
9950 | "/// Returns a [4 x double] vector consisting of four doubles corresponding to\n" |
9951 | "/// the four comparison results: zero if the comparison is false, and all 1's\n" |
9952 | "/// if the comparison is true.\n" |
9953 | "///\n" |
9954 | "/// \\headerfile <x86intrin.h>\n" |
9955 | "///\n" |
9956 | "/// \\code\n" |
9957 | "/// __m256d _mm256_cmp_pd(__m256d a, __m256d b, const int c);\n" |
9958 | "/// \\endcode\n" |
9959 | "///\n" |
9960 | "/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.\n" |
9961 | "///\n" |
9962 | "/// \\param a\n" |
9963 | "/// A 256-bit vector of [4 x double].\n" |
9964 | "/// \\param b\n" |
9965 | "/// A 256-bit vector of [4 x double].\n" |
9966 | "/// \\param c\n" |
9967 | "/// An immediate integer operand, with bits [4:0] specifying which comparison\n" |
9968 | "/// operation to use: \\n\n" |
9969 | "/// 0x00: Equal (ordered, non-signaling) \\n\n" |
9970 | "/// 0x01: Less-than (ordered, signaling) \\n\n" |
9971 | "/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n" |
9972 | "/// 0x03: Unordered (non-signaling) \\n\n" |
9973 | "/// 0x04: Not-equal (unordered, non-signaling) \\n\n" |
9974 | "/// 0x05: Not-less-than (unordered, signaling) \\n\n" |
9975 | "/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n" |
9976 | "/// 0x07: Ordered (non-signaling) \\n\n" |
9977 | "/// 0x08: Equal (unordered, non-signaling) \\n\n" |
9978 | "/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n" |
9979 | "/// 0x0A: Not-greater-than (unordered, signaling) \\n\n" |
9980 | "/// 0x0B: False (ordered, non-signaling) \\n\n" |
9981 | "/// 0x0C: Not-equal (ordered, non-signaling) \\n\n" |
9982 | "/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n" |
9983 | "/// 0x0E: Greater-than (ordered, signaling) \\n\n" |
9984 | "/// 0x0F: True (unordered, non-signaling) \\n\n" |
9985 | "/// 0x10: Equal (ordered, signaling) \\n\n" |
9986 | "/// 0x11: Less-than (ordered, non-signaling) \\n\n" |
9987 | "/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n" |
9988 | "/// 0x13: Unordered (signaling) \\n\n" |
9989 | "/// 0x14: Not-equal (unordered, signaling) \\n\n" |
9990 | "/// 0x15: Not-less-than (unordered, non-signaling) \\n\n" |
9991 | "/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n" |
9992 | "/// 0x17: Ordered (signaling) \\n\n" |
9993 | "/// 0x18: Equal (unordered, signaling) \\n\n" |
9994 | "/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n" |
9995 | "/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n" |
9996 | "/// 0x1B: False (ordered, signaling) \\n\n" |
9997 | "/// 0x1C: Not-equal (ordered, signaling) \\n\n" |
9998 | "/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n" |
9999 | "/// 0x1E: Greater-than (ordered, non-signaling) \\n\n" |
10000 | "/// 0x1F: True (unordered, signaling)\n" |
10001 | "/// \\returns A 256-bit vector of [4 x double] containing the comparison results.\n" |
10002 | "#define _mm256_cmp_pd(a, b, c) \\\n" |
10003 | " (__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \\\n" |
10004 | " (__v4df)(__m256d)(b), (c))\n" |
10005 | "\n" |
10006 | "/// Compares each of the corresponding values of two 256-bit vectors of\n" |
10007 | "/// [8 x float], using the operation specified by the immediate integer\n" |
10008 | "/// operand.\n" |
10009 | "///\n" |
10010 | "/// Returns a [8 x float] vector consisting of eight floats corresponding to\n" |
10011 | "/// the eight comparison results: zero if the comparison is false, and all\n" |
10012 | "/// 1's if the comparison is true.\n" |
10013 | "///\n" |
10014 | "/// \\headerfile <x86intrin.h>\n" |
10015 | "///\n" |
10016 | "/// \\code\n" |
10017 | "/// __m256 _mm256_cmp_ps(__m256 a, __m256 b, const int c);\n" |
10018 | "/// \\endcode\n" |
10019 | "///\n" |
10020 | "/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.\n" |
10021 | "///\n" |
10022 | "/// \\param a\n" |
10023 | "/// A 256-bit vector of [8 x float].\n" |
10024 | "/// \\param b\n" |
10025 | "/// A 256-bit vector of [8 x float].\n" |
10026 | "/// \\param c\n" |
10027 | "/// An immediate integer operand, with bits [4:0] specifying which comparison\n" |
10028 | "/// operation to use: \\n\n" |
10029 | "/// 0x00: Equal (ordered, non-signaling) \\n\n" |
10030 | "/// 0x01: Less-than (ordered, signaling) \\n\n" |
10031 | "/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n" |
10032 | "/// 0x03: Unordered (non-signaling) \\n\n" |
10033 | "/// 0x04: Not-equal (unordered, non-signaling) \\n\n" |
10034 | "/// 0x05: Not-less-than (unordered, signaling) \\n\n" |
10035 | "/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n" |
10036 | "/// 0x07: Ordered (non-signaling) \\n\n" |
10037 | "/// 0x08: Equal (unordered, non-signaling) \\n\n" |
10038 | "/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n" |
10039 | "/// 0x0A: Not-greater-than (unordered, signaling) \\n\n" |
10040 | "/// 0x0B: False (ordered, non-signaling) \\n\n" |
10041 | "/// 0x0C: Not-equal (ordered, non-signaling) \\n\n" |
10042 | "/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n" |
10043 | "/// 0x0E: Greater-than (ordered, signaling) \\n\n" |
10044 | "/// 0x0F: True (unordered, non-signaling) \\n\n" |
10045 | "/// 0x10: Equal (ordered, signaling) \\n\n" |
10046 | "/// 0x11: Less-than (ordered, non-signaling) \\n\n" |
10047 | "/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n" |
10048 | "/// 0x13: Unordered (signaling) \\n\n" |
10049 | "/// 0x14: Not-equal (unordered, signaling) \\n\n" |
10050 | "/// 0x15: Not-less-than (unordered, non-signaling) \\n\n" |
10051 | "/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n" |
10052 | "/// 0x17: Ordered (signaling) \\n\n" |
10053 | "/// 0x18: Equal (unordered, signaling) \\n\n" |
10054 | "/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n" |
10055 | "/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n" |
10056 | "/// 0x1B: False (ordered, signaling) \\n\n" |
10057 | "/// 0x1C: Not-equal (ordered, signaling) \\n\n" |
10058 | "/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n" |
10059 | "/// 0x1E: Greater-than (ordered, non-signaling) \\n\n" |
10060 | "/// 0x1F: True (unordered, signaling)\n" |
10061 | "/// \\returns A 256-bit vector of [8 x float] containing the comparison results.\n" |
10062 | "#define _mm256_cmp_ps(a, b, c) \\\n" |
10063 | " (__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \\\n" |
10064 | " (__v8sf)(__m256)(b), (c))\n" |
10065 | "\n" |
10066 | "/// Compares each of the corresponding scalar double-precision values of\n" |
10067 | "/// two 128-bit vectors of [2 x double], using the operation specified by the\n" |
10068 | "/// immediate integer operand.\n" |
10069 | "///\n" |
10070 | "/// If the result is true, all 64 bits of the destination vector are set;\n" |
10071 | "/// otherwise they are cleared.\n" |
10072 | "///\n" |
10073 | "/// \\headerfile <x86intrin.h>\n" |
10074 | "///\n" |
10075 | "/// \\code\n" |
10076 | "/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);\n" |
10077 | "/// \\endcode\n" |
10078 | "///\n" |
10079 | "/// This intrinsic corresponds to the <c> VCMPSD </c> instruction.\n" |
10080 | "///\n" |
10081 | "/// \\param a\n" |
10082 | "/// A 128-bit vector of [2 x double].\n" |
10083 | "/// \\param b\n" |
10084 | "/// A 128-bit vector of [2 x double].\n" |
10085 | "/// \\param c\n" |
10086 | "/// An immediate integer operand, with bits [4:0] specifying which comparison\n" |
10087 | "/// operation to use: \\n\n" |
10088 | "/// 0x00: Equal (ordered, non-signaling) \\n\n" |
10089 | "/// 0x01: Less-than (ordered, signaling) \\n\n" |
10090 | "/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n" |
10091 | "/// 0x03: Unordered (non-signaling) \\n\n" |
10092 | "/// 0x04: Not-equal (unordered, non-signaling) \\n\n" |
10093 | "/// 0x05: Not-less-than (unordered, signaling) \\n\n" |
10094 | "/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n" |
10095 | "/// 0x07: Ordered (non-signaling) \\n\n" |
10096 | "/// 0x08: Equal (unordered, non-signaling) \\n\n" |
10097 | "/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n" |
10098 | "/// 0x0A: Not-greater-than (unordered, signaling) \\n\n" |
10099 | "/// 0x0B: False (ordered, non-signaling) \\n\n" |
10100 | "/// 0x0C: Not-equal (ordered, non-signaling) \\n\n" |
10101 | "/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n" |
10102 | "/// 0x0E: Greater-than (ordered, signaling) \\n\n" |
10103 | "/// 0x0F: True (unordered, non-signaling) \\n\n" |
10104 | "/// 0x10: Equal (ordered, signaling) \\n\n" |
10105 | "/// 0x11: Less-than (ordered, non-signaling) \\n\n" |
10106 | "/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n" |
10107 | "/// 0x13: Unordered (signaling) \\n\n" |
10108 | "/// 0x14: Not-equal (unordered, signaling) \\n\n" |
10109 | "/// 0x15: Not-less-than (unordered, non-signaling) \\n\n" |
10110 | "/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n" |
10111 | "/// 0x17: Ordered (signaling) \\n\n" |
10112 | "/// 0x18: Equal (unordered, signaling) \\n\n" |
10113 | "/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n" |
10114 | "/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n" |
10115 | "/// 0x1B: False (ordered, signaling) \\n\n" |
10116 | "/// 0x1C: Not-equal (ordered, signaling) \\n\n" |
10117 | "/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n" |
10118 | "/// 0x1E: Greater-than (ordered, non-signaling) \\n\n" |
10119 | "/// 0x1F: True (unordered, signaling)\n" |
10120 | "/// \\returns A 128-bit vector of [2 x double] containing the comparison results.\n" |
10121 | "#define _mm_cmp_sd(a, b, c) \\\n" |
10122 | " (__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \\\n" |
10123 | " (__v2df)(__m128d)(b), (c))\n" |
10124 | "\n" |
10125 | "/// Compares each of the corresponding scalar values of two 128-bit\n" |
10126 | "/// vectors of [4 x float], using the operation specified by the immediate\n" |
10127 | "/// integer operand.\n" |
10128 | "///\n" |
10129 | "/// If the result is true, all 32 bits of the destination vector are set;\n" |
10130 | "/// otherwise they are cleared.\n" |
10131 | "///\n" |
10132 | "/// \\headerfile <x86intrin.h>\n" |
10133 | "///\n" |
10134 | "/// \\code\n" |
10135 | "/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);\n" |
10136 | "/// \\endcode\n" |
10137 | "///\n" |
10138 | "/// This intrinsic corresponds to the <c> VCMPSS </c> instruction.\n" |
10139 | "///\n" |
10140 | "/// \\param a\n" |
10141 | "/// A 128-bit vector of [4 x float].\n" |
10142 | "/// \\param b\n" |
10143 | "/// A 128-bit vector of [4 x float].\n" |
10144 | "/// \\param c\n" |
10145 | "/// An immediate integer operand, with bits [4:0] specifying which comparison\n" |
10146 | "/// operation to use: \\n\n" |
10147 | "/// 0x00: Equal (ordered, non-signaling) \\n\n" |
10148 | "/// 0x01: Less-than (ordered, signaling) \\n\n" |
10149 | "/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n" |
10150 | "/// 0x03: Unordered (non-signaling) \\n\n" |
10151 | "/// 0x04: Not-equal (unordered, non-signaling) \\n\n" |
10152 | "/// 0x05: Not-less-than (unordered, signaling) \\n\n" |
10153 | "/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n" |
10154 | "/// 0x07: Ordered (non-signaling) \\n\n" |
10155 | "/// 0x08: Equal (unordered, non-signaling) \\n\n" |
10156 | "/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n" |
10157 | "/// 0x0A: Not-greater-than (unordered, signaling) \\n\n" |
10158 | "/// 0x0B: False (ordered, non-signaling) \\n\n" |
10159 | "/// 0x0C: Not-equal (ordered, non-signaling) \\n\n" |
10160 | "/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n" |
10161 | "/// 0x0E: Greater-than (ordered, signaling) \\n\n" |
10162 | "/// 0x0F: True (unordered, non-signaling) \\n\n" |
10163 | "/// 0x10: Equal (ordered, signaling) \\n\n" |
10164 | "/// 0x11: Less-than (ordered, non-signaling) \\n\n" |
10165 | "/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n" |
10166 | "/// 0x13: Unordered (signaling) \\n\n" |
10167 | "/// 0x14: Not-equal (unordered, signaling) \\n\n" |
10168 | "/// 0x15: Not-less-than (unordered, non-signaling) \\n\n" |
10169 | "/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n" |
10170 | "/// 0x17: Ordered (signaling) \\n\n" |
10171 | "/// 0x18: Equal (unordered, signaling) \\n\n" |
10172 | "/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n" |
10173 | "/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n" |
10174 | "/// 0x1B: False (ordered, signaling) \\n\n" |
10175 | "/// 0x1C: Not-equal (ordered, signaling) \\n\n" |
10176 | "/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n" |
10177 | "/// 0x1E: Greater-than (ordered, non-signaling) \\n\n" |
10178 | "/// 0x1F: True (unordered, signaling)\n" |
10179 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
10180 | "#define _mm_cmp_ss(a, b, c) \\\n" |
10181 | " (__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \\\n" |
10182 | " (__v4sf)(__m128)(b), (c))\n" |
10183 | "\n" |
10184 | "/// Takes a [8 x i32] vector and returns the vector element value\n" |
10185 | "/// indexed by the immediate constant operand.\n" |
10186 | "///\n" |
10187 | "/// \\headerfile <x86intrin.h>\n" |
10188 | "///\n" |
10189 | "/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n" |
10190 | "/// instruction.\n" |
10191 | "///\n" |
10192 | "/// \\param __a\n" |
10193 | "/// A 256-bit vector of [8 x i32].\n" |
10194 | "/// \\param __imm\n" |
10195 | "/// An immediate integer operand with bits [2:0] determining which vector\n" |
10196 | "/// element is extracted and returned.\n" |
10197 | "/// \\returns A 32-bit integer containing the extracted 32 bits of extended\n" |
10198 | "/// packed data.\n" |
10199 | "#define _mm256_extract_epi32(X, N) \\\n" |
10200 | " (int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N))\n" |
10201 | "\n" |
10202 | "/// Takes a [16 x i16] vector and returns the vector element value\n" |
10203 | "/// indexed by the immediate constant operand.\n" |
10204 | "///\n" |
10205 | "/// \\headerfile <x86intrin.h>\n" |
10206 | "///\n" |
10207 | "/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n" |
10208 | "/// instruction.\n" |
10209 | "///\n" |
10210 | "/// \\param __a\n" |
10211 | "/// A 256-bit integer vector of [16 x i16].\n" |
10212 | "/// \\param __imm\n" |
10213 | "/// An immediate integer operand with bits [3:0] determining which vector\n" |
10214 | "/// element is extracted and returned.\n" |
10215 | "/// \\returns A 32-bit integer containing the extracted 16 bits of zero extended\n" |
10216 | "/// packed data.\n" |
10217 | "#define _mm256_extract_epi16(X, N) \\\n" |
10218 | " (int)(unsigned short)__builtin_ia32_vec_ext_v16hi((__v16hi)(__m256i)(X), \\\n" |
10219 | " (int)(N))\n" |
10220 | "\n" |
10221 | "/// Takes a [32 x i8] vector and returns the vector element value\n" |
10222 | "/// indexed by the immediate constant operand.\n" |
10223 | "///\n" |
10224 | "/// \\headerfile <x86intrin.h>\n" |
10225 | "///\n" |
10226 | "/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n" |
10227 | "/// instruction.\n" |
10228 | "///\n" |
10229 | "/// \\param __a\n" |
10230 | "/// A 256-bit integer vector of [32 x i8].\n" |
10231 | "/// \\param __imm\n" |
10232 | "/// An immediate integer operand with bits [4:0] determining which vector\n" |
10233 | "/// element is extracted and returned.\n" |
10234 | "/// \\returns A 32-bit integer containing the extracted 8 bits of zero extended\n" |
10235 | "/// packed data.\n" |
10236 | "#define _mm256_extract_epi8(X, N) \\\n" |
10237 | " (int)(unsigned char)__builtin_ia32_vec_ext_v32qi((__v32qi)(__m256i)(X), \\\n" |
10238 | " (int)(N))\n" |
10239 | "\n" |
10240 | "#ifdef __x86_64__\n" |
10241 | "/// Takes a [4 x i64] vector and returns the vector element value\n" |
10242 | "/// indexed by the immediate constant operand.\n" |
10243 | "///\n" |
10244 | "/// \\headerfile <x86intrin.h>\n" |
10245 | "///\n" |
10246 | "/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n" |
10247 | "/// instruction.\n" |
10248 | "///\n" |
10249 | "/// \\param __a\n" |
10250 | "/// A 256-bit integer vector of [4 x i64].\n" |
10251 | "/// \\param __imm\n" |
10252 | "/// An immediate integer operand with bits [1:0] determining which vector\n" |
10253 | "/// element is extracted and returned.\n" |
10254 | "/// \\returns A 64-bit integer containing the extracted 64 bits of extended\n" |
10255 | "/// packed data.\n" |
10256 | "#define _mm256_extract_epi64(X, N) \\\n" |
10257 | " (long long)__builtin_ia32_vec_ext_v4di((__v4di)(__m256i)(X), (int)(N))\n" |
10258 | "#endif\n" |
10259 | "\n" |
10260 | "/// Takes a [8 x i32] vector and replaces the vector element value\n" |
10261 | "/// indexed by the immediate constant operand by a new value. Returns the\n" |
10262 | "/// modified vector.\n" |
10263 | "///\n" |
10264 | "/// \\headerfile <x86intrin.h>\n" |
10265 | "///\n" |
10266 | "/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n" |
10267 | "/// instruction.\n" |
10268 | "///\n" |
10269 | "/// \\param __a\n" |
10270 | "/// A vector of [8 x i32] to be used by the insert operation.\n" |
10271 | "/// \\param __b\n" |
10272 | "/// An integer value. The replacement value for the insert operation.\n" |
10273 | "/// \\param __imm\n" |
10274 | "/// An immediate integer specifying the index of the vector element to be\n" |
10275 | "/// replaced.\n" |
10276 | "/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n" |
10277 | "/// \\a __imm with \\a __b.\n" |
10278 | "#define _mm256_insert_epi32(X, I, N) \\\n" |
10279 | " (__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \\\n" |
10280 | " (int)(I), (int)(N))\n" |
10281 | "\n" |
10282 | "\n" |
10283 | "/// Takes a [16 x i16] vector and replaces the vector element value\n" |
10284 | "/// indexed by the immediate constant operand with a new value. Returns the\n" |
10285 | "/// modified vector.\n" |
10286 | "///\n" |
10287 | "/// \\headerfile <x86intrin.h>\n" |
10288 | "///\n" |
10289 | "/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n" |
10290 | "/// instruction.\n" |
10291 | "///\n" |
10292 | "/// \\param __a\n" |
10293 | "/// A vector of [16 x i16] to be used by the insert operation.\n" |
10294 | "/// \\param __b\n" |
10295 | "/// An i16 integer value. The replacement value for the insert operation.\n" |
10296 | "/// \\param __imm\n" |
10297 | "/// An immediate integer specifying the index of the vector element to be\n" |
10298 | "/// replaced.\n" |
10299 | "/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n" |
10300 | "/// \\a __imm with \\a __b.\n" |
10301 | "#define _mm256_insert_epi16(X, I, N) \\\n" |
10302 | " (__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \\\n" |
10303 | " (int)(I), (int)(N))\n" |
10304 | "\n" |
10305 | "/// Takes a [32 x i8] vector and replaces the vector element value\n" |
10306 | "/// indexed by the immediate constant operand with a new value. Returns the\n" |
10307 | "/// modified vector.\n" |
10308 | "///\n" |
10309 | "/// \\headerfile <x86intrin.h>\n" |
10310 | "///\n" |
10311 | "/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n" |
10312 | "/// instruction.\n" |
10313 | "///\n" |
10314 | "/// \\param __a\n" |
10315 | "/// A vector of [32 x i8] to be used by the insert operation.\n" |
10316 | "/// \\param __b\n" |
10317 | "/// An i8 integer value. The replacement value for the insert operation.\n" |
10318 | "/// \\param __imm\n" |
10319 | "/// An immediate integer specifying the index of the vector element to be\n" |
10320 | "/// replaced.\n" |
10321 | "/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n" |
10322 | "/// \\a __imm with \\a __b.\n" |
10323 | "#define _mm256_insert_epi8(X, I, N) \\\n" |
10324 | " (__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \\\n" |
10325 | " (int)(I), (int)(N))\n" |
10326 | "\n" |
10327 | "#ifdef __x86_64__\n" |
10328 | "/// Takes a [4 x i64] vector and replaces the vector element value\n" |
10329 | "/// indexed by the immediate constant operand with a new value. Returns the\n" |
10330 | "/// modified vector.\n" |
10331 | "///\n" |
10332 | "/// \\headerfile <x86intrin.h>\n" |
10333 | "///\n" |
10334 | "/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n" |
10335 | "/// instruction.\n" |
10336 | "///\n" |
10337 | "/// \\param __a\n" |
10338 | "/// A vector of [4 x i64] to be used by the insert operation.\n" |
10339 | "/// \\param __b\n" |
10340 | "/// A 64-bit integer value. The replacement value for the insert operation.\n" |
10341 | "/// \\param __imm\n" |
10342 | "/// An immediate integer specifying the index of the vector element to be\n" |
10343 | "/// replaced.\n" |
10344 | "/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n" |
10345 | "/// \\a __imm with \\a __b.\n" |
10346 | "#define _mm256_insert_epi64(X, I, N) \\\n" |
10347 | " (__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \\\n" |
10348 | " (long long)(I), (int)(N))\n" |
10349 | "#endif\n" |
10350 | "\n" |
10351 | "/* Conversion */\n" |
10352 | "/// Converts a vector of [4 x i32] into a vector of [4 x double].\n" |
10353 | "///\n" |
10354 | "/// \\headerfile <x86intrin.h>\n" |
10355 | "///\n" |
10356 | "/// This intrinsic corresponds to the <c> VCVTDQ2PD </c> instruction.\n" |
10357 | "///\n" |
10358 | "/// \\param __a\n" |
10359 | "/// A 128-bit integer vector of [4 x i32].\n" |
10360 | "/// \\returns A 256-bit vector of [4 x double] containing the converted values.\n" |
10361 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
10362 | "_mm256_cvtepi32_pd(__m128i __a)\n" |
10363 | "{\n" |
10364 | " return (__m256d)__builtin_convertvector((__v4si)__a, __v4df);\n" |
10365 | "}\n" |
10366 | "\n" |
10367 | "/// Converts a vector of [8 x i32] into a vector of [8 x float].\n" |
10368 | "///\n" |
10369 | "/// \\headerfile <x86intrin.h>\n" |
10370 | "///\n" |
10371 | "/// This intrinsic corresponds to the <c> VCVTDQ2PS </c> instruction.\n" |
10372 | "///\n" |
10373 | "/// \\param __a\n" |
10374 | "/// A 256-bit integer vector.\n" |
10375 | "/// \\returns A 256-bit vector of [8 x float] containing the converted values.\n" |
10376 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
10377 | "_mm256_cvtepi32_ps(__m256i __a)\n" |
10378 | "{\n" |
10379 | " return (__m256)__builtin_convertvector((__v8si)__a, __v8sf);\n" |
10380 | "}\n" |
10381 | "\n" |
10382 | "/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of\n" |
10383 | "/// [4 x float].\n" |
10384 | "///\n" |
10385 | "/// \\headerfile <x86intrin.h>\n" |
10386 | "///\n" |
10387 | "/// This intrinsic corresponds to the <c> VCVTPD2PS </c> instruction.\n" |
10388 | "///\n" |
10389 | "/// \\param __a\n" |
10390 | "/// A 256-bit vector of [4 x double].\n" |
10391 | "/// \\returns A 128-bit vector of [4 x float] containing the converted values.\n" |
10392 | "static __inline __m128 __DEFAULT_FN_ATTRS\n" |
10393 | "_mm256_cvtpd_ps(__m256d __a)\n" |
10394 | "{\n" |
10395 | " return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);\n" |
10396 | "}\n" |
10397 | "\n" |
10398 | "/// Converts a vector of [8 x float] into a vector of [8 x i32].\n" |
10399 | "///\n" |
10400 | "/// \\headerfile <x86intrin.h>\n" |
10401 | "///\n" |
10402 | "/// This intrinsic corresponds to the <c> VCVTPS2DQ </c> instruction.\n" |
10403 | "///\n" |
10404 | "/// \\param __a\n" |
10405 | "/// A 256-bit vector of [8 x float].\n" |
10406 | "/// \\returns A 256-bit integer vector containing the converted values.\n" |
10407 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
10408 | "_mm256_cvtps_epi32(__m256 __a)\n" |
10409 | "{\n" |
10410 | " return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);\n" |
10411 | "}\n" |
10412 | "\n" |
10413 | "/// Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4\n" |
10414 | "/// x double].\n" |
10415 | "///\n" |
10416 | "/// \\headerfile <x86intrin.h>\n" |
10417 | "///\n" |
10418 | "/// This intrinsic corresponds to the <c> VCVTPS2PD </c> instruction.\n" |
10419 | "///\n" |
10420 | "/// \\param __a\n" |
10421 | "/// A 128-bit vector of [4 x float].\n" |
10422 | "/// \\returns A 256-bit vector of [4 x double] containing the converted values.\n" |
10423 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
10424 | "_mm256_cvtps_pd(__m128 __a)\n" |
10425 | "{\n" |
10426 | " return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);\n" |
10427 | "}\n" |
10428 | "\n" |
10429 | "/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4\n" |
10430 | "/// x i32], truncating the result by rounding towards zero when it is\n" |
10431 | "/// inexact.\n" |
10432 | "///\n" |
10433 | "/// \\headerfile <x86intrin.h>\n" |
10434 | "///\n" |
10435 | "/// This intrinsic corresponds to the <c> VCVTTPD2DQ </c> instruction.\n" |
10436 | "///\n" |
10437 | "/// \\param __a\n" |
10438 | "/// A 256-bit vector of [4 x double].\n" |
10439 | "/// \\returns A 128-bit integer vector containing the converted values.\n" |
10440 | "static __inline __m128i __DEFAULT_FN_ATTRS\n" |
10441 | "_mm256_cvttpd_epi32(__m256d __a)\n" |
10442 | "{\n" |
10443 | " return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);\n" |
10444 | "}\n" |
10445 | "\n" |
10446 | "/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4\n" |
10447 | "/// x i32]. When a conversion is inexact, the value returned is rounded\n" |
10448 | "/// according to the rounding control bits in the MXCSR register.\n" |
10449 | "///\n" |
10450 | "/// \\headerfile <x86intrin.h>\n" |
10451 | "///\n" |
10452 | "/// This intrinsic corresponds to the <c> VCVTPD2DQ </c> instruction.\n" |
10453 | "///\n" |
10454 | "/// \\param __a\n" |
10455 | "/// A 256-bit vector of [4 x double].\n" |
10456 | "/// \\returns A 128-bit integer vector containing the converted values.\n" |
10457 | "static __inline __m128i __DEFAULT_FN_ATTRS\n" |
10458 | "_mm256_cvtpd_epi32(__m256d __a)\n" |
10459 | "{\n" |
10460 | " return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);\n" |
10461 | "}\n" |
10462 | "\n" |
10463 | "/// Converts a vector of [8 x float] into a vector of [8 x i32],\n" |
10464 | "/// truncating the result by rounding towards zero when it is inexact.\n" |
10465 | "///\n" |
10466 | "/// \\headerfile <x86intrin.h>\n" |
10467 | "///\n" |
10468 | "/// This intrinsic corresponds to the <c> VCVTTPS2DQ </c> instruction.\n" |
10469 | "///\n" |
10470 | "/// \\param __a\n" |
10471 | "/// A 256-bit vector of [8 x float].\n" |
10472 | "/// \\returns A 256-bit integer vector containing the converted values.\n" |
10473 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
10474 | "_mm256_cvttps_epi32(__m256 __a)\n" |
10475 | "{\n" |
10476 | " return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);\n" |
10477 | "}\n" |
10478 | "\n" |
10479 | "/// Returns the first element of the input vector of [4 x double].\n" |
10480 | "///\n" |
10481 | "/// \\headerfile <avxintrin.h>\n" |
10482 | "///\n" |
10483 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
10484 | "/// instruction.\n" |
10485 | "///\n" |
10486 | "/// \\param __a\n" |
10487 | "/// A 256-bit vector of [4 x double].\n" |
10488 | "/// \\returns A 64 bit double containing the first element of the input vector.\n" |
10489 | "static __inline double __DEFAULT_FN_ATTRS\n" |
10490 | "_mm256_cvtsd_f64(__m256d __a)\n" |
10491 | "{\n" |
10492 | " return __a[0];\n" |
10493 | "}\n" |
10494 | "\n" |
10495 | "/// Returns the first element of the input vector of [8 x i32].\n" |
10496 | "///\n" |
10497 | "/// \\headerfile <avxintrin.h>\n" |
10498 | "///\n" |
10499 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
10500 | "/// instruction.\n" |
10501 | "///\n" |
10502 | "/// \\param __a\n" |
10503 | "/// A 256-bit vector of [8 x i32].\n" |
10504 | "/// \\returns A 32 bit integer containing the first element of the input vector.\n" |
10505 | "static __inline int __DEFAULT_FN_ATTRS\n" |
10506 | "_mm256_cvtsi256_si32(__m256i __a)\n" |
10507 | "{\n" |
10508 | " __v8si __b = (__v8si)__a;\n" |
10509 | " return __b[0];\n" |
10510 | "}\n" |
10511 | "\n" |
10512 | "/// Returns the first element of the input vector of [8 x float].\n" |
10513 | "///\n" |
10514 | "/// \\headerfile <avxintrin.h>\n" |
10515 | "///\n" |
10516 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
10517 | "/// instruction.\n" |
10518 | "///\n" |
10519 | "/// \\param __a\n" |
10520 | "/// A 256-bit vector of [8 x float].\n" |
10521 | "/// \\returns A 32 bit float containing the first element of the input vector.\n" |
10522 | "static __inline float __DEFAULT_FN_ATTRS\n" |
10523 | "_mm256_cvtss_f32(__m256 __a)\n" |
10524 | "{\n" |
10525 | " return __a[0];\n" |
10526 | "}\n" |
10527 | "\n" |
10528 | "/* Vector replicate */\n" |
10529 | "/// Moves and duplicates odd-indexed values from a 256-bit vector of\n" |
10530 | "/// [8 x float] to float values in a 256-bit vector of [8 x float].\n" |
10531 | "///\n" |
10532 | "/// \\headerfile <x86intrin.h>\n" |
10533 | "///\n" |
10534 | "/// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.\n" |
10535 | "///\n" |
10536 | "/// \\param __a\n" |
10537 | "/// A 256-bit vector of [8 x float]. \\n\n" |
10538 | "/// Bits [255:224] of \\a __a are written to bits [255:224] and [223:192] of\n" |
10539 | "/// the return value. \\n\n" |
10540 | "/// Bits [191:160] of \\a __a are written to bits [191:160] and [159:128] of\n" |
10541 | "/// the return value. \\n\n" |
10542 | "/// Bits [127:96] of \\a __a are written to bits [127:96] and [95:64] of the\n" |
10543 | "/// return value. \\n\n" |
10544 | "/// Bits [63:32] of \\a __a are written to bits [63:32] and [31:0] of the\n" |
10545 | "/// return value.\n" |
10546 | "/// \\returns A 256-bit vector of [8 x float] containing the moved and duplicated\n" |
10547 | "/// values.\n" |
10548 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
10549 | "_mm256_movehdup_ps(__m256 __a)\n" |
10550 | "{\n" |
10551 | " return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);\n" |
10552 | "}\n" |
10553 | "\n" |
10554 | "/// Moves and duplicates even-indexed values from a 256-bit vector of\n" |
10555 | "/// [8 x float] to float values in a 256-bit vector of [8 x float].\n" |
10556 | "///\n" |
10557 | "/// \\headerfile <x86intrin.h>\n" |
10558 | "///\n" |
10559 | "/// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.\n" |
10560 | "///\n" |
10561 | "/// \\param __a\n" |
10562 | "/// A 256-bit vector of [8 x float]. \\n\n" |
10563 | "/// Bits [223:192] of \\a __a are written to bits [255:224] and [223:192] of\n" |
10564 | "/// the return value. \\n\n" |
10565 | "/// Bits [159:128] of \\a __a are written to bits [191:160] and [159:128] of\n" |
10566 | "/// the return value. \\n\n" |
10567 | "/// Bits [95:64] of \\a __a are written to bits [127:96] and [95:64] of the\n" |
10568 | "/// return value. \\n\n" |
10569 | "/// Bits [31:0] of \\a __a are written to bits [63:32] and [31:0] of the\n" |
10570 | "/// return value.\n" |
10571 | "/// \\returns A 256-bit vector of [8 x float] containing the moved and duplicated\n" |
10572 | "/// values.\n" |
10573 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
10574 | "_mm256_moveldup_ps(__m256 __a)\n" |
10575 | "{\n" |
10576 | " return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);\n" |
10577 | "}\n" |
10578 | "\n" |
10579 | "/// Moves and duplicates double-precision floating point values from a\n" |
10580 | "/// 256-bit vector of [4 x double] to double-precision values in a 256-bit\n" |
10581 | "/// vector of [4 x double].\n" |
10582 | "///\n" |
10583 | "/// \\headerfile <x86intrin.h>\n" |
10584 | "///\n" |
10585 | "/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.\n" |
10586 | "///\n" |
10587 | "/// \\param __a\n" |
10588 | "/// A 256-bit vector of [4 x double]. \\n\n" |
10589 | "/// Bits [63:0] of \\a __a are written to bits [127:64] and [63:0] of the\n" |
10590 | "/// return value. \\n\n" |
10591 | "/// Bits [191:128] of \\a __a are written to bits [255:192] and [191:128] of\n" |
10592 | "/// the return value.\n" |
10593 | "/// \\returns A 256-bit vector of [4 x double] containing the moved and\n" |
10594 | "/// duplicated values.\n" |
10595 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
10596 | "_mm256_movedup_pd(__m256d __a)\n" |
10597 | "{\n" |
10598 | " return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);\n" |
10599 | "}\n" |
10600 | "\n" |
10601 | "/* Unpack and Interleave */\n" |
10602 | "/// Unpacks the odd-indexed vector elements from two 256-bit vectors of\n" |
10603 | "/// [4 x double] and interleaves them into a 256-bit vector of [4 x double].\n" |
10604 | "///\n" |
10605 | "/// \\headerfile <x86intrin.h>\n" |
10606 | "///\n" |
10607 | "/// This intrinsic corresponds to the <c> VUNPCKHPD </c> instruction.\n" |
10608 | "///\n" |
10609 | "/// \\param __a\n" |
10610 | "/// A 256-bit floating-point vector of [4 x double]. \\n\n" |
10611 | "/// Bits [127:64] are written to bits [63:0] of the return value. \\n\n" |
10612 | "/// Bits [255:192] are written to bits [191:128] of the return value. \\n\n" |
10613 | "/// \\param __b\n" |
10614 | "/// A 256-bit floating-point vector of [4 x double]. \\n\n" |
10615 | "/// Bits [127:64] are written to bits [127:64] of the return value. \\n\n" |
10616 | "/// Bits [255:192] are written to bits [255:192] of the return value. \\n\n" |
10617 | "/// \\returns A 256-bit vector of [4 x double] containing the interleaved values.\n" |
10618 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
10619 | "_mm256_unpackhi_pd(__m256d __a, __m256d __b)\n" |
10620 | "{\n" |
10621 | " return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2);\n" |
10622 | "}\n" |
10623 | "\n" |
10624 | "/// Unpacks the even-indexed vector elements from two 256-bit vectors of\n" |
10625 | "/// [4 x double] and interleaves them into a 256-bit vector of [4 x double].\n" |
10626 | "///\n" |
10627 | "/// \\headerfile <x86intrin.h>\n" |
10628 | "///\n" |
10629 | "/// This intrinsic corresponds to the <c> VUNPCKLPD </c> instruction.\n" |
10630 | "///\n" |
10631 | "/// \\param __a\n" |
10632 | "/// A 256-bit floating-point vector of [4 x double]. \\n\n" |
10633 | "/// Bits [63:0] are written to bits [63:0] of the return value. \\n\n" |
10634 | "/// Bits [191:128] are written to bits [191:128] of the return value.\n" |
10635 | "/// \\param __b\n" |
10636 | "/// A 256-bit floating-point vector of [4 x double]. \\n\n" |
10637 | "/// Bits [63:0] are written to bits [127:64] of the return value. \\n\n" |
10638 | "/// Bits [191:128] are written to bits [255:192] of the return value. \\n\n" |
10639 | "/// \\returns A 256-bit vector of [4 x double] containing the interleaved values.\n" |
10640 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
10641 | "_mm256_unpacklo_pd(__m256d __a, __m256d __b)\n" |
10642 | "{\n" |
10643 | " return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2);\n" |
10644 | "}\n" |
10645 | "\n" |
10646 | "/// Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the\n" |
10647 | "/// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit\n" |
10648 | "/// vector of [8 x float].\n" |
10649 | "///\n" |
10650 | "/// \\headerfile <x86intrin.h>\n" |
10651 | "///\n" |
10652 | "/// This intrinsic corresponds to the <c> VUNPCKHPS </c> instruction.\n" |
10653 | "///\n" |
10654 | "/// \\param __a\n" |
10655 | "/// A 256-bit vector of [8 x float]. \\n\n" |
10656 | "/// Bits [95:64] are written to bits [31:0] of the return value. \\n\n" |
10657 | "/// Bits [127:96] are written to bits [95:64] of the return value. \\n\n" |
10658 | "/// Bits [223:192] are written to bits [159:128] of the return value. \\n\n" |
10659 | "/// Bits [255:224] are written to bits [223:192] of the return value.\n" |
10660 | "/// \\param __b\n" |
10661 | "/// A 256-bit vector of [8 x float]. \\n\n" |
10662 | "/// Bits [95:64] are written to bits [63:32] of the return value. \\n\n" |
10663 | "/// Bits [127:96] are written to bits [127:96] of the return value. \\n\n" |
10664 | "/// Bits [223:192] are written to bits [191:160] of the return value. \\n\n" |
10665 | "/// Bits [255:224] are written to bits [255:224] of the return value.\n" |
10666 | "/// \\returns A 256-bit vector of [8 x float] containing the interleaved values.\n" |
10667 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
10668 | "_mm256_unpackhi_ps(__m256 __a, __m256 __b)\n" |
10669 | "{\n" |
10670 | " return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);\n" |
10671 | "}\n" |
10672 | "\n" |
10673 | "/// Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the\n" |
10674 | "/// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit\n" |
10675 | "/// vector of [8 x float].\n" |
10676 | "///\n" |
10677 | "/// \\headerfile <x86intrin.h>\n" |
10678 | "///\n" |
10679 | "/// This intrinsic corresponds to the <c> VUNPCKLPS </c> instruction.\n" |
10680 | "///\n" |
10681 | "/// \\param __a\n" |
10682 | "/// A 256-bit vector of [8 x float]. \\n\n" |
10683 | "/// Bits [31:0] are written to bits [31:0] of the return value. \\n\n" |
10684 | "/// Bits [63:32] are written to bits [95:64] of the return value. \\n\n" |
10685 | "/// Bits [159:128] are written to bits [159:128] of the return value. \\n\n" |
10686 | "/// Bits [191:160] are written to bits [223:192] of the return value.\n" |
10687 | "/// \\param __b\n" |
10688 | "/// A 256-bit vector of [8 x float]. \\n\n" |
10689 | "/// Bits [31:0] are written to bits [63:32] of the return value. \\n\n" |
10690 | "/// Bits [63:32] are written to bits [127:96] of the return value. \\n\n" |
10691 | "/// Bits [159:128] are written to bits [191:160] of the return value. \\n\n" |
10692 | "/// Bits [191:160] are written to bits [255:224] of the return value.\n" |
10693 | "/// \\returns A 256-bit vector of [8 x float] containing the interleaved values.\n" |
10694 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
10695 | "_mm256_unpacklo_ps(__m256 __a, __m256 __b)\n" |
10696 | "{\n" |
10697 | " return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);\n" |
10698 | "}\n" |
10699 | "\n" |
10700 | "/* Bit Test */\n" |
10701 | "/// Given two 128-bit floating-point vectors of [2 x double], perform an\n" |
10702 | "/// element-by-element comparison of the double-precision element in the\n" |
10703 | "/// first source vector and the corresponding element in the second source\n" |
10704 | "/// vector.\n" |
10705 | "///\n" |
10706 | "/// The EFLAGS register is updated as follows: \\n\n" |
10707 | "/// If there is at least one pair of double-precision elements where the\n" |
10708 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
10709 | "/// ZF flag is set to 1. \\n\n" |
10710 | "/// If there is at least one pair of double-precision elements where the\n" |
10711 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
10712 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
10713 | "/// This intrinsic returns the value of the ZF flag.\n" |
10714 | "///\n" |
10715 | "/// \\headerfile <x86intrin.h>\n" |
10716 | "///\n" |
10717 | "/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n" |
10718 | "///\n" |
10719 | "/// \\param __a\n" |
10720 | "/// A 128-bit vector of [2 x double].\n" |
10721 | "/// \\param __b\n" |
10722 | "/// A 128-bit vector of [2 x double].\n" |
10723 | "/// \\returns the ZF flag in the EFLAGS register.\n" |
10724 | "static __inline int __DEFAULT_FN_ATTRS128\n" |
10725 | "_mm_testz_pd(__m128d __a, __m128d __b)\n" |
10726 | "{\n" |
10727 | " return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);\n" |
10728 | "}\n" |
10729 | "\n" |
10730 | "/// Given two 128-bit floating-point vectors of [2 x double], perform an\n" |
10731 | "/// element-by-element comparison of the double-precision element in the\n" |
10732 | "/// first source vector and the corresponding element in the second source\n" |
10733 | "/// vector.\n" |
10734 | "///\n" |
10735 | "/// The EFLAGS register is updated as follows: \\n\n" |
10736 | "/// If there is at least one pair of double-precision elements where the\n" |
10737 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
10738 | "/// ZF flag is set to 1. \\n\n" |
10739 | "/// If there is at least one pair of double-precision elements where the\n" |
10740 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
10741 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
10742 | "/// This intrinsic returns the value of the CF flag.\n" |
10743 | "///\n" |
10744 | "/// \\headerfile <x86intrin.h>\n" |
10745 | "///\n" |
10746 | "/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n" |
10747 | "///\n" |
10748 | "/// \\param __a\n" |
10749 | "/// A 128-bit vector of [2 x double].\n" |
10750 | "/// \\param __b\n" |
10751 | "/// A 128-bit vector of [2 x double].\n" |
10752 | "/// \\returns the CF flag in the EFLAGS register.\n" |
10753 | "static __inline int __DEFAULT_FN_ATTRS128\n" |
10754 | "_mm_testc_pd(__m128d __a, __m128d __b)\n" |
10755 | "{\n" |
10756 | " return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);\n" |
10757 | "}\n" |
10758 | "\n" |
10759 | "/// Given two 128-bit floating-point vectors of [2 x double], perform an\n" |
10760 | "/// element-by-element comparison of the double-precision element in the\n" |
10761 | "/// first source vector and the corresponding element in the second source\n" |
10762 | "/// vector.\n" |
10763 | "///\n" |
10764 | "/// The EFLAGS register is updated as follows: \\n\n" |
10765 | "/// If there is at least one pair of double-precision elements where the\n" |
10766 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
10767 | "/// ZF flag is set to 1. \\n\n" |
10768 | "/// If there is at least one pair of double-precision elements where the\n" |
10769 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
10770 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
10771 | "/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n" |
10772 | "/// otherwise it returns 0.\n" |
10773 | "///\n" |
10774 | "/// \\headerfile <x86intrin.h>\n" |
10775 | "///\n" |
10776 | "/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n" |
10777 | "///\n" |
10778 | "/// \\param __a\n" |
10779 | "/// A 128-bit vector of [2 x double].\n" |
10780 | "/// \\param __b\n" |
10781 | "/// A 128-bit vector of [2 x double].\n" |
10782 | "/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n" |
10783 | "static __inline int __DEFAULT_FN_ATTRS128\n" |
10784 | "_mm_testnzc_pd(__m128d __a, __m128d __b)\n" |
10785 | "{\n" |
10786 | " return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);\n" |
10787 | "}\n" |
10788 | "\n" |
10789 | "/// Given two 128-bit floating-point vectors of [4 x float], perform an\n" |
10790 | "/// element-by-element comparison of the single-precision element in the\n" |
10791 | "/// first source vector and the corresponding element in the second source\n" |
10792 | "/// vector.\n" |
10793 | "///\n" |
10794 | "/// The EFLAGS register is updated as follows: \\n\n" |
10795 | "/// If there is at least one pair of single-precision elements where the\n" |
10796 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
10797 | "/// ZF flag is set to 1. \\n\n" |
10798 | "/// If there is at least one pair of single-precision elements where the\n" |
10799 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
10800 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
10801 | "/// This intrinsic returns the value of the ZF flag.\n" |
10802 | "///\n" |
10803 | "/// \\headerfile <x86intrin.h>\n" |
10804 | "///\n" |
10805 | "/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n" |
10806 | "///\n" |
10807 | "/// \\param __a\n" |
10808 | "/// A 128-bit vector of [4 x float].\n" |
10809 | "/// \\param __b\n" |
10810 | "/// A 128-bit vector of [4 x float].\n" |
10811 | "/// \\returns the ZF flag.\n" |
10812 | "static __inline int __DEFAULT_FN_ATTRS128\n" |
10813 | "_mm_testz_ps(__m128 __a, __m128 __b)\n" |
10814 | "{\n" |
10815 | " return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);\n" |
10816 | "}\n" |
10817 | "\n" |
10818 | "/// Given two 128-bit floating-point vectors of [4 x float], perform an\n" |
10819 | "/// element-by-element comparison of the single-precision element in the\n" |
10820 | "/// first source vector and the corresponding element in the second source\n" |
10821 | "/// vector.\n" |
10822 | "///\n" |
10823 | "/// The EFLAGS register is updated as follows: \\n\n" |
10824 | "/// If there is at least one pair of single-precision elements where the\n" |
10825 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
10826 | "/// ZF flag is set to 1. \\n\n" |
10827 | "/// If there is at least one pair of single-precision elements where the\n" |
10828 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
10829 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
10830 | "/// This intrinsic returns the value of the CF flag.\n" |
10831 | "///\n" |
10832 | "/// \\headerfile <x86intrin.h>\n" |
10833 | "///\n" |
10834 | "/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n" |
10835 | "///\n" |
10836 | "/// \\param __a\n" |
10837 | "/// A 128-bit vector of [4 x float].\n" |
10838 | "/// \\param __b\n" |
10839 | "/// A 128-bit vector of [4 x float].\n" |
10840 | "/// \\returns the CF flag.\n" |
10841 | "static __inline int __DEFAULT_FN_ATTRS128\n" |
10842 | "_mm_testc_ps(__m128 __a, __m128 __b)\n" |
10843 | "{\n" |
10844 | " return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);\n" |
10845 | "}\n" |
10846 | "\n" |
10847 | "/// Given two 128-bit floating-point vectors of [4 x float], perform an\n" |
10848 | "/// element-by-element comparison of the single-precision element in the\n" |
10849 | "/// first source vector and the corresponding element in the second source\n" |
10850 | "/// vector.\n" |
10851 | "///\n" |
10852 | "/// The EFLAGS register is updated as follows: \\n\n" |
10853 | "/// If there is at least one pair of single-precision elements where the\n" |
10854 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
10855 | "/// ZF flag is set to 1. \\n\n" |
10856 | "/// If there is at least one pair of single-precision elements where the\n" |
10857 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
10858 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
10859 | "/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n" |
10860 | "/// otherwise it returns 0.\n" |
10861 | "///\n" |
10862 | "/// \\headerfile <x86intrin.h>\n" |
10863 | "///\n" |
10864 | "/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n" |
10865 | "///\n" |
10866 | "/// \\param __a\n" |
10867 | "/// A 128-bit vector of [4 x float].\n" |
10868 | "/// \\param __b\n" |
10869 | "/// A 128-bit vector of [4 x float].\n" |
10870 | "/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n" |
10871 | "static __inline int __DEFAULT_FN_ATTRS128\n" |
10872 | "_mm_testnzc_ps(__m128 __a, __m128 __b)\n" |
10873 | "{\n" |
10874 | " return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);\n" |
10875 | "}\n" |
10876 | "\n" |
10877 | "/// Given two 256-bit floating-point vectors of [4 x double], perform an\n" |
10878 | "/// element-by-element comparison of the double-precision elements in the\n" |
10879 | "/// first source vector and the corresponding elements in the second source\n" |
10880 | "/// vector.\n" |
10881 | "///\n" |
10882 | "/// The EFLAGS register is updated as follows: \\n\n" |
10883 | "/// If there is at least one pair of double-precision elements where the\n" |
10884 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
10885 | "/// ZF flag is set to 1. \\n\n" |
10886 | "/// If there is at least one pair of double-precision elements where the\n" |
10887 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
10888 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
10889 | "/// This intrinsic returns the value of the ZF flag.\n" |
10890 | "///\n" |
10891 | "/// \\headerfile <x86intrin.h>\n" |
10892 | "///\n" |
10893 | "/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n" |
10894 | "///\n" |
10895 | "/// \\param __a\n" |
10896 | "/// A 256-bit vector of [4 x double].\n" |
10897 | "/// \\param __b\n" |
10898 | "/// A 256-bit vector of [4 x double].\n" |
10899 | "/// \\returns the ZF flag.\n" |
10900 | "static __inline int __DEFAULT_FN_ATTRS\n" |
10901 | "_mm256_testz_pd(__m256d __a, __m256d __b)\n" |
10902 | "{\n" |
10903 | " return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);\n" |
10904 | "}\n" |
10905 | "\n" |
10906 | "/// Given two 256-bit floating-point vectors of [4 x double], perform an\n" |
10907 | "/// element-by-element comparison of the double-precision elements in the\n" |
10908 | "/// first source vector and the corresponding elements in the second source\n" |
10909 | "/// vector.\n" |
10910 | "///\n" |
10911 | "/// The EFLAGS register is updated as follows: \\n\n" |
10912 | "/// If there is at least one pair of double-precision elements where the\n" |
10913 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
10914 | "/// ZF flag is set to 1. \\n\n" |
10915 | "/// If there is at least one pair of double-precision elements where the\n" |
10916 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
10917 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
10918 | "/// This intrinsic returns the value of the CF flag.\n" |
10919 | "///\n" |
10920 | "/// \\headerfile <x86intrin.h>\n" |
10921 | "///\n" |
10922 | "/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n" |
10923 | "///\n" |
10924 | "/// \\param __a\n" |
10925 | "/// A 256-bit vector of [4 x double].\n" |
10926 | "/// \\param __b\n" |
10927 | "/// A 256-bit vector of [4 x double].\n" |
10928 | "/// \\returns the CF flag.\n" |
10929 | "static __inline int __DEFAULT_FN_ATTRS\n" |
10930 | "_mm256_testc_pd(__m256d __a, __m256d __b)\n" |
10931 | "{\n" |
10932 | " return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);\n" |
10933 | "}\n" |
10934 | "\n" |
10935 | "/// Given two 256-bit floating-point vectors of [4 x double], perform an\n" |
10936 | "/// element-by-element comparison of the double-precision elements in the\n" |
10937 | "/// first source vector and the corresponding elements in the second source\n" |
10938 | "/// vector.\n" |
10939 | "///\n" |
10940 | "/// The EFLAGS register is updated as follows: \\n\n" |
10941 | "/// If there is at least one pair of double-precision elements where the\n" |
10942 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
10943 | "/// ZF flag is set to 1. \\n\n" |
10944 | "/// If there is at least one pair of double-precision elements where the\n" |
10945 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
10946 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
10947 | "/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n" |
10948 | "/// otherwise it returns 0.\n" |
10949 | "///\n" |
10950 | "/// \\headerfile <x86intrin.h>\n" |
10951 | "///\n" |
10952 | "/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n" |
10953 | "///\n" |
10954 | "/// \\param __a\n" |
10955 | "/// A 256-bit vector of [4 x double].\n" |
10956 | "/// \\param __b\n" |
10957 | "/// A 256-bit vector of [4 x double].\n" |
10958 | "/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n" |
10959 | "static __inline int __DEFAULT_FN_ATTRS\n" |
10960 | "_mm256_testnzc_pd(__m256d __a, __m256d __b)\n" |
10961 | "{\n" |
10962 | " return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);\n" |
10963 | "}\n" |
10964 | "\n" |
10965 | "/// Given two 256-bit floating-point vectors of [8 x float], perform an\n" |
10966 | "/// element-by-element comparison of the single-precision element in the\n" |
10967 | "/// first source vector and the corresponding element in the second source\n" |
10968 | "/// vector.\n" |
10969 | "///\n" |
10970 | "/// The EFLAGS register is updated as follows: \\n\n" |
10971 | "/// If there is at least one pair of single-precision elements where the\n" |
10972 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
10973 | "/// ZF flag is set to 1. \\n\n" |
10974 | "/// If there is at least one pair of single-precision elements where the\n" |
10975 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
10976 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
10977 | "/// This intrinsic returns the value of the ZF flag.\n" |
10978 | "///\n" |
10979 | "/// \\headerfile <x86intrin.h>\n" |
10980 | "///\n" |
10981 | "/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n" |
10982 | "///\n" |
10983 | "/// \\param __a\n" |
10984 | "/// A 256-bit vector of [8 x float].\n" |
10985 | "/// \\param __b\n" |
10986 | "/// A 256-bit vector of [8 x float].\n" |
10987 | "/// \\returns the ZF flag.\n" |
10988 | "static __inline int __DEFAULT_FN_ATTRS\n" |
10989 | "_mm256_testz_ps(__m256 __a, __m256 __b)\n" |
10990 | "{\n" |
10991 | " return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);\n" |
10992 | "}\n" |
10993 | "\n" |
10994 | "/// Given two 256-bit floating-point vectors of [8 x float], perform an\n" |
10995 | "/// element-by-element comparison of the single-precision element in the\n" |
10996 | "/// first source vector and the corresponding element in the second source\n" |
10997 | "/// vector.\n" |
10998 | "///\n" |
10999 | "/// The EFLAGS register is updated as follows: \\n\n" |
11000 | "/// If there is at least one pair of single-precision elements where the\n" |
11001 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
11002 | "/// ZF flag is set to 1. \\n\n" |
11003 | "/// If there is at least one pair of single-precision elements where the\n" |
11004 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
11005 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
11006 | "/// This intrinsic returns the value of the CF flag.\n" |
11007 | "///\n" |
11008 | "/// \\headerfile <x86intrin.h>\n" |
11009 | "///\n" |
11010 | "/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n" |
11011 | "///\n" |
11012 | "/// \\param __a\n" |
11013 | "/// A 256-bit vector of [8 x float].\n" |
11014 | "/// \\param __b\n" |
11015 | "/// A 256-bit vector of [8 x float].\n" |
11016 | "/// \\returns the CF flag.\n" |
11017 | "static __inline int __DEFAULT_FN_ATTRS\n" |
11018 | "_mm256_testc_ps(__m256 __a, __m256 __b)\n" |
11019 | "{\n" |
11020 | " return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);\n" |
11021 | "}\n" |
11022 | "\n" |
11023 | "/// Given two 256-bit floating-point vectors of [8 x float], perform an\n" |
11024 | "/// element-by-element comparison of the single-precision elements in the\n" |
11025 | "/// first source vector and the corresponding elements in the second source\n" |
11026 | "/// vector.\n" |
11027 | "///\n" |
11028 | "/// The EFLAGS register is updated as follows: \\n\n" |
11029 | "/// If there is at least one pair of single-precision elements where the\n" |
11030 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
11031 | "/// ZF flag is set to 1. \\n\n" |
11032 | "/// If there is at least one pair of single-precision elements where the\n" |
11033 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
11034 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
11035 | "/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n" |
11036 | "/// otherwise it returns 0.\n" |
11037 | "///\n" |
11038 | "/// \\headerfile <x86intrin.h>\n" |
11039 | "///\n" |
11040 | "/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n" |
11041 | "///\n" |
11042 | "/// \\param __a\n" |
11043 | "/// A 256-bit vector of [8 x float].\n" |
11044 | "/// \\param __b\n" |
11045 | "/// A 256-bit vector of [8 x float].\n" |
11046 | "/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n" |
11047 | "static __inline int __DEFAULT_FN_ATTRS\n" |
11048 | "_mm256_testnzc_ps(__m256 __a, __m256 __b)\n" |
11049 | "{\n" |
11050 | " return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);\n" |
11051 | "}\n" |
11052 | "\n" |
11053 | "/// Given two 256-bit integer vectors, perform a bit-by-bit comparison\n" |
11054 | "/// of the two source vectors.\n" |
11055 | "///\n" |
11056 | "/// The EFLAGS register is updated as follows: \\n\n" |
11057 | "/// If there is at least one pair of bits where both bits are 1, the ZF flag\n" |
11058 | "/// is set to 0. Otherwise the ZF flag is set to 1. \\n\n" |
11059 | "/// If there is at least one pair of bits where the bit from the first source\n" |
11060 | "/// vector is 0 and the bit from the second source vector is 1, the CF flag\n" |
11061 | "/// is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
11062 | "/// This intrinsic returns the value of the ZF flag.\n" |
11063 | "///\n" |
11064 | "/// \\headerfile <x86intrin.h>\n" |
11065 | "///\n" |
11066 | "/// This intrinsic corresponds to the <c> VPTEST </c> instruction.\n" |
11067 | "///\n" |
11068 | "/// \\param __a\n" |
11069 | "/// A 256-bit integer vector.\n" |
11070 | "/// \\param __b\n" |
11071 | "/// A 256-bit integer vector.\n" |
11072 | "/// \\returns the ZF flag.\n" |
11073 | "static __inline int __DEFAULT_FN_ATTRS\n" |
11074 | "_mm256_testz_si256(__m256i __a, __m256i __b)\n" |
11075 | "{\n" |
11076 | " return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);\n" |
11077 | "}\n" |
11078 | "\n" |
11079 | "/// Given two 256-bit integer vectors, perform a bit-by-bit comparison\n" |
11080 | "/// of the two source vectors.\n" |
11081 | "///\n" |
11082 | "/// The EFLAGS register is updated as follows: \\n\n" |
11083 | "/// If there is at least one pair of bits where both bits are 1, the ZF flag\n" |
11084 | "/// is set to 0. Otherwise the ZF flag is set to 1. \\n\n" |
11085 | "/// If there is at least one pair of bits where the bit from the first source\n" |
11086 | "/// vector is 0 and the bit from the second source vector is 1, the CF flag\n" |
11087 | "/// is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
11088 | "/// This intrinsic returns the value of the CF flag.\n" |
11089 | "///\n" |
11090 | "/// \\headerfile <x86intrin.h>\n" |
11091 | "///\n" |
11092 | "/// This intrinsic corresponds to the <c> VPTEST </c> instruction.\n" |
11093 | "///\n" |
11094 | "/// \\param __a\n" |
11095 | "/// A 256-bit integer vector.\n" |
11096 | "/// \\param __b\n" |
11097 | "/// A 256-bit integer vector.\n" |
11098 | "/// \\returns the CF flag.\n" |
11099 | "static __inline int __DEFAULT_FN_ATTRS\n" |
11100 | "_mm256_testc_si256(__m256i __a, __m256i __b)\n" |
11101 | "{\n" |
11102 | " return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);\n" |
11103 | "}\n" |
11104 | "\n" |
11105 | "/// Given two 256-bit integer vectors, perform a bit-by-bit comparison\n" |
11106 | "/// of the two source vectors.\n" |
11107 | "///\n" |
11108 | "/// The EFLAGS register is updated as follows: \\n\n" |
11109 | "/// If there is at least one pair of bits where both bits are 1, the ZF flag\n" |
11110 | "/// is set to 0. Otherwise the ZF flag is set to 1. \\n\n" |
11111 | "/// If there is at least one pair of bits where the bit from the first source\n" |
11112 | "/// vector is 0 and the bit from the second source vector is 1, the CF flag\n" |
11113 | "/// is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
11114 | "/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n" |
11115 | "/// otherwise it returns 0.\n" |
11116 | "///\n" |
11117 | "/// \\headerfile <x86intrin.h>\n" |
11118 | "///\n" |
11119 | "/// This intrinsic corresponds to the <c> VPTEST </c> instruction.\n" |
11120 | "///\n" |
11121 | "/// \\param __a\n" |
11122 | "/// A 256-bit integer vector.\n" |
11123 | "/// \\param __b\n" |
11124 | "/// A 256-bit integer vector.\n" |
11125 | "/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n" |
11126 | "static __inline int __DEFAULT_FN_ATTRS\n" |
11127 | "_mm256_testnzc_si256(__m256i __a, __m256i __b)\n" |
11128 | "{\n" |
11129 | " return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);\n" |
11130 | "}\n" |
11131 | "\n" |
11132 | "/* Vector extract sign mask */\n" |
11133 | "/// Extracts the sign bits of double-precision floating point elements\n" |
11134 | "/// in a 256-bit vector of [4 x double] and writes them to the lower order\n" |
11135 | "/// bits of the return value.\n" |
11136 | "///\n" |
11137 | "/// \\headerfile <x86intrin.h>\n" |
11138 | "///\n" |
11139 | "/// This intrinsic corresponds to the <c> VMOVMSKPD </c> instruction.\n" |
11140 | "///\n" |
11141 | "/// \\param __a\n" |
11142 | "/// A 256-bit vector of [4 x double] containing the double-precision\n" |
11143 | "/// floating point values with sign bits to be extracted.\n" |
11144 | "/// \\returns The sign bits from the operand, written to bits [3:0].\n" |
11145 | "static __inline int __DEFAULT_FN_ATTRS\n" |
11146 | "_mm256_movemask_pd(__m256d __a)\n" |
11147 | "{\n" |
11148 | " return __builtin_ia32_movmskpd256((__v4df)__a);\n" |
11149 | "}\n" |
11150 | "\n" |
11151 | "/// Extracts the sign bits of single-precision floating point elements\n" |
11152 | "/// in a 256-bit vector of [8 x float] and writes them to the lower order\n" |
11153 | "/// bits of the return value.\n" |
11154 | "///\n" |
11155 | "/// \\headerfile <x86intrin.h>\n" |
11156 | "///\n" |
11157 | "/// This intrinsic corresponds to the <c> VMOVMSKPS </c> instruction.\n" |
11158 | "///\n" |
11159 | "/// \\param __a\n" |
11160 | "/// A 256-bit vector of [8 x float] containing the single-precision floating\n" |
11161 | "/// point values with sign bits to be extracted.\n" |
11162 | "/// \\returns The sign bits from the operand, written to bits [7:0].\n" |
11163 | "static __inline int __DEFAULT_FN_ATTRS\n" |
11164 | "_mm256_movemask_ps(__m256 __a)\n" |
11165 | "{\n" |
11166 | " return __builtin_ia32_movmskps256((__v8sf)__a);\n" |
11167 | "}\n" |
11168 | "\n" |
11169 | "/* Vector __zero */\n" |
11170 | "/// Zeroes the contents of all XMM or YMM registers.\n" |
11171 | "///\n" |
11172 | "/// \\headerfile <x86intrin.h>\n" |
11173 | "///\n" |
11174 | "/// This intrinsic corresponds to the <c> VZEROALL </c> instruction.\n" |
11175 | "static __inline void __attribute__((__always_inline__, __nodebug__, __target__(\"avx\")))\n" |
11176 | "_mm256_zeroall(void)\n" |
11177 | "{\n" |
11178 | " __builtin_ia32_vzeroall();\n" |
11179 | "}\n" |
11180 | "\n" |
11181 | "/// Zeroes the upper 128 bits (bits 255:128) of all YMM registers.\n" |
11182 | "///\n" |
11183 | "/// \\headerfile <x86intrin.h>\n" |
11184 | "///\n" |
11185 | "/// This intrinsic corresponds to the <c> VZEROUPPER </c> instruction.\n" |
11186 | "static __inline void __attribute__((__always_inline__, __nodebug__, __target__(\"avx\")))\n" |
11187 | "_mm256_zeroupper(void)\n" |
11188 | "{\n" |
11189 | " __builtin_ia32_vzeroupper();\n" |
11190 | "}\n" |
11191 | "\n" |
11192 | "/* Vector load with broadcast */\n" |
11193 | "/// Loads a scalar single-precision floating point value from the\n" |
11194 | "/// specified address pointed to by \\a __a and broadcasts it to the elements\n" |
11195 | "/// of a [4 x float] vector.\n" |
11196 | "///\n" |
11197 | "/// \\headerfile <x86intrin.h>\n" |
11198 | "///\n" |
11199 | "/// This intrinsic corresponds to the <c> VBROADCASTSS </c> instruction.\n" |
11200 | "///\n" |
11201 | "/// \\param __a\n" |
11202 | "/// The single-precision floating point value to be broadcast.\n" |
11203 | "/// \\returns A 128-bit vector of [4 x float] whose 32-bit elements are set\n" |
11204 | "/// equal to the broadcast value.\n" |
11205 | "static __inline __m128 __DEFAULT_FN_ATTRS128\n" |
11206 | "_mm_broadcast_ss(float const *__a)\n" |
11207 | "{\n" |
11208 | " float __f = *__a;\n" |
11209 | " return __extension__ (__m128)(__v4sf){ __f, __f, __f, __f };\n" |
11210 | "}\n" |
11211 | "\n" |
11212 | "/// Loads a scalar double-precision floating point value from the\n" |
11213 | "/// specified address pointed to by \\a __a and broadcasts it to the elements\n" |
11214 | "/// of a [4 x double] vector.\n" |
11215 | "///\n" |
11216 | "/// \\headerfile <x86intrin.h>\n" |
11217 | "///\n" |
11218 | "/// This intrinsic corresponds to the <c> VBROADCASTSD </c> instruction.\n" |
11219 | "///\n" |
11220 | "/// \\param __a\n" |
11221 | "/// The double-precision floating point value to be broadcast.\n" |
11222 | "/// \\returns A 256-bit vector of [4 x double] whose 64-bit elements are set\n" |
11223 | "/// equal to the broadcast value.\n" |
11224 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
11225 | "_mm256_broadcast_sd(double const *__a)\n" |
11226 | "{\n" |
11227 | " double __d = *__a;\n" |
11228 | " return __extension__ (__m256d)(__v4df){ __d, __d, __d, __d };\n" |
11229 | "}\n" |
11230 | "\n" |
11231 | "/// Loads a scalar single-precision floating point value from the\n" |
11232 | "/// specified address pointed to by \\a __a and broadcasts it to the elements\n" |
11233 | "/// of a [8 x float] vector.\n" |
11234 | "///\n" |
11235 | "/// \\headerfile <x86intrin.h>\n" |
11236 | "///\n" |
11237 | "/// This intrinsic corresponds to the <c> VBROADCASTSS </c> instruction.\n" |
11238 | "///\n" |
11239 | "/// \\param __a\n" |
11240 | "/// The single-precision floating point value to be broadcast.\n" |
11241 | "/// \\returns A 256-bit vector of [8 x float] whose 32-bit elements are set\n" |
11242 | "/// equal to the broadcast value.\n" |
11243 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
11244 | "_mm256_broadcast_ss(float const *__a)\n" |
11245 | "{\n" |
11246 | " float __f = *__a;\n" |
11247 | " return __extension__ (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };\n" |
11248 | "}\n" |
11249 | "\n" |
11250 | "/// Loads the data from a 128-bit vector of [2 x double] from the\n" |
11251 | "/// specified address pointed to by \\a __a and broadcasts it to 128-bit\n" |
11252 | "/// elements in a 256-bit vector of [4 x double].\n" |
11253 | "///\n" |
11254 | "/// \\headerfile <x86intrin.h>\n" |
11255 | "///\n" |
11256 | "/// This intrinsic corresponds to the <c> VBROADCASTF128 </c> instruction.\n" |
11257 | "///\n" |
11258 | "/// \\param __a\n" |
11259 | "/// The 128-bit vector of [2 x double] to be broadcast.\n" |
11260 | "/// \\returns A 256-bit vector of [4 x double] whose 128-bit elements are set\n" |
11261 | "/// equal to the broadcast value.\n" |
11262 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
11263 | "_mm256_broadcast_pd(__m128d const *__a)\n" |
11264 | "{\n" |
11265 | " __m128d __b = _mm_loadu_pd((const double *)__a);\n" |
11266 | " return (__m256d)__builtin_shufflevector((__v2df)__b, (__v2df)__b,\n" |
11267 | " 0, 1, 0, 1);\n" |
11268 | "}\n" |
11269 | "\n" |
11270 | "/// Loads the data from a 128-bit vector of [4 x float] from the\n" |
11271 | "/// specified address pointed to by \\a __a and broadcasts it to 128-bit\n" |
11272 | "/// elements in a 256-bit vector of [8 x float].\n" |
11273 | "///\n" |
11274 | "/// \\headerfile <x86intrin.h>\n" |
11275 | "///\n" |
11276 | "/// This intrinsic corresponds to the <c> VBROADCASTF128 </c> instruction.\n" |
11277 | "///\n" |
11278 | "/// \\param __a\n" |
11279 | "/// The 128-bit vector of [4 x float] to be broadcast.\n" |
11280 | "/// \\returns A 256-bit vector of [8 x float] whose 128-bit elements are set\n" |
11281 | "/// equal to the broadcast value.\n" |
11282 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
11283 | "_mm256_broadcast_ps(__m128 const *__a)\n" |
11284 | "{\n" |
11285 | " __m128 __b = _mm_loadu_ps((const float *)__a);\n" |
11286 | " return (__m256)__builtin_shufflevector((__v4sf)__b, (__v4sf)__b,\n" |
11287 | " 0, 1, 2, 3, 0, 1, 2, 3);\n" |
11288 | "}\n" |
11289 | "\n" |
11290 | "/* SIMD load ops */\n" |
11291 | "/// Loads 4 double-precision floating point values from a 32-byte aligned\n" |
11292 | "/// memory location pointed to by \\a __p into a vector of [4 x double].\n" |
11293 | "///\n" |
11294 | "/// \\headerfile <x86intrin.h>\n" |
11295 | "///\n" |
11296 | "/// This intrinsic corresponds to the <c> VMOVAPD </c> instruction.\n" |
11297 | "///\n" |
11298 | "/// \\param __p\n" |
11299 | "/// A 32-byte aligned pointer to a memory location containing\n" |
11300 | "/// double-precision floating point values.\n" |
11301 | "/// \\returns A 256-bit vector of [4 x double] containing the moved values.\n" |
11302 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
11303 | "_mm256_load_pd(double const *__p)\n" |
11304 | "{\n" |
11305 | " return *(__m256d *)__p;\n" |
11306 | "}\n" |
11307 | "\n" |
11308 | "/// Loads 8 single-precision floating point values from a 32-byte aligned\n" |
11309 | "/// memory location pointed to by \\a __p into a vector of [8 x float].\n" |
11310 | "///\n" |
11311 | "/// \\headerfile <x86intrin.h>\n" |
11312 | "///\n" |
11313 | "/// This intrinsic corresponds to the <c> VMOVAPS </c> instruction.\n" |
11314 | "///\n" |
11315 | "/// \\param __p\n" |
11316 | "/// A 32-byte aligned pointer to a memory location containing float values.\n" |
11317 | "/// \\returns A 256-bit vector of [8 x float] containing the moved values.\n" |
11318 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
11319 | "_mm256_load_ps(float const *__p)\n" |
11320 | "{\n" |
11321 | " return *(__m256 *)__p;\n" |
11322 | "}\n" |
11323 | "\n" |
11324 | "/// Loads 4 double-precision floating point values from an unaligned\n" |
11325 | "/// memory location pointed to by \\a __p into a vector of [4 x double].\n" |
11326 | "///\n" |
11327 | "/// \\headerfile <x86intrin.h>\n" |
11328 | "///\n" |
11329 | "/// This intrinsic corresponds to the <c> VMOVUPD </c> instruction.\n" |
11330 | "///\n" |
11331 | "/// \\param __p\n" |
11332 | "/// A pointer to a memory location containing double-precision floating\n" |
11333 | "/// point values.\n" |
11334 | "/// \\returns A 256-bit vector of [4 x double] containing the moved values.\n" |
11335 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
11336 | "_mm256_loadu_pd(double const *__p)\n" |
11337 | "{\n" |
11338 | " struct __loadu_pd {\n" |
11339 | " __m256d __v;\n" |
11340 | " } __attribute__((__packed__, __may_alias__));\n" |
11341 | " return ((struct __loadu_pd*)__p)->__v;\n" |
11342 | "}\n" |
11343 | "\n" |
11344 | "/// Loads 8 single-precision floating point values from an unaligned\n" |
11345 | "/// memory location pointed to by \\a __p into a vector of [8 x float].\n" |
11346 | "///\n" |
11347 | "/// \\headerfile <x86intrin.h>\n" |
11348 | "///\n" |
11349 | "/// This intrinsic corresponds to the <c> VMOVUPS </c> instruction.\n" |
11350 | "///\n" |
11351 | "/// \\param __p\n" |
11352 | "/// A pointer to a memory location containing single-precision floating\n" |
11353 | "/// point values.\n" |
11354 | "/// \\returns A 256-bit vector of [8 x float] containing the moved values.\n" |
11355 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
11356 | "_mm256_loadu_ps(float const *__p)\n" |
11357 | "{\n" |
11358 | " struct __loadu_ps {\n" |
11359 | " __m256 __v;\n" |
11360 | " } __attribute__((__packed__, __may_alias__));\n" |
11361 | " return ((struct __loadu_ps*)__p)->__v;\n" |
11362 | "}\n" |
11363 | "\n" |
11364 | "/// Loads 256 bits of integer data from a 32-byte aligned memory\n" |
11365 | "/// location pointed to by \\a __p into elements of a 256-bit integer vector.\n" |
11366 | "///\n" |
11367 | "/// \\headerfile <x86intrin.h>\n" |
11368 | "///\n" |
11369 | "/// This intrinsic corresponds to the <c> VMOVDQA </c> instruction.\n" |
11370 | "///\n" |
11371 | "/// \\param __p\n" |
11372 | "/// A 32-byte aligned pointer to a 256-bit integer vector containing integer\n" |
11373 | "/// values.\n" |
11374 | "/// \\returns A 256-bit integer vector containing the moved values.\n" |
11375 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
11376 | "_mm256_load_si256(__m256i const *__p)\n" |
11377 | "{\n" |
11378 | " return *__p;\n" |
11379 | "}\n" |
11380 | "\n" |
11381 | "/// Loads 256 bits of integer data from an unaligned memory location\n" |
11382 | "/// pointed to by \\a __p into a 256-bit integer vector.\n" |
11383 | "///\n" |
11384 | "/// \\headerfile <x86intrin.h>\n" |
11385 | "///\n" |
11386 | "/// This intrinsic corresponds to the <c> VMOVDQU </c> instruction.\n" |
11387 | "///\n" |
11388 | "/// \\param __p\n" |
11389 | "/// A pointer to a 256-bit integer vector containing integer values.\n" |
11390 | "/// \\returns A 256-bit integer vector containing the moved values.\n" |
11391 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
11392 | "_mm256_loadu_si256(__m256i const *__p)\n" |
11393 | "{\n" |
11394 | " struct __loadu_si256 {\n" |
11395 | " __m256i __v;\n" |
11396 | " } __attribute__((__packed__, __may_alias__));\n" |
11397 | " return ((struct __loadu_si256*)__p)->__v;\n" |
11398 | "}\n" |
11399 | "\n" |
11400 | "/// Loads 256 bits of integer data from an unaligned memory location\n" |
11401 | "/// pointed to by \\a __p into a 256-bit integer vector. This intrinsic may\n" |
11402 | "/// perform better than \\c _mm256_loadu_si256 when the data crosses a cache\n" |
11403 | "/// line boundary.\n" |
11404 | "///\n" |
11405 | "/// \\headerfile <x86intrin.h>\n" |
11406 | "///\n" |
11407 | "/// This intrinsic corresponds to the <c> VLDDQU </c> instruction.\n" |
11408 | "///\n" |
11409 | "/// \\param __p\n" |
11410 | "/// A pointer to a 256-bit integer vector containing integer values.\n" |
11411 | "/// \\returns A 256-bit integer vector containing the moved values.\n" |
11412 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
11413 | "_mm256_lddqu_si256(__m256i const *__p)\n" |
11414 | "{\n" |
11415 | " return (__m256i)__builtin_ia32_lddqu256((char const *)__p);\n" |
11416 | "}\n" |
11417 | "\n" |
11418 | "/* SIMD store ops */\n" |
11419 | "/// Stores double-precision floating point values from a 256-bit vector\n" |
11420 | "/// of [4 x double] to a 32-byte aligned memory location pointed to by\n" |
11421 | "/// \\a __p.\n" |
11422 | "///\n" |
11423 | "/// \\headerfile <x86intrin.h>\n" |
11424 | "///\n" |
11425 | "/// This intrinsic corresponds to the <c> VMOVAPD </c> instruction.\n" |
11426 | "///\n" |
11427 | "/// \\param __p\n" |
11428 | "/// A 32-byte aligned pointer to a memory location that will receive the\n" |
11429 | "/// double-precision floaing point values.\n" |
11430 | "/// \\param __a\n" |
11431 | "/// A 256-bit vector of [4 x double] containing the values to be moved.\n" |
11432 | "static __inline void __DEFAULT_FN_ATTRS\n" |
11433 | "_mm256_store_pd(double *__p, __m256d __a)\n" |
11434 | "{\n" |
11435 | " *(__m256d *)__p = __a;\n" |
11436 | "}\n" |
11437 | "\n" |
11438 | "/// Stores single-precision floating point values from a 256-bit vector\n" |
11439 | "/// of [8 x float] to a 32-byte aligned memory location pointed to by \\a __p.\n" |
11440 | "///\n" |
11441 | "/// \\headerfile <x86intrin.h>\n" |
11442 | "///\n" |
11443 | "/// This intrinsic corresponds to the <c> VMOVAPS </c> instruction.\n" |
11444 | "///\n" |
11445 | "/// \\param __p\n" |
11446 | "/// A 32-byte aligned pointer to a memory location that will receive the\n" |
11447 | "/// float values.\n" |
11448 | "/// \\param __a\n" |
11449 | "/// A 256-bit vector of [8 x float] containing the values to be moved.\n" |
11450 | "static __inline void __DEFAULT_FN_ATTRS\n" |
11451 | "_mm256_store_ps(float *__p, __m256 __a)\n" |
11452 | "{\n" |
11453 | " *(__m256 *)__p = __a;\n" |
11454 | "}\n" |
11455 | "\n" |
11456 | "/// Stores double-precision floating point values from a 256-bit vector\n" |
11457 | "/// of [4 x double] to an unaligned memory location pointed to by \\a __p.\n" |
11458 | "///\n" |
11459 | "/// \\headerfile <x86intrin.h>\n" |
11460 | "///\n" |
11461 | "/// This intrinsic corresponds to the <c> VMOVUPD </c> instruction.\n" |
11462 | "///\n" |
11463 | "/// \\param __p\n" |
11464 | "/// A pointer to a memory location that will receive the double-precision\n" |
11465 | "/// floating point values.\n" |
11466 | "/// \\param __a\n" |
11467 | "/// A 256-bit vector of [4 x double] containing the values to be moved.\n" |
11468 | "static __inline void __DEFAULT_FN_ATTRS\n" |
11469 | "_mm256_storeu_pd(double *__p, __m256d __a)\n" |
11470 | "{\n" |
11471 | " struct __storeu_pd {\n" |
11472 | " __m256d __v;\n" |
11473 | " } __attribute__((__packed__, __may_alias__));\n" |
11474 | " ((struct __storeu_pd*)__p)->__v = __a;\n" |
11475 | "}\n" |
11476 | "\n" |
11477 | "/// Stores single-precision floating point values from a 256-bit vector\n" |
11478 | "/// of [8 x float] to an unaligned memory location pointed to by \\a __p.\n" |
11479 | "///\n" |
11480 | "/// \\headerfile <x86intrin.h>\n" |
11481 | "///\n" |
11482 | "/// This intrinsic corresponds to the <c> VMOVUPS </c> instruction.\n" |
11483 | "///\n" |
11484 | "/// \\param __p\n" |
11485 | "/// A pointer to a memory location that will receive the float values.\n" |
11486 | "/// \\param __a\n" |
11487 | "/// A 256-bit vector of [8 x float] containing the values to be moved.\n" |
11488 | "static __inline void __DEFAULT_FN_ATTRS\n" |
11489 | "_mm256_storeu_ps(float *__p, __m256 __a)\n" |
11490 | "{\n" |
11491 | " struct __storeu_ps {\n" |
11492 | " __m256 __v;\n" |
11493 | " } __attribute__((__packed__, __may_alias__));\n" |
11494 | " ((struct __storeu_ps*)__p)->__v = __a;\n" |
11495 | "}\n" |
11496 | "\n" |
11497 | "/// Stores integer values from a 256-bit integer vector to a 32-byte\n" |
11498 | "/// aligned memory location pointed to by \\a __p.\n" |
11499 | "///\n" |
11500 | "/// \\headerfile <x86intrin.h>\n" |
11501 | "///\n" |
11502 | "/// This intrinsic corresponds to the <c> VMOVDQA </c> instruction.\n" |
11503 | "///\n" |
11504 | "/// \\param __p\n" |
11505 | "/// A 32-byte aligned pointer to a memory location that will receive the\n" |
11506 | "/// integer values.\n" |
11507 | "/// \\param __a\n" |
11508 | "/// A 256-bit integer vector containing the values to be moved.\n" |
11509 | "static __inline void __DEFAULT_FN_ATTRS\n" |
11510 | "_mm256_store_si256(__m256i *__p, __m256i __a)\n" |
11511 | "{\n" |
11512 | " *__p = __a;\n" |
11513 | "}\n" |
11514 | "\n" |
11515 | "/// Stores integer values from a 256-bit integer vector to an unaligned\n" |
11516 | "/// memory location pointed to by \\a __p.\n" |
11517 | "///\n" |
11518 | "/// \\headerfile <x86intrin.h>\n" |
11519 | "///\n" |
11520 | "/// This intrinsic corresponds to the <c> VMOVDQU </c> instruction.\n" |
11521 | "///\n" |
11522 | "/// \\param __p\n" |
11523 | "/// A pointer to a memory location that will receive the integer values.\n" |
11524 | "/// \\param __a\n" |
11525 | "/// A 256-bit integer vector containing the values to be moved.\n" |
11526 | "static __inline void __DEFAULT_FN_ATTRS\n" |
11527 | "_mm256_storeu_si256(__m256i *__p, __m256i __a)\n" |
11528 | "{\n" |
11529 | " struct __storeu_si256 {\n" |
11530 | " __m256i __v;\n" |
11531 | " } __attribute__((__packed__, __may_alias__));\n" |
11532 | " ((struct __storeu_si256*)__p)->__v = __a;\n" |
11533 | "}\n" |
11534 | "\n" |
11535 | "/* Conditional load ops */\n" |
11536 | "/// Conditionally loads double-precision floating point elements from a\n" |
11537 | "/// memory location pointed to by \\a __p into a 128-bit vector of\n" |
11538 | "/// [2 x double], depending on the mask bits associated with each data\n" |
11539 | "/// element.\n" |
11540 | "///\n" |
11541 | "/// \\headerfile <x86intrin.h>\n" |
11542 | "///\n" |
11543 | "/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n" |
11544 | "///\n" |
11545 | "/// \\param __p\n" |
11546 | "/// A pointer to a memory location that contains the double-precision\n" |
11547 | "/// floating point values.\n" |
11548 | "/// \\param __m\n" |
11549 | "/// A 128-bit integer vector containing the mask. The most significant bit of\n" |
11550 | "/// each data element represents the mask bits. If a mask bit is zero, the\n" |
11551 | "/// corresponding value in the memory location is not loaded and the\n" |
11552 | "/// corresponding field in the return value is set to zero.\n" |
11553 | "/// \\returns A 128-bit vector of [2 x double] containing the loaded values.\n" |
11554 | "static __inline __m128d __DEFAULT_FN_ATTRS128\n" |
11555 | "_mm_maskload_pd(double const *__p, __m128i __m)\n" |
11556 | "{\n" |
11557 | " return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m);\n" |
11558 | "}\n" |
11559 | "\n" |
11560 | "/// Conditionally loads double-precision floating point elements from a\n" |
11561 | "/// memory location pointed to by \\a __p into a 256-bit vector of\n" |
11562 | "/// [4 x double], depending on the mask bits associated with each data\n" |
11563 | "/// element.\n" |
11564 | "///\n" |
11565 | "/// \\headerfile <x86intrin.h>\n" |
11566 | "///\n" |
11567 | "/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n" |
11568 | "///\n" |
11569 | "/// \\param __p\n" |
11570 | "/// A pointer to a memory location that contains the double-precision\n" |
11571 | "/// floating point values.\n" |
11572 | "/// \\param __m\n" |
11573 | "/// A 256-bit integer vector of [4 x quadword] containing the mask. The most\n" |
11574 | "/// significant bit of each quadword element represents the mask bits. If a\n" |
11575 | "/// mask bit is zero, the corresponding value in the memory location is not\n" |
11576 | "/// loaded and the corresponding field in the return value is set to zero.\n" |
11577 | "/// \\returns A 256-bit vector of [4 x double] containing the loaded values.\n" |
11578 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
11579 | "_mm256_maskload_pd(double const *__p, __m256i __m)\n" |
11580 | "{\n" |
11581 | " return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,\n" |
11582 | " (__v4di)__m);\n" |
11583 | "}\n" |
11584 | "\n" |
11585 | "/// Conditionally loads single-precision floating point elements from a\n" |
11586 | "/// memory location pointed to by \\a __p into a 128-bit vector of\n" |
11587 | "/// [4 x float], depending on the mask bits associated with each data\n" |
11588 | "/// element.\n" |
11589 | "///\n" |
11590 | "/// \\headerfile <x86intrin.h>\n" |
11591 | "///\n" |
11592 | "/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n" |
11593 | "///\n" |
11594 | "/// \\param __p\n" |
11595 | "/// A pointer to a memory location that contains the single-precision\n" |
11596 | "/// floating point values.\n" |
11597 | "/// \\param __m\n" |
11598 | "/// A 128-bit integer vector containing the mask. The most significant bit of\n" |
11599 | "/// each data element represents the mask bits. If a mask bit is zero, the\n" |
11600 | "/// corresponding value in the memory location is not loaded and the\n" |
11601 | "/// corresponding field in the return value is set to zero.\n" |
11602 | "/// \\returns A 128-bit vector of [4 x float] containing the loaded values.\n" |
11603 | "static __inline __m128 __DEFAULT_FN_ATTRS128\n" |
11604 | "_mm_maskload_ps(float const *__p, __m128i __m)\n" |
11605 | "{\n" |
11606 | " return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m);\n" |
11607 | "}\n" |
11608 | "\n" |
11609 | "/// Conditionally loads single-precision floating point elements from a\n" |
11610 | "/// memory location pointed to by \\a __p into a 256-bit vector of\n" |
11611 | "/// [8 x float], depending on the mask bits associated with each data\n" |
11612 | "/// element.\n" |
11613 | "///\n" |
11614 | "/// \\headerfile <x86intrin.h>\n" |
11615 | "///\n" |
11616 | "/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n" |
11617 | "///\n" |
11618 | "/// \\param __p\n" |
11619 | "/// A pointer to a memory location that contains the single-precision\n" |
11620 | "/// floating point values.\n" |
11621 | "/// \\param __m\n" |
11622 | "/// A 256-bit integer vector of [8 x dword] containing the mask. The most\n" |
11623 | "/// significant bit of each dword element represents the mask bits. If a mask\n" |
11624 | "/// bit is zero, the corresponding value in the memory location is not loaded\n" |
11625 | "/// and the corresponding field in the return value is set to zero.\n" |
11626 | "/// \\returns A 256-bit vector of [8 x float] containing the loaded values.\n" |
11627 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
11628 | "_mm256_maskload_ps(float const *__p, __m256i __m)\n" |
11629 | "{\n" |
11630 | " return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8si)__m);\n" |
11631 | "}\n" |
11632 | "\n" |
11633 | "/* Conditional store ops */\n" |
11634 | "/// Moves single-precision floating point values from a 256-bit vector\n" |
11635 | "/// of [8 x float] to a memory location pointed to by \\a __p, according to\n" |
11636 | "/// the specified mask.\n" |
11637 | "///\n" |
11638 | "/// \\headerfile <x86intrin.h>\n" |
11639 | "///\n" |
11640 | "/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n" |
11641 | "///\n" |
11642 | "/// \\param __p\n" |
11643 | "/// A pointer to a memory location that will receive the float values.\n" |
11644 | "/// \\param __m\n" |
11645 | "/// A 256-bit integer vector of [8 x dword] containing the mask. The most\n" |
11646 | "/// significant bit of each dword element in the mask vector represents the\n" |
11647 | "/// mask bits. If a mask bit is zero, the corresponding value from vector\n" |
11648 | "/// \\a __a is not stored and the corresponding field in the memory location\n" |
11649 | "/// pointed to by \\a __p is not changed.\n" |
11650 | "/// \\param __a\n" |
11651 | "/// A 256-bit vector of [8 x float] containing the values to be stored.\n" |
11652 | "static __inline void __DEFAULT_FN_ATTRS\n" |
11653 | "_mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a)\n" |
11654 | "{\n" |
11655 | " __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a);\n" |
11656 | "}\n" |
11657 | "\n" |
11658 | "/// Moves double-precision values from a 128-bit vector of [2 x double]\n" |
11659 | "/// to a memory location pointed to by \\a __p, according to the specified\n" |
11660 | "/// mask.\n" |
11661 | "///\n" |
11662 | "/// \\headerfile <x86intrin.h>\n" |
11663 | "///\n" |
11664 | "/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n" |
11665 | "///\n" |
11666 | "/// \\param __p\n" |
11667 | "/// A pointer to a memory location that will receive the float values.\n" |
11668 | "/// \\param __m\n" |
11669 | "/// A 128-bit integer vector containing the mask. The most significant bit of\n" |
11670 | "/// each field in the mask vector represents the mask bits. If a mask bit is\n" |
11671 | "/// zero, the corresponding value from vector \\a __a is not stored and the\n" |
11672 | "/// corresponding field in the memory location pointed to by \\a __p is not\n" |
11673 | "/// changed.\n" |
11674 | "/// \\param __a\n" |
11675 | "/// A 128-bit vector of [2 x double] containing the values to be stored.\n" |
11676 | "static __inline void __DEFAULT_FN_ATTRS128\n" |
11677 | "_mm_maskstore_pd(double *__p, __m128i __m, __m128d __a)\n" |
11678 | "{\n" |
11679 | " __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a);\n" |
11680 | "}\n" |
11681 | "\n" |
11682 | "/// Moves double-precision values from a 256-bit vector of [4 x double]\n" |
11683 | "/// to a memory location pointed to by \\a __p, according to the specified\n" |
11684 | "/// mask.\n" |
11685 | "///\n" |
11686 | "/// \\headerfile <x86intrin.h>\n" |
11687 | "///\n" |
11688 | "/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n" |
11689 | "///\n" |
11690 | "/// \\param __p\n" |
11691 | "/// A pointer to a memory location that will receive the float values.\n" |
11692 | "/// \\param __m\n" |
11693 | "/// A 256-bit integer vector of [4 x quadword] containing the mask. The most\n" |
11694 | "/// significant bit of each quadword element in the mask vector represents\n" |
11695 | "/// the mask bits. If a mask bit is zero, the corresponding value from vector\n" |
11696 | "/// __a is not stored and the corresponding field in the memory location\n" |
11697 | "/// pointed to by \\a __p is not changed.\n" |
11698 | "/// \\param __a\n" |
11699 | "/// A 256-bit vector of [4 x double] containing the values to be stored.\n" |
11700 | "static __inline void __DEFAULT_FN_ATTRS\n" |
11701 | "_mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a)\n" |
11702 | "{\n" |
11703 | " __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a);\n" |
11704 | "}\n" |
11705 | "\n" |
11706 | "/// Moves single-precision floating point values from a 128-bit vector\n" |
11707 | "/// of [4 x float] to a memory location pointed to by \\a __p, according to\n" |
11708 | "/// the specified mask.\n" |
11709 | "///\n" |
11710 | "/// \\headerfile <x86intrin.h>\n" |
11711 | "///\n" |
11712 | "/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n" |
11713 | "///\n" |
11714 | "/// \\param __p\n" |
11715 | "/// A pointer to a memory location that will receive the float values.\n" |
11716 | "/// \\param __m\n" |
11717 | "/// A 128-bit integer vector containing the mask. The most significant bit of\n" |
11718 | "/// each field in the mask vector represents the mask bits. If a mask bit is\n" |
11719 | "/// zero, the corresponding value from vector __a is not stored and the\n" |
11720 | "/// corresponding field in the memory location pointed to by \\a __p is not\n" |
11721 | "/// changed.\n" |
11722 | "/// \\param __a\n" |
11723 | "/// A 128-bit vector of [4 x float] containing the values to be stored.\n" |
11724 | "static __inline void __DEFAULT_FN_ATTRS128\n" |
11725 | "_mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)\n" |
11726 | "{\n" |
11727 | " __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a);\n" |
11728 | "}\n" |
11729 | "\n" |
11730 | "/* Cacheability support ops */\n" |
11731 | "/// Moves integer data from a 256-bit integer vector to a 32-byte\n" |
11732 | "/// aligned memory location. To minimize caching, the data is flagged as\n" |
11733 | "/// non-temporal (unlikely to be used again soon).\n" |
11734 | "///\n" |
11735 | "/// \\headerfile <x86intrin.h>\n" |
11736 | "///\n" |
11737 | "/// This intrinsic corresponds to the <c> VMOVNTDQ </c> instruction.\n" |
11738 | "///\n" |
11739 | "/// \\param __a\n" |
11740 | "/// A pointer to a 32-byte aligned memory location that will receive the\n" |
11741 | "/// integer values.\n" |
11742 | "/// \\param __b\n" |
11743 | "/// A 256-bit integer vector containing the values to be moved.\n" |
11744 | "static __inline void __DEFAULT_FN_ATTRS\n" |
11745 | "_mm256_stream_si256(__m256i *__a, __m256i __b)\n" |
11746 | "{\n" |
11747 | " typedef __v4di __v4di_aligned __attribute__((aligned(32)));\n" |
11748 | " __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);\n" |
11749 | "}\n" |
11750 | "\n" |
11751 | "/// Moves double-precision values from a 256-bit vector of [4 x double]\n" |
11752 | "/// to a 32-byte aligned memory location. To minimize caching, the data is\n" |
11753 | "/// flagged as non-temporal (unlikely to be used again soon).\n" |
11754 | "///\n" |
11755 | "/// \\headerfile <x86intrin.h>\n" |
11756 | "///\n" |
11757 | "/// This intrinsic corresponds to the <c> VMOVNTPD </c> instruction.\n" |
11758 | "///\n" |
11759 | "/// \\param __a\n" |
11760 | "/// A pointer to a 32-byte aligned memory location that will receive the\n" |
11761 | "/// double-precision floating-point values.\n" |
11762 | "/// \\param __b\n" |
11763 | "/// A 256-bit vector of [4 x double] containing the values to be moved.\n" |
11764 | "static __inline void __DEFAULT_FN_ATTRS\n" |
11765 | "_mm256_stream_pd(double *__a, __m256d __b)\n" |
11766 | "{\n" |
11767 | " typedef __v4df __v4df_aligned __attribute__((aligned(32)));\n" |
11768 | " __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);\n" |
11769 | "}\n" |
11770 | "\n" |
11771 | "/// Moves single-precision floating point values from a 256-bit vector\n" |
11772 | "/// of [8 x float] to a 32-byte aligned memory location. To minimize\n" |
11773 | "/// caching, the data is flagged as non-temporal (unlikely to be used again\n" |
11774 | "/// soon).\n" |
11775 | "///\n" |
11776 | "/// \\headerfile <x86intrin.h>\n" |
11777 | "///\n" |
11778 | "/// This intrinsic corresponds to the <c> VMOVNTPS </c> instruction.\n" |
11779 | "///\n" |
11780 | "/// \\param __p\n" |
11781 | "/// A pointer to a 32-byte aligned memory location that will receive the\n" |
11782 | "/// single-precision floating point values.\n" |
11783 | "/// \\param __a\n" |
11784 | "/// A 256-bit vector of [8 x float] containing the values to be moved.\n" |
11785 | "static __inline void __DEFAULT_FN_ATTRS\n" |
11786 | "_mm256_stream_ps(float *__p, __m256 __a)\n" |
11787 | "{\n" |
11788 | " typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));\n" |
11789 | " __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);\n" |
11790 | "}\n" |
11791 | "\n" |
11792 | "/* Create vectors */\n" |
11793 | "/// Create a 256-bit vector of [4 x double] with undefined values.\n" |
11794 | "///\n" |
11795 | "/// \\headerfile <x86intrin.h>\n" |
11796 | "///\n" |
11797 | "/// This intrinsic has no corresponding instruction.\n" |
11798 | "///\n" |
11799 | "/// \\returns A 256-bit vector of [4 x double] containing undefined values.\n" |
11800 | "static __inline__ __m256d __DEFAULT_FN_ATTRS\n" |
11801 | "_mm256_undefined_pd(void)\n" |
11802 | "{\n" |
11803 | " return (__m256d)__builtin_ia32_undef256();\n" |
11804 | "}\n" |
11805 | "\n" |
11806 | "/// Create a 256-bit vector of [8 x float] with undefined values.\n" |
11807 | "///\n" |
11808 | "/// \\headerfile <x86intrin.h>\n" |
11809 | "///\n" |
11810 | "/// This intrinsic has no corresponding instruction.\n" |
11811 | "///\n" |
11812 | "/// \\returns A 256-bit vector of [8 x float] containing undefined values.\n" |
11813 | "static __inline__ __m256 __DEFAULT_FN_ATTRS\n" |
11814 | "_mm256_undefined_ps(void)\n" |
11815 | "{\n" |
11816 | " return (__m256)__builtin_ia32_undef256();\n" |
11817 | "}\n" |
11818 | "\n" |
11819 | "/// Create a 256-bit integer vector with undefined values.\n" |
11820 | "///\n" |
11821 | "/// \\headerfile <x86intrin.h>\n" |
11822 | "///\n" |
11823 | "/// This intrinsic has no corresponding instruction.\n" |
11824 | "///\n" |
11825 | "/// \\returns A 256-bit integer vector containing undefined values.\n" |
11826 | "static __inline__ __m256i __DEFAULT_FN_ATTRS\n" |
11827 | "_mm256_undefined_si256(void)\n" |
11828 | "{\n" |
11829 | " return (__m256i)__builtin_ia32_undef256();\n" |
11830 | "}\n" |
11831 | "\n" |
11832 | "/// Constructs a 256-bit floating-point vector of [4 x double]\n" |
11833 | "/// initialized with the specified double-precision floating-point values.\n" |
11834 | "///\n" |
11835 | "/// \\headerfile <x86intrin.h>\n" |
11836 | "///\n" |
11837 | "/// This intrinsic corresponds to the <c> VUNPCKLPD+VINSERTF128 </c>\n" |
11838 | "/// instruction.\n" |
11839 | "///\n" |
11840 | "/// \\param __a\n" |
11841 | "/// A double-precision floating-point value used to initialize bits [255:192]\n" |
11842 | "/// of the result.\n" |
11843 | "/// \\param __b\n" |
11844 | "/// A double-precision floating-point value used to initialize bits [191:128]\n" |
11845 | "/// of the result.\n" |
11846 | "/// \\param __c\n" |
11847 | "/// A double-precision floating-point value used to initialize bits [127:64]\n" |
11848 | "/// of the result.\n" |
11849 | "/// \\param __d\n" |
11850 | "/// A double-precision floating-point value used to initialize bits [63:0]\n" |
11851 | "/// of the result.\n" |
11852 | "/// \\returns An initialized 256-bit floating-point vector of [4 x double].\n" |
11853 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
11854 | "_mm256_set_pd(double __a, double __b, double __c, double __d)\n" |
11855 | "{\n" |
11856 | " return __extension__ (__m256d){ __d, __c, __b, __a };\n" |
11857 | "}\n" |
11858 | "\n" |
11859 | "/// Constructs a 256-bit floating-point vector of [8 x float] initialized\n" |
11860 | "/// with the specified single-precision floating-point values.\n" |
11861 | "///\n" |
11862 | "/// \\headerfile <x86intrin.h>\n" |
11863 | "///\n" |
11864 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
11865 | "/// instruction.\n" |
11866 | "///\n" |
11867 | "/// \\param __a\n" |
11868 | "/// A single-precision floating-point value used to initialize bits [255:224]\n" |
11869 | "/// of the result.\n" |
11870 | "/// \\param __b\n" |
11871 | "/// A single-precision floating-point value used to initialize bits [223:192]\n" |
11872 | "/// of the result.\n" |
11873 | "/// \\param __c\n" |
11874 | "/// A single-precision floating-point value used to initialize bits [191:160]\n" |
11875 | "/// of the result.\n" |
11876 | "/// \\param __d\n" |
11877 | "/// A single-precision floating-point value used to initialize bits [159:128]\n" |
11878 | "/// of the result.\n" |
11879 | "/// \\param __e\n" |
11880 | "/// A single-precision floating-point value used to initialize bits [127:96]\n" |
11881 | "/// of the result.\n" |
11882 | "/// \\param __f\n" |
11883 | "/// A single-precision floating-point value used to initialize bits [95:64]\n" |
11884 | "/// of the result.\n" |
11885 | "/// \\param __g\n" |
11886 | "/// A single-precision floating-point value used to initialize bits [63:32]\n" |
11887 | "/// of the result.\n" |
11888 | "/// \\param __h\n" |
11889 | "/// A single-precision floating-point value used to initialize bits [31:0]\n" |
11890 | "/// of the result.\n" |
11891 | "/// \\returns An initialized 256-bit floating-point vector of [8 x float].\n" |
11892 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
11893 | "_mm256_set_ps(float __a, float __b, float __c, float __d,\n" |
11894 | " float __e, float __f, float __g, float __h)\n" |
11895 | "{\n" |
11896 | " return __extension__ (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };\n" |
11897 | "}\n" |
11898 | "\n" |
11899 | "/// Constructs a 256-bit integer vector initialized with the specified\n" |
11900 | "/// 32-bit integral values.\n" |
11901 | "///\n" |
11902 | "/// \\headerfile <x86intrin.h>\n" |
11903 | "///\n" |
11904 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
11905 | "/// instruction.\n" |
11906 | "///\n" |
11907 | "/// \\param __i0\n" |
11908 | "/// A 32-bit integral value used to initialize bits [255:224] of the result.\n" |
11909 | "/// \\param __i1\n" |
11910 | "/// A 32-bit integral value used to initialize bits [223:192] of the result.\n" |
11911 | "/// \\param __i2\n" |
11912 | "/// A 32-bit integral value used to initialize bits [191:160] of the result.\n" |
11913 | "/// \\param __i3\n" |
11914 | "/// A 32-bit integral value used to initialize bits [159:128] of the result.\n" |
11915 | "/// \\param __i4\n" |
11916 | "/// A 32-bit integral value used to initialize bits [127:96] of the result.\n" |
11917 | "/// \\param __i5\n" |
11918 | "/// A 32-bit integral value used to initialize bits [95:64] of the result.\n" |
11919 | "/// \\param __i6\n" |
11920 | "/// A 32-bit integral value used to initialize bits [63:32] of the result.\n" |
11921 | "/// \\param __i7\n" |
11922 | "/// A 32-bit integral value used to initialize bits [31:0] of the result.\n" |
11923 | "/// \\returns An initialized 256-bit integer vector.\n" |
11924 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
11925 | "_mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,\n" |
11926 | " int __i4, int __i5, int __i6, int __i7)\n" |
11927 | "{\n" |
11928 | " return __extension__ (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };\n" |
11929 | "}\n" |
11930 | "\n" |
11931 | "/// Constructs a 256-bit integer vector initialized with the specified\n" |
11932 | "/// 16-bit integral values.\n" |
11933 | "///\n" |
11934 | "/// \\headerfile <x86intrin.h>\n" |
11935 | "///\n" |
11936 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
11937 | "/// instruction.\n" |
11938 | "///\n" |
11939 | "/// \\param __w15\n" |
11940 | "/// A 16-bit integral value used to initialize bits [255:240] of the result.\n" |
11941 | "/// \\param __w14\n" |
11942 | "/// A 16-bit integral value used to initialize bits [239:224] of the result.\n" |
11943 | "/// \\param __w13\n" |
11944 | "/// A 16-bit integral value used to initialize bits [223:208] of the result.\n" |
11945 | "/// \\param __w12\n" |
11946 | "/// A 16-bit integral value used to initialize bits [207:192] of the result.\n" |
11947 | "/// \\param __w11\n" |
11948 | "/// A 16-bit integral value used to initialize bits [191:176] of the result.\n" |
11949 | "/// \\param __w10\n" |
11950 | "/// A 16-bit integral value used to initialize bits [175:160] of the result.\n" |
11951 | "/// \\param __w09\n" |
11952 | "/// A 16-bit integral value used to initialize bits [159:144] of the result.\n" |
11953 | "/// \\param __w08\n" |
11954 | "/// A 16-bit integral value used to initialize bits [143:128] of the result.\n" |
11955 | "/// \\param __w07\n" |
11956 | "/// A 16-bit integral value used to initialize bits [127:112] of the result.\n" |
11957 | "/// \\param __w06\n" |
11958 | "/// A 16-bit integral value used to initialize bits [111:96] of the result.\n" |
11959 | "/// \\param __w05\n" |
11960 | "/// A 16-bit integral value used to initialize bits [95:80] of the result.\n" |
11961 | "/// \\param __w04\n" |
11962 | "/// A 16-bit integral value used to initialize bits [79:64] of the result.\n" |
11963 | "/// \\param __w03\n" |
11964 | "/// A 16-bit integral value used to initialize bits [63:48] of the result.\n" |
11965 | "/// \\param __w02\n" |
11966 | "/// A 16-bit integral value used to initialize bits [47:32] of the result.\n" |
11967 | "/// \\param __w01\n" |
11968 | "/// A 16-bit integral value used to initialize bits [31:16] of the result.\n" |
11969 | "/// \\param __w00\n" |
11970 | "/// A 16-bit integral value used to initialize bits [15:0] of the result.\n" |
11971 | "/// \\returns An initialized 256-bit integer vector.\n" |
11972 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
11973 | "_mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,\n" |
11974 | " short __w11, short __w10, short __w09, short __w08,\n" |
11975 | " short __w07, short __w06, short __w05, short __w04,\n" |
11976 | " short __w03, short __w02, short __w01, short __w00)\n" |
11977 | "{\n" |
11978 | " return __extension__ (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06,\n" |
11979 | " __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };\n" |
11980 | "}\n" |
11981 | "\n" |
11982 | "/// Constructs a 256-bit integer vector initialized with the specified\n" |
11983 | "/// 8-bit integral values.\n" |
11984 | "///\n" |
11985 | "/// \\headerfile <x86intrin.h>\n" |
11986 | "///\n" |
11987 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
11988 | "/// instruction.\n" |
11989 | "///\n" |
11990 | "/// \\param __b31\n" |
11991 | "/// An 8-bit integral value used to initialize bits [255:248] of the result.\n" |
11992 | "/// \\param __b30\n" |
11993 | "/// An 8-bit integral value used to initialize bits [247:240] of the result.\n" |
11994 | "/// \\param __b29\n" |
11995 | "/// An 8-bit integral value used to initialize bits [239:232] of the result.\n" |
11996 | "/// \\param __b28\n" |
11997 | "/// An 8-bit integral value used to initialize bits [231:224] of the result.\n" |
11998 | "/// \\param __b27\n" |
11999 | "/// An 8-bit integral value used to initialize bits [223:216] of the result.\n" |
12000 | "/// \\param __b26\n" |
12001 | "/// An 8-bit integral value used to initialize bits [215:208] of the result.\n" |
12002 | "/// \\param __b25\n" |
12003 | "/// An 8-bit integral value used to initialize bits [207:200] of the result.\n" |
12004 | "/// \\param __b24\n" |
12005 | "/// An 8-bit integral value used to initialize bits [199:192] of the result.\n" |
12006 | "/// \\param __b23\n" |
12007 | "/// An 8-bit integral value used to initialize bits [191:184] of the result.\n" |
12008 | "/// \\param __b22\n" |
12009 | "/// An 8-bit integral value used to initialize bits [183:176] of the result.\n" |
12010 | "/// \\param __b21\n" |
12011 | "/// An 8-bit integral value used to initialize bits [175:168] of the result.\n" |
12012 | "/// \\param __b20\n" |
12013 | "/// An 8-bit integral value used to initialize bits [167:160] of the result.\n" |
12014 | "/// \\param __b19\n" |
12015 | "/// An 8-bit integral value used to initialize bits [159:152] of the result.\n" |
12016 | "/// \\param __b18\n" |
12017 | "/// An 8-bit integral value used to initialize bits [151:144] of the result.\n" |
12018 | "/// \\param __b17\n" |
12019 | "/// An 8-bit integral value used to initialize bits [143:136] of the result.\n" |
12020 | "/// \\param __b16\n" |
12021 | "/// An 8-bit integral value used to initialize bits [135:128] of the result.\n" |
12022 | "/// \\param __b15\n" |
12023 | "/// An 8-bit integral value used to initialize bits [127:120] of the result.\n" |
12024 | "/// \\param __b14\n" |
12025 | "/// An 8-bit integral value used to initialize bits [119:112] of the result.\n" |
12026 | "/// \\param __b13\n" |
12027 | "/// An 8-bit integral value used to initialize bits [111:104] of the result.\n" |
12028 | "/// \\param __b12\n" |
12029 | "/// An 8-bit integral value used to initialize bits [103:96] of the result.\n" |
12030 | "/// \\param __b11\n" |
12031 | "/// An 8-bit integral value used to initialize bits [95:88] of the result.\n" |
12032 | "/// \\param __b10\n" |
12033 | "/// An 8-bit integral value used to initialize bits [87:80] of the result.\n" |
12034 | "/// \\param __b09\n" |
12035 | "/// An 8-bit integral value used to initialize bits [79:72] of the result.\n" |
12036 | "/// \\param __b08\n" |
12037 | "/// An 8-bit integral value used to initialize bits [71:64] of the result.\n" |
12038 | "/// \\param __b07\n" |
12039 | "/// An 8-bit integral value used to initialize bits [63:56] of the result.\n" |
12040 | "/// \\param __b06\n" |
12041 | "/// An 8-bit integral value used to initialize bits [55:48] of the result.\n" |
12042 | "/// \\param __b05\n" |
12043 | "/// An 8-bit integral value used to initialize bits [47:40] of the result.\n" |
12044 | "/// \\param __b04\n" |
12045 | "/// An 8-bit integral value used to initialize bits [39:32] of the result.\n" |
12046 | "/// \\param __b03\n" |
12047 | "/// An 8-bit integral value used to initialize bits [31:24] of the result.\n" |
12048 | "/// \\param __b02\n" |
12049 | "/// An 8-bit integral value used to initialize bits [23:16] of the result.\n" |
12050 | "/// \\param __b01\n" |
12051 | "/// An 8-bit integral value used to initialize bits [15:8] of the result.\n" |
12052 | "/// \\param __b00\n" |
12053 | "/// An 8-bit integral value used to initialize bits [7:0] of the result.\n" |
12054 | "/// \\returns An initialized 256-bit integer vector.\n" |
12055 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
12056 | "_mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,\n" |
12057 | " char __b27, char __b26, char __b25, char __b24,\n" |
12058 | " char __b23, char __b22, char __b21, char __b20,\n" |
12059 | " char __b19, char __b18, char __b17, char __b16,\n" |
12060 | " char __b15, char __b14, char __b13, char __b12,\n" |
12061 | " char __b11, char __b10, char __b09, char __b08,\n" |
12062 | " char __b07, char __b06, char __b05, char __b04,\n" |
12063 | " char __b03, char __b02, char __b01, char __b00)\n" |
12064 | "{\n" |
12065 | " return __extension__ (__m256i)(__v32qi){\n" |
12066 | " __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,\n" |
12067 | " __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,\n" |
12068 | " __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,\n" |
12069 | " __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31\n" |
12070 | " };\n" |
12071 | "}\n" |
12072 | "\n" |
12073 | "/// Constructs a 256-bit integer vector initialized with the specified\n" |
12074 | "/// 64-bit integral values.\n" |
12075 | "///\n" |
12076 | "/// \\headerfile <x86intrin.h>\n" |
12077 | "///\n" |
12078 | "/// This intrinsic corresponds to the <c> VPUNPCKLQDQ+VINSERTF128 </c>\n" |
12079 | "/// instruction.\n" |
12080 | "///\n" |
12081 | "/// \\param __a\n" |
12082 | "/// A 64-bit integral value used to initialize bits [255:192] of the result.\n" |
12083 | "/// \\param __b\n" |
12084 | "/// A 64-bit integral value used to initialize bits [191:128] of the result.\n" |
12085 | "/// \\param __c\n" |
12086 | "/// A 64-bit integral value used to initialize bits [127:64] of the result.\n" |
12087 | "/// \\param __d\n" |
12088 | "/// A 64-bit integral value used to initialize bits [63:0] of the result.\n" |
12089 | "/// \\returns An initialized 256-bit integer vector.\n" |
12090 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
12091 | "_mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)\n" |
12092 | "{\n" |
12093 | " return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a };\n" |
12094 | "}\n" |
12095 | "\n" |
12096 | "/* Create vectors with elements in reverse order */\n" |
12097 | "/// Constructs a 256-bit floating-point vector of [4 x double],\n" |
12098 | "/// initialized in reverse order with the specified double-precision\n" |
12099 | "/// floating-point values.\n" |
12100 | "///\n" |
12101 | "/// \\headerfile <x86intrin.h>\n" |
12102 | "///\n" |
12103 | "/// This intrinsic corresponds to the <c> VUNPCKLPD+VINSERTF128 </c>\n" |
12104 | "/// instruction.\n" |
12105 | "///\n" |
12106 | "/// \\param __a\n" |
12107 | "/// A double-precision floating-point value used to initialize bits [63:0]\n" |
12108 | "/// of the result.\n" |
12109 | "/// \\param __b\n" |
12110 | "/// A double-precision floating-point value used to initialize bits [127:64]\n" |
12111 | "/// of the result.\n" |
12112 | "/// \\param __c\n" |
12113 | "/// A double-precision floating-point value used to initialize bits [191:128]\n" |
12114 | "/// of the result.\n" |
12115 | "/// \\param __d\n" |
12116 | "/// A double-precision floating-point value used to initialize bits [255:192]\n" |
12117 | "/// of the result.\n" |
12118 | "/// \\returns An initialized 256-bit floating-point vector of [4 x double].\n" |
12119 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
12120 | "_mm256_setr_pd(double __a, double __b, double __c, double __d)\n" |
12121 | "{\n" |
12122 | " return _mm256_set_pd(__d, __c, __b, __a);\n" |
12123 | "}\n" |
12124 | "\n" |
12125 | "/// Constructs a 256-bit floating-point vector of [8 x float],\n" |
12126 | "/// initialized in reverse order with the specified single-precision\n" |
12127 | "/// float-point values.\n" |
12128 | "///\n" |
12129 | "/// \\headerfile <x86intrin.h>\n" |
12130 | "///\n" |
12131 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
12132 | "/// instruction.\n" |
12133 | "///\n" |
12134 | "/// \\param __a\n" |
12135 | "/// A single-precision floating-point value used to initialize bits [31:0]\n" |
12136 | "/// of the result.\n" |
12137 | "/// \\param __b\n" |
12138 | "/// A single-precision floating-point value used to initialize bits [63:32]\n" |
12139 | "/// of the result.\n" |
12140 | "/// \\param __c\n" |
12141 | "/// A single-precision floating-point value used to initialize bits [95:64]\n" |
12142 | "/// of the result.\n" |
12143 | "/// \\param __d\n" |
12144 | "/// A single-precision floating-point value used to initialize bits [127:96]\n" |
12145 | "/// of the result.\n" |
12146 | "/// \\param __e\n" |
12147 | "/// A single-precision floating-point value used to initialize bits [159:128]\n" |
12148 | "/// of the result.\n" |
12149 | "/// \\param __f\n" |
12150 | "/// A single-precision floating-point value used to initialize bits [191:160]\n" |
12151 | "/// of the result.\n" |
12152 | "/// \\param __g\n" |
12153 | "/// A single-precision floating-point value used to initialize bits [223:192]\n" |
12154 | "/// of the result.\n" |
12155 | "/// \\param __h\n" |
12156 | "/// A single-precision floating-point value used to initialize bits [255:224]\n" |
12157 | "/// of the result.\n" |
12158 | "/// \\returns An initialized 256-bit floating-point vector of [8 x float].\n" |
12159 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
12160 | "_mm256_setr_ps(float __a, float __b, float __c, float __d,\n" |
12161 | " float __e, float __f, float __g, float __h)\n" |
12162 | "{\n" |
12163 | " return _mm256_set_ps(__h, __g, __f, __e, __d, __c, __b, __a);\n" |
12164 | "}\n" |
12165 | "\n" |
12166 | "/// Constructs a 256-bit integer vector, initialized in reverse order\n" |
12167 | "/// with the specified 32-bit integral values.\n" |
12168 | "///\n" |
12169 | "/// \\headerfile <x86intrin.h>\n" |
12170 | "///\n" |
12171 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
12172 | "/// instruction.\n" |
12173 | "///\n" |
12174 | "/// \\param __i0\n" |
12175 | "/// A 32-bit integral value used to initialize bits [31:0] of the result.\n" |
12176 | "/// \\param __i1\n" |
12177 | "/// A 32-bit integral value used to initialize bits [63:32] of the result.\n" |
12178 | "/// \\param __i2\n" |
12179 | "/// A 32-bit integral value used to initialize bits [95:64] of the result.\n" |
12180 | "/// \\param __i3\n" |
12181 | "/// A 32-bit integral value used to initialize bits [127:96] of the result.\n" |
12182 | "/// \\param __i4\n" |
12183 | "/// A 32-bit integral value used to initialize bits [159:128] of the result.\n" |
12184 | "/// \\param __i5\n" |
12185 | "/// A 32-bit integral value used to initialize bits [191:160] of the result.\n" |
12186 | "/// \\param __i6\n" |
12187 | "/// A 32-bit integral value used to initialize bits [223:192] of the result.\n" |
12188 | "/// \\param __i7\n" |
12189 | "/// A 32-bit integral value used to initialize bits [255:224] of the result.\n" |
12190 | "/// \\returns An initialized 256-bit integer vector.\n" |
12191 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
12192 | "_mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,\n" |
12193 | " int __i4, int __i5, int __i6, int __i7)\n" |
12194 | "{\n" |
12195 | " return _mm256_set_epi32(__i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0);\n" |
12196 | "}\n" |
12197 | "\n" |
12198 | "/// Constructs a 256-bit integer vector, initialized in reverse order\n" |
12199 | "/// with the specified 16-bit integral values.\n" |
12200 | "///\n" |
12201 | "/// \\headerfile <x86intrin.h>\n" |
12202 | "///\n" |
12203 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
12204 | "/// instruction.\n" |
12205 | "///\n" |
12206 | "/// \\param __w15\n" |
12207 | "/// A 16-bit integral value used to initialize bits [15:0] of the result.\n" |
12208 | "/// \\param __w14\n" |
12209 | "/// A 16-bit integral value used to initialize bits [31:16] of the result.\n" |
12210 | "/// \\param __w13\n" |
12211 | "/// A 16-bit integral value used to initialize bits [47:32] of the result.\n" |
12212 | "/// \\param __w12\n" |
12213 | "/// A 16-bit integral value used to initialize bits [63:48] of the result.\n" |
12214 | "/// \\param __w11\n" |
12215 | "/// A 16-bit integral value used to initialize bits [79:64] of the result.\n" |
12216 | "/// \\param __w10\n" |
12217 | "/// A 16-bit integral value used to initialize bits [95:80] of the result.\n" |
12218 | "/// \\param __w09\n" |
12219 | "/// A 16-bit integral value used to initialize bits [111:96] of the result.\n" |
12220 | "/// \\param __w08\n" |
12221 | "/// A 16-bit integral value used to initialize bits [127:112] of the result.\n" |
12222 | "/// \\param __w07\n" |
12223 | "/// A 16-bit integral value used to initialize bits [143:128] of the result.\n" |
12224 | "/// \\param __w06\n" |
12225 | "/// A 16-bit integral value used to initialize bits [159:144] of the result.\n" |
12226 | "/// \\param __w05\n" |
12227 | "/// A 16-bit integral value used to initialize bits [175:160] of the result.\n" |
12228 | "/// \\param __w04\n" |
12229 | "/// A 16-bit integral value used to initialize bits [191:176] of the result.\n" |
12230 | "/// \\param __w03\n" |
12231 | "/// A 16-bit integral value used to initialize bits [207:192] of the result.\n" |
12232 | "/// \\param __w02\n" |
12233 | "/// A 16-bit integral value used to initialize bits [223:208] of the result.\n" |
12234 | "/// \\param __w01\n" |
12235 | "/// A 16-bit integral value used to initialize bits [239:224] of the result.\n" |
12236 | "/// \\param __w00\n" |
12237 | "/// A 16-bit integral value used to initialize bits [255:240] of the result.\n" |
12238 | "/// \\returns An initialized 256-bit integer vector.\n" |
12239 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
12240 | "_mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,\n" |
12241 | " short __w11, short __w10, short __w09, short __w08,\n" |
12242 | " short __w07, short __w06, short __w05, short __w04,\n" |
12243 | " short __w03, short __w02, short __w01, short __w00)\n" |
12244 | "{\n" |
12245 | " return _mm256_set_epi16(__w00, __w01, __w02, __w03,\n" |
12246 | " __w04, __w05, __w06, __w07,\n" |
12247 | " __w08, __w09, __w10, __w11,\n" |
12248 | " __w12, __w13, __w14, __w15);\n" |
12249 | "}\n" |
12250 | "\n" |
12251 | "/// Constructs a 256-bit integer vector, initialized in reverse order\n" |
12252 | "/// with the specified 8-bit integral values.\n" |
12253 | "///\n" |
12254 | "/// \\headerfile <x86intrin.h>\n" |
12255 | "///\n" |
12256 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
12257 | "/// instruction.\n" |
12258 | "///\n" |
12259 | "/// \\param __b31\n" |
12260 | "/// An 8-bit integral value used to initialize bits [7:0] of the result.\n" |
12261 | "/// \\param __b30\n" |
12262 | "/// An 8-bit integral value used to initialize bits [15:8] of the result.\n" |
12263 | "/// \\param __b29\n" |
12264 | "/// An 8-bit integral value used to initialize bits [23:16] of the result.\n" |
12265 | "/// \\param __b28\n" |
12266 | "/// An 8-bit integral value used to initialize bits [31:24] of the result.\n" |
12267 | "/// \\param __b27\n" |
12268 | "/// An 8-bit integral value used to initialize bits [39:32] of the result.\n" |
12269 | "/// \\param __b26\n" |
12270 | "/// An 8-bit integral value used to initialize bits [47:40] of the result.\n" |
12271 | "/// \\param __b25\n" |
12272 | "/// An 8-bit integral value used to initialize bits [55:48] of the result.\n" |
12273 | "/// \\param __b24\n" |
12274 | "/// An 8-bit integral value used to initialize bits [63:56] of the result.\n" |
12275 | "/// \\param __b23\n" |
12276 | "/// An 8-bit integral value used to initialize bits [71:64] of the result.\n" |
12277 | "/// \\param __b22\n" |
12278 | "/// An 8-bit integral value used to initialize bits [79:72] of the result.\n" |
12279 | "/// \\param __b21\n" |
12280 | "/// An 8-bit integral value used to initialize bits [87:80] of the result.\n" |
12281 | "/// \\param __b20\n" |
12282 | "/// An 8-bit integral value used to initialize bits [95:88] of the result.\n" |
12283 | "/// \\param __b19\n" |
12284 | "/// An 8-bit integral value used to initialize bits [103:96] of the result.\n" |
12285 | "/// \\param __b18\n" |
12286 | "/// An 8-bit integral value used to initialize bits [111:104] of the result.\n" |
12287 | "/// \\param __b17\n" |
12288 | "/// An 8-bit integral value used to initialize bits [119:112] of the result.\n" |
12289 | "/// \\param __b16\n" |
12290 | "/// An 8-bit integral value used to initialize bits [127:120] of the result.\n" |
12291 | "/// \\param __b15\n" |
12292 | "/// An 8-bit integral value used to initialize bits [135:128] of the result.\n" |
12293 | "/// \\param __b14\n" |
12294 | "/// An 8-bit integral value used to initialize bits [143:136] of the result.\n" |
12295 | "/// \\param __b13\n" |
12296 | "/// An 8-bit integral value used to initialize bits [151:144] of the result.\n" |
12297 | "/// \\param __b12\n" |
12298 | "/// An 8-bit integral value used to initialize bits [159:152] of the result.\n" |
12299 | "/// \\param __b11\n" |
12300 | "/// An 8-bit integral value used to initialize bits [167:160] of the result.\n" |
12301 | "/// \\param __b10\n" |
12302 | "/// An 8-bit integral value used to initialize bits [175:168] of the result.\n" |
12303 | "/// \\param __b09\n" |
12304 | "/// An 8-bit integral value used to initialize bits [183:176] of the result.\n" |
12305 | "/// \\param __b08\n" |
12306 | "/// An 8-bit integral value used to initialize bits [191:184] of the result.\n" |
12307 | "/// \\param __b07\n" |
12308 | "/// An 8-bit integral value used to initialize bits [199:192] of the result.\n" |
12309 | "/// \\param __b06\n" |
12310 | "/// An 8-bit integral value used to initialize bits [207:200] of the result.\n" |
12311 | "/// \\param __b05\n" |
12312 | "/// An 8-bit integral value used to initialize bits [215:208] of the result.\n" |
12313 | "/// \\param __b04\n" |
12314 | "/// An 8-bit integral value used to initialize bits [223:216] of the result.\n" |
12315 | "/// \\param __b03\n" |
12316 | "/// An 8-bit integral value used to initialize bits [231:224] of the result.\n" |
12317 | "/// \\param __b02\n" |
12318 | "/// An 8-bit integral value used to initialize bits [239:232] of the result.\n" |
12319 | "/// \\param __b01\n" |
12320 | "/// An 8-bit integral value used to initialize bits [247:240] of the result.\n" |
12321 | "/// \\param __b00\n" |
12322 | "/// An 8-bit integral value used to initialize bits [255:248] of the result.\n" |
12323 | "/// \\returns An initialized 256-bit integer vector.\n" |
12324 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
12325 | "_mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,\n" |
12326 | " char __b27, char __b26, char __b25, char __b24,\n" |
12327 | " char __b23, char __b22, char __b21, char __b20,\n" |
12328 | " char __b19, char __b18, char __b17, char __b16,\n" |
12329 | " char __b15, char __b14, char __b13, char __b12,\n" |
12330 | " char __b11, char __b10, char __b09, char __b08,\n" |
12331 | " char __b07, char __b06, char __b05, char __b04,\n" |
12332 | " char __b03, char __b02, char __b01, char __b00)\n" |
12333 | "{\n" |
12334 | " return _mm256_set_epi8(__b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,\n" |
12335 | " __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,\n" |
12336 | " __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,\n" |
12337 | " __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31);\n" |
12338 | "}\n" |
12339 | "\n" |
12340 | "/// Constructs a 256-bit integer vector, initialized in reverse order\n" |
12341 | "/// with the specified 64-bit integral values.\n" |
12342 | "///\n" |
12343 | "/// \\headerfile <x86intrin.h>\n" |
12344 | "///\n" |
12345 | "/// This intrinsic corresponds to the <c> VPUNPCKLQDQ+VINSERTF128 </c>\n" |
12346 | "/// instruction.\n" |
12347 | "///\n" |
12348 | "/// \\param __a\n" |
12349 | "/// A 64-bit integral value used to initialize bits [63:0] of the result.\n" |
12350 | "/// \\param __b\n" |
12351 | "/// A 64-bit integral value used to initialize bits [127:64] of the result.\n" |
12352 | "/// \\param __c\n" |
12353 | "/// A 64-bit integral value used to initialize bits [191:128] of the result.\n" |
12354 | "/// \\param __d\n" |
12355 | "/// A 64-bit integral value used to initialize bits [255:192] of the result.\n" |
12356 | "/// \\returns An initialized 256-bit integer vector.\n" |
12357 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
12358 | "_mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)\n" |
12359 | "{\n" |
12360 | " return _mm256_set_epi64x(__d, __c, __b, __a);\n" |
12361 | "}\n" |
12362 | "\n" |
12363 | "/* Create vectors with repeated elements */\n" |
12364 | "/// Constructs a 256-bit floating-point vector of [4 x double], with each\n" |
12365 | "/// of the four double-precision floating-point vector elements set to the\n" |
12366 | "/// specified double-precision floating-point value.\n" |
12367 | "///\n" |
12368 | "/// \\headerfile <x86intrin.h>\n" |
12369 | "///\n" |
12370 | "/// This intrinsic corresponds to the <c> VMOVDDUP+VINSERTF128 </c> instruction.\n" |
12371 | "///\n" |
12372 | "/// \\param __w\n" |
12373 | "/// A double-precision floating-point value used to initialize each vector\n" |
12374 | "/// element of the result.\n" |
12375 | "/// \\returns An initialized 256-bit floating-point vector of [4 x double].\n" |
12376 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
12377 | "_mm256_set1_pd(double __w)\n" |
12378 | "{\n" |
12379 | " return _mm256_set_pd(__w, __w, __w, __w);\n" |
12380 | "}\n" |
12381 | "\n" |
12382 | "/// Constructs a 256-bit floating-point vector of [8 x float], with each\n" |
12383 | "/// of the eight single-precision floating-point vector elements set to the\n" |
12384 | "/// specified single-precision floating-point value.\n" |
12385 | "///\n" |
12386 | "/// \\headerfile <x86intrin.h>\n" |
12387 | "///\n" |
12388 | "/// This intrinsic corresponds to the <c> VPERMILPS+VINSERTF128 </c>\n" |
12389 | "/// instruction.\n" |
12390 | "///\n" |
12391 | "/// \\param __w\n" |
12392 | "/// A single-precision floating-point value used to initialize each vector\n" |
12393 | "/// element of the result.\n" |
12394 | "/// \\returns An initialized 256-bit floating-point vector of [8 x float].\n" |
12395 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
12396 | "_mm256_set1_ps(float __w)\n" |
12397 | "{\n" |
12398 | " return _mm256_set_ps(__w, __w, __w, __w, __w, __w, __w, __w);\n" |
12399 | "}\n" |
12400 | "\n" |
12401 | "/// Constructs a 256-bit integer vector of [8 x i32], with each of the\n" |
12402 | "/// 32-bit integral vector elements set to the specified 32-bit integral\n" |
12403 | "/// value.\n" |
12404 | "///\n" |
12405 | "/// \\headerfile <x86intrin.h>\n" |
12406 | "///\n" |
12407 | "/// This intrinsic corresponds to the <c> VPERMILPS+VINSERTF128 </c>\n" |
12408 | "/// instruction.\n" |
12409 | "///\n" |
12410 | "/// \\param __i\n" |
12411 | "/// A 32-bit integral value used to initialize each vector element of the\n" |
12412 | "/// result.\n" |
12413 | "/// \\returns An initialized 256-bit integer vector of [8 x i32].\n" |
12414 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
12415 | "_mm256_set1_epi32(int __i)\n" |
12416 | "{\n" |
12417 | " return _mm256_set_epi32(__i, __i, __i, __i, __i, __i, __i, __i);\n" |
12418 | "}\n" |
12419 | "\n" |
12420 | "/// Constructs a 256-bit integer vector of [16 x i16], with each of the\n" |
12421 | "/// 16-bit integral vector elements set to the specified 16-bit integral\n" |
12422 | "/// value.\n" |
12423 | "///\n" |
12424 | "/// \\headerfile <x86intrin.h>\n" |
12425 | "///\n" |
12426 | "/// This intrinsic corresponds to the <c> VPSHUFB+VINSERTF128 </c> instruction.\n" |
12427 | "///\n" |
12428 | "/// \\param __w\n" |
12429 | "/// A 16-bit integral value used to initialize each vector element of the\n" |
12430 | "/// result.\n" |
12431 | "/// \\returns An initialized 256-bit integer vector of [16 x i16].\n" |
12432 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
12433 | "_mm256_set1_epi16(short __w)\n" |
12434 | "{\n" |
12435 | " return _mm256_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w,\n" |
12436 | " __w, __w, __w, __w, __w, __w, __w, __w);\n" |
12437 | "}\n" |
12438 | "\n" |
12439 | "/// Constructs a 256-bit integer vector of [32 x i8], with each of the\n" |
12440 | "/// 8-bit integral vector elements set to the specified 8-bit integral value.\n" |
12441 | "///\n" |
12442 | "/// \\headerfile <x86intrin.h>\n" |
12443 | "///\n" |
12444 | "/// This intrinsic corresponds to the <c> VPSHUFB+VINSERTF128 </c> instruction.\n" |
12445 | "///\n" |
12446 | "/// \\param __b\n" |
12447 | "/// An 8-bit integral value used to initialize each vector element of the\n" |
12448 | "/// result.\n" |
12449 | "/// \\returns An initialized 256-bit integer vector of [32 x i8].\n" |
12450 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
12451 | "_mm256_set1_epi8(char __b)\n" |
12452 | "{\n" |
12453 | " return _mm256_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b,\n" |
12454 | " __b, __b, __b, __b, __b, __b, __b, __b,\n" |
12455 | " __b, __b, __b, __b, __b, __b, __b, __b,\n" |
12456 | " __b, __b, __b, __b, __b, __b, __b, __b);\n" |
12457 | "}\n" |
12458 | "\n" |
12459 | "/// Constructs a 256-bit integer vector of [4 x i64], with each of the\n" |
12460 | "/// 64-bit integral vector elements set to the specified 64-bit integral\n" |
12461 | "/// value.\n" |
12462 | "///\n" |
12463 | "/// \\headerfile <x86intrin.h>\n" |
12464 | "///\n" |
12465 | "/// This intrinsic corresponds to the <c> VMOVDDUP+VINSERTF128 </c> instruction.\n" |
12466 | "///\n" |
12467 | "/// \\param __q\n" |
12468 | "/// A 64-bit integral value used to initialize each vector element of the\n" |
12469 | "/// result.\n" |
12470 | "/// \\returns An initialized 256-bit integer vector of [4 x i64].\n" |
12471 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
12472 | "_mm256_set1_epi64x(long long __q)\n" |
12473 | "{\n" |
12474 | " return _mm256_set_epi64x(__q, __q, __q, __q);\n" |
12475 | "}\n" |
12476 | "\n" |
12477 | "/* Create __zeroed vectors */\n" |
12478 | "/// Constructs a 256-bit floating-point vector of [4 x double] with all\n" |
12479 | "/// vector elements initialized to zero.\n" |
12480 | "///\n" |
12481 | "/// \\headerfile <x86intrin.h>\n" |
12482 | "///\n" |
12483 | "/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n" |
12484 | "///\n" |
12485 | "/// \\returns A 256-bit vector of [4 x double] with all elements set to zero.\n" |
12486 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
12487 | "_mm256_setzero_pd(void)\n" |
12488 | "{\n" |
12489 | " return __extension__ (__m256d){ 0, 0, 0, 0 };\n" |
12490 | "}\n" |
12491 | "\n" |
12492 | "/// Constructs a 256-bit floating-point vector of [8 x float] with all\n" |
12493 | "/// vector elements initialized to zero.\n" |
12494 | "///\n" |
12495 | "/// \\headerfile <x86intrin.h>\n" |
12496 | "///\n" |
12497 | "/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n" |
12498 | "///\n" |
12499 | "/// \\returns A 256-bit vector of [8 x float] with all elements set to zero.\n" |
12500 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
12501 | "_mm256_setzero_ps(void)\n" |
12502 | "{\n" |
12503 | " return __extension__ (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };\n" |
12504 | "}\n" |
12505 | "\n" |
12506 | "/// Constructs a 256-bit integer vector initialized to zero.\n" |
12507 | "///\n" |
12508 | "/// \\headerfile <x86intrin.h>\n" |
12509 | "///\n" |
12510 | "/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n" |
12511 | "///\n" |
12512 | "/// \\returns A 256-bit integer vector initialized to zero.\n" |
12513 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
12514 | "_mm256_setzero_si256(void)\n" |
12515 | "{\n" |
12516 | " return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };\n" |
12517 | "}\n" |
12518 | "\n" |
12519 | "/* Cast between vector types */\n" |
12520 | "/// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit\n" |
12521 | "/// floating-point vector of [8 x float].\n" |
12522 | "///\n" |
12523 | "/// \\headerfile <x86intrin.h>\n" |
12524 | "///\n" |
12525 | "/// This intrinsic has no corresponding instruction.\n" |
12526 | "///\n" |
12527 | "/// \\param __a\n" |
12528 | "/// A 256-bit floating-point vector of [4 x double].\n" |
12529 | "/// \\returns A 256-bit floating-point vector of [8 x float] containing the same\n" |
12530 | "/// bitwise pattern as the parameter.\n" |
12531 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
12532 | "_mm256_castpd_ps(__m256d __a)\n" |
12533 | "{\n" |
12534 | " return (__m256)__a;\n" |
12535 | "}\n" |
12536 | "\n" |
12537 | "/// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit\n" |
12538 | "/// integer vector.\n" |
12539 | "///\n" |
12540 | "/// \\headerfile <x86intrin.h>\n" |
12541 | "///\n" |
12542 | "/// This intrinsic has no corresponding instruction.\n" |
12543 | "///\n" |
12544 | "/// \\param __a\n" |
12545 | "/// A 256-bit floating-point vector of [4 x double].\n" |
12546 | "/// \\returns A 256-bit integer vector containing the same bitwise pattern as the\n" |
12547 | "/// parameter.\n" |
12548 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
12549 | "_mm256_castpd_si256(__m256d __a)\n" |
12550 | "{\n" |
12551 | " return (__m256i)__a;\n" |
12552 | "}\n" |
12553 | "\n" |
12554 | "/// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit\n" |
12555 | "/// floating-point vector of [4 x double].\n" |
12556 | "///\n" |
12557 | "/// \\headerfile <x86intrin.h>\n" |
12558 | "///\n" |
12559 | "/// This intrinsic has no corresponding instruction.\n" |
12560 | "///\n" |
12561 | "/// \\param __a\n" |
12562 | "/// A 256-bit floating-point vector of [8 x float].\n" |
12563 | "/// \\returns A 256-bit floating-point vector of [4 x double] containing the same\n" |
12564 | "/// bitwise pattern as the parameter.\n" |
12565 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
12566 | "_mm256_castps_pd(__m256 __a)\n" |
12567 | "{\n" |
12568 | " return (__m256d)__a;\n" |
12569 | "}\n" |
12570 | "\n" |
12571 | "/// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit\n" |
12572 | "/// integer vector.\n" |
12573 | "///\n" |
12574 | "/// \\headerfile <x86intrin.h>\n" |
12575 | "///\n" |
12576 | "/// This intrinsic has no corresponding instruction.\n" |
12577 | "///\n" |
12578 | "/// \\param __a\n" |
12579 | "/// A 256-bit floating-point vector of [8 x float].\n" |
12580 | "/// \\returns A 256-bit integer vector containing the same bitwise pattern as the\n" |
12581 | "/// parameter.\n" |
12582 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
12583 | "_mm256_castps_si256(__m256 __a)\n" |
12584 | "{\n" |
12585 | " return (__m256i)__a;\n" |
12586 | "}\n" |
12587 | "\n" |
12588 | "/// Casts a 256-bit integer vector into a 256-bit floating-point vector\n" |
12589 | "/// of [8 x float].\n" |
12590 | "///\n" |
12591 | "/// \\headerfile <x86intrin.h>\n" |
12592 | "///\n" |
12593 | "/// This intrinsic has no corresponding instruction.\n" |
12594 | "///\n" |
12595 | "/// \\param __a\n" |
12596 | "/// A 256-bit integer vector.\n" |
12597 | "/// \\returns A 256-bit floating-point vector of [8 x float] containing the same\n" |
12598 | "/// bitwise pattern as the parameter.\n" |
12599 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
12600 | "_mm256_castsi256_ps(__m256i __a)\n" |
12601 | "{\n" |
12602 | " return (__m256)__a;\n" |
12603 | "}\n" |
12604 | "\n" |
12605 | "/// Casts a 256-bit integer vector into a 256-bit floating-point vector\n" |
12606 | "/// of [4 x double].\n" |
12607 | "///\n" |
12608 | "/// \\headerfile <x86intrin.h>\n" |
12609 | "///\n" |
12610 | "/// This intrinsic has no corresponding instruction.\n" |
12611 | "///\n" |
12612 | "/// \\param __a\n" |
12613 | "/// A 256-bit integer vector.\n" |
12614 | "/// \\returns A 256-bit floating-point vector of [4 x double] containing the same\n" |
12615 | "/// bitwise pattern as the parameter.\n" |
12616 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
12617 | "_mm256_castsi256_pd(__m256i __a)\n" |
12618 | "{\n" |
12619 | " return (__m256d)__a;\n" |
12620 | "}\n" |
12621 | "\n" |
12622 | "/// Returns the lower 128 bits of a 256-bit floating-point vector of\n" |
12623 | "/// [4 x double] as a 128-bit floating-point vector of [2 x double].\n" |
12624 | "///\n" |
12625 | "/// \\headerfile <x86intrin.h>\n" |
12626 | "///\n" |
12627 | "/// This intrinsic has no corresponding instruction.\n" |
12628 | "///\n" |
12629 | "/// \\param __a\n" |
12630 | "/// A 256-bit floating-point vector of [4 x double].\n" |
12631 | "/// \\returns A 128-bit floating-point vector of [2 x double] containing the\n" |
12632 | "/// lower 128 bits of the parameter.\n" |
12633 | "static __inline __m128d __DEFAULT_FN_ATTRS\n" |
12634 | "_mm256_castpd256_pd128(__m256d __a)\n" |
12635 | "{\n" |
12636 | " return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1);\n" |
12637 | "}\n" |
12638 | "\n" |
12639 | "/// Returns the lower 128 bits of a 256-bit floating-point vector of\n" |
12640 | "/// [8 x float] as a 128-bit floating-point vector of [4 x float].\n" |
12641 | "///\n" |
12642 | "/// \\headerfile <x86intrin.h>\n" |
12643 | "///\n" |
12644 | "/// This intrinsic has no corresponding instruction.\n" |
12645 | "///\n" |
12646 | "/// \\param __a\n" |
12647 | "/// A 256-bit floating-point vector of [8 x float].\n" |
12648 | "/// \\returns A 128-bit floating-point vector of [4 x float] containing the\n" |
12649 | "/// lower 128 bits of the parameter.\n" |
12650 | "static __inline __m128 __DEFAULT_FN_ATTRS\n" |
12651 | "_mm256_castps256_ps128(__m256 __a)\n" |
12652 | "{\n" |
12653 | " return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3);\n" |
12654 | "}\n" |
12655 | "\n" |
12656 | "/// Truncates a 256-bit integer vector into a 128-bit integer vector.\n" |
12657 | "///\n" |
12658 | "/// \\headerfile <x86intrin.h>\n" |
12659 | "///\n" |
12660 | "/// This intrinsic has no corresponding instruction.\n" |
12661 | "///\n" |
12662 | "/// \\param __a\n" |
12663 | "/// A 256-bit integer vector.\n" |
12664 | "/// \\returns A 128-bit integer vector containing the lower 128 bits of the\n" |
12665 | "/// parameter.\n" |
12666 | "static __inline __m128i __DEFAULT_FN_ATTRS\n" |
12667 | "_mm256_castsi256_si128(__m256i __a)\n" |
12668 | "{\n" |
12669 | " return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1);\n" |
12670 | "}\n" |
12671 | "\n" |
12672 | "/// Constructs a 256-bit floating-point vector of [4 x double] from a\n" |
12673 | "/// 128-bit floating-point vector of [2 x double].\n" |
12674 | "///\n" |
12675 | "/// The lower 128 bits contain the value of the source vector. The contents\n" |
12676 | "/// of the upper 128 bits are undefined.\n" |
12677 | "///\n" |
12678 | "/// \\headerfile <x86intrin.h>\n" |
12679 | "///\n" |
12680 | "/// This intrinsic has no corresponding instruction.\n" |
12681 | "///\n" |
12682 | "/// \\param __a\n" |
12683 | "/// A 128-bit vector of [2 x double].\n" |
12684 | "/// \\returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits\n" |
12685 | "/// contain the value of the parameter. The contents of the upper 128 bits\n" |
12686 | "/// are undefined.\n" |
12687 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
12688 | "_mm256_castpd128_pd256(__m128d __a)\n" |
12689 | "{\n" |
12690 | " return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 1, -1, -1);\n" |
12691 | "}\n" |
12692 | "\n" |
12693 | "/// Constructs a 256-bit floating-point vector of [8 x float] from a\n" |
12694 | "/// 128-bit floating-point vector of [4 x float].\n" |
12695 | "///\n" |
12696 | "/// The lower 128 bits contain the value of the source vector. The contents\n" |
12697 | "/// of the upper 128 bits are undefined.\n" |
12698 | "///\n" |
12699 | "/// \\headerfile <x86intrin.h>\n" |
12700 | "///\n" |
12701 | "/// This intrinsic has no corresponding instruction.\n" |
12702 | "///\n" |
12703 | "/// \\param __a\n" |
12704 | "/// A 128-bit vector of [4 x float].\n" |
12705 | "/// \\returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits\n" |
12706 | "/// contain the value of the parameter. The contents of the upper 128 bits\n" |
12707 | "/// are undefined.\n" |
12708 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
12709 | "_mm256_castps128_ps256(__m128 __a)\n" |
12710 | "{\n" |
12711 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1, 2, 3, -1, -1, -1, -1);\n" |
12712 | "}\n" |
12713 | "\n" |
12714 | "/// Constructs a 256-bit integer vector from a 128-bit integer vector.\n" |
12715 | "///\n" |
12716 | "/// The lower 128 bits contain the value of the source vector. The contents\n" |
12717 | "/// of the upper 128 bits are undefined.\n" |
12718 | "///\n" |
12719 | "/// \\headerfile <x86intrin.h>\n" |
12720 | "///\n" |
12721 | "/// This intrinsic has no corresponding instruction.\n" |
12722 | "///\n" |
12723 | "/// \\param __a\n" |
12724 | "/// A 128-bit integer vector.\n" |
12725 | "/// \\returns A 256-bit integer vector. The lower 128 bits contain the value of\n" |
12726 | "/// the parameter. The contents of the upper 128 bits are undefined.\n" |
12727 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
12728 | "_mm256_castsi128_si256(__m128i __a)\n" |
12729 | "{\n" |
12730 | " return __builtin_shufflevector((__v2di)__a, (__v2di)__a, 0, 1, -1, -1);\n" |
12731 | "}\n" |
12732 | "\n" |
12733 | "/// Constructs a 256-bit floating-point vector of [4 x double] from a\n" |
12734 | "/// 128-bit floating-point vector of [2 x double]. The lower 128 bits\n" |
12735 | "/// contain the value of the source vector. The upper 128 bits are set\n" |
12736 | "/// to zero.\n" |
12737 | "///\n" |
12738 | "/// \\headerfile <x86intrin.h>\n" |
12739 | "///\n" |
12740 | "/// This intrinsic has no corresponding instruction.\n" |
12741 | "///\n" |
12742 | "/// \\param __a\n" |
12743 | "/// A 128-bit vector of [2 x double].\n" |
12744 | "/// \\returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits\n" |
12745 | "/// contain the value of the parameter. The upper 128 bits are set to zero.\n" |
12746 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
12747 | "_mm256_zextpd128_pd256(__m128d __a)\n" |
12748 | "{\n" |
12749 | " return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3);\n" |
12750 | "}\n" |
12751 | "\n" |
12752 | "/// Constructs a 256-bit floating-point vector of [8 x float] from a\n" |
12753 | "/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain\n" |
12754 | "/// the value of the source vector. The upper 128 bits are set to zero.\n" |
12755 | "///\n" |
12756 | "/// \\headerfile <x86intrin.h>\n" |
12757 | "///\n" |
12758 | "/// This intrinsic has no corresponding instruction.\n" |
12759 | "///\n" |
12760 | "/// \\param __a\n" |
12761 | "/// A 128-bit vector of [4 x float].\n" |
12762 | "/// \\returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits\n" |
12763 | "/// contain the value of the parameter. The upper 128 bits are set to zero.\n" |
12764 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
12765 | "_mm256_zextps128_ps256(__m128 __a)\n" |
12766 | "{\n" |
12767 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7);\n" |
12768 | "}\n" |
12769 | "\n" |
12770 | "/// Constructs a 256-bit integer vector from a 128-bit integer vector.\n" |
12771 | "/// The lower 128 bits contain the value of the source vector. The upper\n" |
12772 | "/// 128 bits are set to zero.\n" |
12773 | "///\n" |
12774 | "/// \\headerfile <x86intrin.h>\n" |
12775 | "///\n" |
12776 | "/// This intrinsic has no corresponding instruction.\n" |
12777 | "///\n" |
12778 | "/// \\param __a\n" |
12779 | "/// A 128-bit integer vector.\n" |
12780 | "/// \\returns A 256-bit integer vector. The lower 128 bits contain the value of\n" |
12781 | "/// the parameter. The upper 128 bits are set to zero.\n" |
12782 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
12783 | "_mm256_zextsi128_si256(__m128i __a)\n" |
12784 | "{\n" |
12785 | " return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3);\n" |
12786 | "}\n" |
12787 | "\n" |
12788 | "/*\n" |
12789 | " Vector insert.\n" |
12790 | " We use macros rather than inlines because we only want to accept\n" |
12791 | " invocations where the immediate M is a constant expression.\n" |
12792 | "*/\n" |
12793 | "/// Constructs a new 256-bit vector of [8 x float] by first duplicating\n" |
12794 | "/// a 256-bit vector of [8 x float] given in the first parameter, and then\n" |
12795 | "/// replacing either the upper or the lower 128 bits with the contents of a\n" |
12796 | "/// 128-bit vector of [4 x float] in the second parameter.\n" |
12797 | "///\n" |
12798 | "/// The immediate integer parameter determines between the upper or the lower\n" |
12799 | "/// 128 bits.\n" |
12800 | "///\n" |
12801 | "/// \\headerfile <x86intrin.h>\n" |
12802 | "///\n" |
12803 | "/// \\code\n" |
12804 | "/// __m256 _mm256_insertf128_ps(__m256 V1, __m128 V2, const int M);\n" |
12805 | "/// \\endcode\n" |
12806 | "///\n" |
12807 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
12808 | "///\n" |
12809 | "/// \\param V1\n" |
12810 | "/// A 256-bit vector of [8 x float]. This vector is copied to the result\n" |
12811 | "/// first, and then either the upper or the lower 128 bits of the result will\n" |
12812 | "/// be replaced by the contents of \\a V2.\n" |
12813 | "/// \\param V2\n" |
12814 | "/// A 128-bit vector of [4 x float]. The contents of this parameter are\n" |
12815 | "/// written to either the upper or the lower 128 bits of the result depending\n" |
12816 | "/// on the value of parameter \\a M.\n" |
12817 | "/// \\param M\n" |
12818 | "/// An immediate integer. The least significant bit determines how the values\n" |
12819 | "/// from the two parameters are interleaved: \\n\n" |
12820 | "/// If bit [0] of \\a M is 0, \\a V2 are copied to bits [127:0] of the result,\n" |
12821 | "/// and bits [255:128] of \\a V1 are copied to bits [255:128] of the\n" |
12822 | "/// result. \\n\n" |
12823 | "/// If bit [0] of \\a M is 1, \\a V2 are copied to bits [255:128] of the\n" |
12824 | "/// result, and bits [127:0] of \\a V1 are copied to bits [127:0] of the\n" |
12825 | "/// result.\n" |
12826 | "/// \\returns A 256-bit vector of [8 x float] containing the interleaved values.\n" |
12827 | "#define _mm256_insertf128_ps(V1, V2, M) \\\n" |
12828 | " (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \\\n" |
12829 | " (__v4sf)(__m128)(V2), (int)(M))\n" |
12830 | "\n" |
12831 | "/// Constructs a new 256-bit vector of [4 x double] by first duplicating\n" |
12832 | "/// a 256-bit vector of [4 x double] given in the first parameter, and then\n" |
12833 | "/// replacing either the upper or the lower 128 bits with the contents of a\n" |
12834 | "/// 128-bit vector of [2 x double] in the second parameter.\n" |
12835 | "///\n" |
12836 | "/// The immediate integer parameter determines between the upper or the lower\n" |
12837 | "/// 128 bits.\n" |
12838 | "///\n" |
12839 | "/// \\headerfile <x86intrin.h>\n" |
12840 | "///\n" |
12841 | "/// \\code\n" |
12842 | "/// __m256d _mm256_insertf128_pd(__m256d V1, __m128d V2, const int M);\n" |
12843 | "/// \\endcode\n" |
12844 | "///\n" |
12845 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
12846 | "///\n" |
12847 | "/// \\param V1\n" |
12848 | "/// A 256-bit vector of [4 x double]. This vector is copied to the result\n" |
12849 | "/// first, and then either the upper or the lower 128 bits of the result will\n" |
12850 | "/// be replaced by the contents of \\a V2.\n" |
12851 | "/// \\param V2\n" |
12852 | "/// A 128-bit vector of [2 x double]. The contents of this parameter are\n" |
12853 | "/// written to either the upper or the lower 128 bits of the result depending\n" |
12854 | "/// on the value of parameter \\a M.\n" |
12855 | "/// \\param M\n" |
12856 | "/// An immediate integer. The least significant bit determines how the values\n" |
12857 | "/// from the two parameters are interleaved: \\n\n" |
12858 | "/// If bit [0] of \\a M is 0, \\a V2 are copied to bits [127:0] of the result,\n" |
12859 | "/// and bits [255:128] of \\a V1 are copied to bits [255:128] of the\n" |
12860 | "/// result. \\n\n" |
12861 | "/// If bit [0] of \\a M is 1, \\a V2 are copied to bits [255:128] of the\n" |
12862 | "/// result, and bits [127:0] of \\a V1 are copied to bits [127:0] of the\n" |
12863 | "/// result.\n" |
12864 | "/// \\returns A 256-bit vector of [4 x double] containing the interleaved values.\n" |
12865 | "#define _mm256_insertf128_pd(V1, V2, M) \\\n" |
12866 | " (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \\\n" |
12867 | " (__v2df)(__m128d)(V2), (int)(M))\n" |
12868 | "\n" |
12869 | "/// Constructs a new 256-bit integer vector by first duplicating a\n" |
12870 | "/// 256-bit integer vector given in the first parameter, and then replacing\n" |
12871 | "/// either the upper or the lower 128 bits with the contents of a 128-bit\n" |
12872 | "/// integer vector in the second parameter.\n" |
12873 | "///\n" |
12874 | "/// The immediate integer parameter determines between the upper or the lower\n" |
12875 | "/// 128 bits.\n" |
12876 | "///\n" |
12877 | "/// \\headerfile <x86intrin.h>\n" |
12878 | "///\n" |
12879 | "/// \\code\n" |
12880 | "/// __m256i _mm256_insertf128_si256(__m256i V1, __m128i V2, const int M);\n" |
12881 | "/// \\endcode\n" |
12882 | "///\n" |
12883 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
12884 | "///\n" |
12885 | "/// \\param V1\n" |
12886 | "/// A 256-bit integer vector. This vector is copied to the result first, and\n" |
12887 | "/// then either the upper or the lower 128 bits of the result will be\n" |
12888 | "/// replaced by the contents of \\a V2.\n" |
12889 | "/// \\param V2\n" |
12890 | "/// A 128-bit integer vector. The contents of this parameter are written to\n" |
12891 | "/// either the upper or the lower 128 bits of the result depending on the\n" |
12892 | "/// value of parameter \\a M.\n" |
12893 | "/// \\param M\n" |
12894 | "/// An immediate integer. The least significant bit determines how the values\n" |
12895 | "/// from the two parameters are interleaved: \\n\n" |
12896 | "/// If bit [0] of \\a M is 0, \\a V2 are copied to bits [127:0] of the result,\n" |
12897 | "/// and bits [255:128] of \\a V1 are copied to bits [255:128] of the\n" |
12898 | "/// result. \\n\n" |
12899 | "/// If bit [0] of \\a M is 1, \\a V2 are copied to bits [255:128] of the\n" |
12900 | "/// result, and bits [127:0] of \\a V1 are copied to bits [127:0] of the\n" |
12901 | "/// result.\n" |
12902 | "/// \\returns A 256-bit integer vector containing the interleaved values.\n" |
12903 | "#define _mm256_insertf128_si256(V1, V2, M) \\\n" |
12904 | " (__m256i)__builtin_ia32_vinsertf128_si256((__v8si)(__m256i)(V1), \\\n" |
12905 | " (__v4si)(__m128i)(V2), (int)(M))\n" |
12906 | "\n" |
12907 | "/*\n" |
12908 | " Vector extract.\n" |
12909 | " We use macros rather than inlines because we only want to accept\n" |
12910 | " invocations where the immediate M is a constant expression.\n" |
12911 | "*/\n" |
12912 | "/// Extracts either the upper or the lower 128 bits from a 256-bit vector\n" |
12913 | "/// of [8 x float], as determined by the immediate integer parameter, and\n" |
12914 | "/// returns the extracted bits as a 128-bit vector of [4 x float].\n" |
12915 | "///\n" |
12916 | "/// \\headerfile <x86intrin.h>\n" |
12917 | "///\n" |
12918 | "/// \\code\n" |
12919 | "/// __m128 _mm256_extractf128_ps(__m256 V, const int M);\n" |
12920 | "/// \\endcode\n" |
12921 | "///\n" |
12922 | "/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.\n" |
12923 | "///\n" |
12924 | "/// \\param V\n" |
12925 | "/// A 256-bit vector of [8 x float].\n" |
12926 | "/// \\param M\n" |
12927 | "/// An immediate integer. The least significant bit determines which bits are\n" |
12928 | "/// extracted from the first parameter: \\n\n" |
12929 | "/// If bit [0] of \\a M is 0, bits [127:0] of \\a V are copied to the\n" |
12930 | "/// result. \\n\n" |
12931 | "/// If bit [0] of \\a M is 1, bits [255:128] of \\a V are copied to the result.\n" |
12932 | "/// \\returns A 128-bit vector of [4 x float] containing the extracted bits.\n" |
12933 | "#define _mm256_extractf128_ps(V, M) \\\n" |
12934 | " (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M))\n" |
12935 | "\n" |
12936 | "/// Extracts either the upper or the lower 128 bits from a 256-bit vector\n" |
12937 | "/// of [4 x double], as determined by the immediate integer parameter, and\n" |
12938 | "/// returns the extracted bits as a 128-bit vector of [2 x double].\n" |
12939 | "///\n" |
12940 | "/// \\headerfile <x86intrin.h>\n" |
12941 | "///\n" |
12942 | "/// \\code\n" |
12943 | "/// __m128d _mm256_extractf128_pd(__m256d V, const int M);\n" |
12944 | "/// \\endcode\n" |
12945 | "///\n" |
12946 | "/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.\n" |
12947 | "///\n" |
12948 | "/// \\param V\n" |
12949 | "/// A 256-bit vector of [4 x double].\n" |
12950 | "/// \\param M\n" |
12951 | "/// An immediate integer. The least significant bit determines which bits are\n" |
12952 | "/// extracted from the first parameter: \\n\n" |
12953 | "/// If bit [0] of \\a M is 0, bits [127:0] of \\a V are copied to the\n" |
12954 | "/// result. \\n\n" |
12955 | "/// If bit [0] of \\a M is 1, bits [255:128] of \\a V are copied to the result.\n" |
12956 | "/// \\returns A 128-bit vector of [2 x double] containing the extracted bits.\n" |
12957 | "#define _mm256_extractf128_pd(V, M) \\\n" |
12958 | " (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M))\n" |
12959 | "\n" |
12960 | "/// Extracts either the upper or the lower 128 bits from a 256-bit\n" |
12961 | "/// integer vector, as determined by the immediate integer parameter, and\n" |
12962 | "/// returns the extracted bits as a 128-bit integer vector.\n" |
12963 | "///\n" |
12964 | "/// \\headerfile <x86intrin.h>\n" |
12965 | "///\n" |
12966 | "/// \\code\n" |
12967 | "/// __m128i _mm256_extractf128_si256(__m256i V, const int M);\n" |
12968 | "/// \\endcode\n" |
12969 | "///\n" |
12970 | "/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.\n" |
12971 | "///\n" |
12972 | "/// \\param V\n" |
12973 | "/// A 256-bit integer vector.\n" |
12974 | "/// \\param M\n" |
12975 | "/// An immediate integer. The least significant bit determines which bits are\n" |
12976 | "/// extracted from the first parameter: \\n\n" |
12977 | "/// If bit [0] of \\a M is 0, bits [127:0] of \\a V are copied to the\n" |
12978 | "/// result. \\n\n" |
12979 | "/// If bit [0] of \\a M is 1, bits [255:128] of \\a V are copied to the result.\n" |
12980 | "/// \\returns A 128-bit integer vector containing the extracted bits.\n" |
12981 | "#define _mm256_extractf128_si256(V, M) \\\n" |
12982 | " (__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M))\n" |
12983 | "\n" |
12984 | "/* SIMD load ops (unaligned) */\n" |
12985 | "/// Loads two 128-bit floating-point vectors of [4 x float] from\n" |
12986 | "/// unaligned memory locations and constructs a 256-bit floating-point vector\n" |
12987 | "/// of [8 x float] by concatenating the two 128-bit vectors.\n" |
12988 | "///\n" |
12989 | "/// \\headerfile <x86intrin.h>\n" |
12990 | "///\n" |
12991 | "/// This intrinsic corresponds to load instructions followed by the\n" |
12992 | "/// <c> VINSERTF128 </c> instruction.\n" |
12993 | "///\n" |
12994 | "/// \\param __addr_hi\n" |
12995 | "/// A pointer to a 128-bit memory location containing 4 consecutive\n" |
12996 | "/// single-precision floating-point values. These values are to be copied to\n" |
12997 | "/// bits[255:128] of the result. The address of the memory location does not\n" |
12998 | "/// have to be aligned.\n" |
12999 | "/// \\param __addr_lo\n" |
13000 | "/// A pointer to a 128-bit memory location containing 4 consecutive\n" |
13001 | "/// single-precision floating-point values. These values are to be copied to\n" |
13002 | "/// bits[127:0] of the result. The address of the memory location does not\n" |
13003 | "/// have to be aligned.\n" |
13004 | "/// \\returns A 256-bit floating-point vector of [8 x float] containing the\n" |
13005 | "/// concatenated result.\n" |
13006 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
13007 | "_mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)\n" |
13008 | "{\n" |
13009 | " __m256 __v256 = _mm256_castps128_ps256(_mm_loadu_ps(__addr_lo));\n" |
13010 | " return _mm256_insertf128_ps(__v256, _mm_loadu_ps(__addr_hi), 1);\n" |
13011 | "}\n" |
13012 | "\n" |
13013 | "/// Loads two 128-bit floating-point vectors of [2 x double] from\n" |
13014 | "/// unaligned memory locations and constructs a 256-bit floating-point vector\n" |
13015 | "/// of [4 x double] by concatenating the two 128-bit vectors.\n" |
13016 | "///\n" |
13017 | "/// \\headerfile <x86intrin.h>\n" |
13018 | "///\n" |
13019 | "/// This intrinsic corresponds to load instructions followed by the\n" |
13020 | "/// <c> VINSERTF128 </c> instruction.\n" |
13021 | "///\n" |
13022 | "/// \\param __addr_hi\n" |
13023 | "/// A pointer to a 128-bit memory location containing two consecutive\n" |
13024 | "/// double-precision floating-point values. These values are to be copied to\n" |
13025 | "/// bits[255:128] of the result. The address of the memory location does not\n" |
13026 | "/// have to be aligned.\n" |
13027 | "/// \\param __addr_lo\n" |
13028 | "/// A pointer to a 128-bit memory location containing two consecutive\n" |
13029 | "/// double-precision floating-point values. These values are to be copied to\n" |
13030 | "/// bits[127:0] of the result. The address of the memory location does not\n" |
13031 | "/// have to be aligned.\n" |
13032 | "/// \\returns A 256-bit floating-point vector of [4 x double] containing the\n" |
13033 | "/// concatenated result.\n" |
13034 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
13035 | "_mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)\n" |
13036 | "{\n" |
13037 | " __m256d __v256 = _mm256_castpd128_pd256(_mm_loadu_pd(__addr_lo));\n" |
13038 | " return _mm256_insertf128_pd(__v256, _mm_loadu_pd(__addr_hi), 1);\n" |
13039 | "}\n" |
13040 | "\n" |
13041 | "/// Loads two 128-bit integer vectors from unaligned memory locations and\n" |
13042 | "/// constructs a 256-bit integer vector by concatenating the two 128-bit\n" |
13043 | "/// vectors.\n" |
13044 | "///\n" |
13045 | "/// \\headerfile <x86intrin.h>\n" |
13046 | "///\n" |
13047 | "/// This intrinsic corresponds to load instructions followed by the\n" |
13048 | "/// <c> VINSERTF128 </c> instruction.\n" |
13049 | "///\n" |
13050 | "/// \\param __addr_hi\n" |
13051 | "/// A pointer to a 128-bit memory location containing a 128-bit integer\n" |
13052 | "/// vector. This vector is to be copied to bits[255:128] of the result. The\n" |
13053 | "/// address of the memory location does not have to be aligned.\n" |
13054 | "/// \\param __addr_lo\n" |
13055 | "/// A pointer to a 128-bit memory location containing a 128-bit integer\n" |
13056 | "/// vector. This vector is to be copied to bits[127:0] of the result. The\n" |
13057 | "/// address of the memory location does not have to be aligned.\n" |
13058 | "/// \\returns A 256-bit integer vector containing the concatenated result.\n" |
13059 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
13060 | "_mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)\n" |
13061 | "{\n" |
13062 | " __m256i __v256 = _mm256_castsi128_si256(_mm_loadu_si128(__addr_lo));\n" |
13063 | " return _mm256_insertf128_si256(__v256, _mm_loadu_si128(__addr_hi), 1);\n" |
13064 | "}\n" |
13065 | "\n" |
13066 | "/* SIMD store ops (unaligned) */\n" |
13067 | "/// Stores the upper and lower 128 bits of a 256-bit floating-point\n" |
13068 | "/// vector of [8 x float] into two different unaligned memory locations.\n" |
13069 | "///\n" |
13070 | "/// \\headerfile <x86intrin.h>\n" |
13071 | "///\n" |
13072 | "/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the\n" |
13073 | "/// store instructions.\n" |
13074 | "///\n" |
13075 | "/// \\param __addr_hi\n" |
13076 | "/// A pointer to a 128-bit memory location. Bits[255:128] of \\a __a are to be\n" |
13077 | "/// copied to this memory location. The address of this memory location does\n" |
13078 | "/// not have to be aligned.\n" |
13079 | "/// \\param __addr_lo\n" |
13080 | "/// A pointer to a 128-bit memory location. Bits[127:0] of \\a __a are to be\n" |
13081 | "/// copied to this memory location. The address of this memory location does\n" |
13082 | "/// not have to be aligned.\n" |
13083 | "/// \\param __a\n" |
13084 | "/// A 256-bit floating-point vector of [8 x float].\n" |
13085 | "static __inline void __DEFAULT_FN_ATTRS\n" |
13086 | "_mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)\n" |
13087 | "{\n" |
13088 | " __m128 __v128;\n" |
13089 | "\n" |
13090 | " __v128 = _mm256_castps256_ps128(__a);\n" |
13091 | " _mm_storeu_ps(__addr_lo, __v128);\n" |
13092 | " __v128 = _mm256_extractf128_ps(__a, 1);\n" |
13093 | " _mm_storeu_ps(__addr_hi, __v128);\n" |
13094 | "}\n" |
13095 | "\n" |
13096 | "/// Stores the upper and lower 128 bits of a 256-bit floating-point\n" |
13097 | "/// vector of [4 x double] into two different unaligned memory locations.\n" |
13098 | "///\n" |
13099 | "/// \\headerfile <x86intrin.h>\n" |
13100 | "///\n" |
13101 | "/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the\n" |
13102 | "/// store instructions.\n" |
13103 | "///\n" |
13104 | "/// \\param __addr_hi\n" |
13105 | "/// A pointer to a 128-bit memory location. Bits[255:128] of \\a __a are to be\n" |
13106 | "/// copied to this memory location. The address of this memory location does\n" |
13107 | "/// not have to be aligned.\n" |
13108 | "/// \\param __addr_lo\n" |
13109 | "/// A pointer to a 128-bit memory location. Bits[127:0] of \\a __a are to be\n" |
13110 | "/// copied to this memory location. The address of this memory location does\n" |
13111 | "/// not have to be aligned.\n" |
13112 | "/// \\param __a\n" |
13113 | "/// A 256-bit floating-point vector of [4 x double].\n" |
13114 | "static __inline void __DEFAULT_FN_ATTRS\n" |
13115 | "_mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)\n" |
13116 | "{\n" |
13117 | " __m128d __v128;\n" |
13118 | "\n" |
13119 | " __v128 = _mm256_castpd256_pd128(__a);\n" |
13120 | " _mm_storeu_pd(__addr_lo, __v128);\n" |
13121 | " __v128 = _mm256_extractf128_pd(__a, 1);\n" |
13122 | " _mm_storeu_pd(__addr_hi, __v128);\n" |
13123 | "}\n" |
13124 | "\n" |
13125 | "/// Stores the upper and lower 128 bits of a 256-bit integer vector into\n" |
13126 | "/// two different unaligned memory locations.\n" |
13127 | "///\n" |
13128 | "/// \\headerfile <x86intrin.h>\n" |
13129 | "///\n" |
13130 | "/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the\n" |
13131 | "/// store instructions.\n" |
13132 | "///\n" |
13133 | "/// \\param __addr_hi\n" |
13134 | "/// A pointer to a 128-bit memory location. Bits[255:128] of \\a __a are to be\n" |
13135 | "/// copied to this memory location. The address of this memory location does\n" |
13136 | "/// not have to be aligned.\n" |
13137 | "/// \\param __addr_lo\n" |
13138 | "/// A pointer to a 128-bit memory location. Bits[127:0] of \\a __a are to be\n" |
13139 | "/// copied to this memory location. The address of this memory location does\n" |
13140 | "/// not have to be aligned.\n" |
13141 | "/// \\param __a\n" |
13142 | "/// A 256-bit integer vector.\n" |
13143 | "static __inline void __DEFAULT_FN_ATTRS\n" |
13144 | "_mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)\n" |
13145 | "{\n" |
13146 | " __m128i __v128;\n" |
13147 | "\n" |
13148 | " __v128 = _mm256_castsi256_si128(__a);\n" |
13149 | " _mm_storeu_si128(__addr_lo, __v128);\n" |
13150 | " __v128 = _mm256_extractf128_si256(__a, 1);\n" |
13151 | " _mm_storeu_si128(__addr_hi, __v128);\n" |
13152 | "}\n" |
13153 | "\n" |
13154 | "/// Constructs a 256-bit floating-point vector of [8 x float] by\n" |
13155 | "/// concatenating two 128-bit floating-point vectors of [4 x float].\n" |
13156 | "///\n" |
13157 | "/// \\headerfile <x86intrin.h>\n" |
13158 | "///\n" |
13159 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
13160 | "///\n" |
13161 | "/// \\param __hi\n" |
13162 | "/// A 128-bit floating-point vector of [4 x float] to be copied to the upper\n" |
13163 | "/// 128 bits of the result.\n" |
13164 | "/// \\param __lo\n" |
13165 | "/// A 128-bit floating-point vector of [4 x float] to be copied to the lower\n" |
13166 | "/// 128 bits of the result.\n" |
13167 | "/// \\returns A 256-bit floating-point vector of [8 x float] containing the\n" |
13168 | "/// concatenated result.\n" |
13169 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
13170 | "_mm256_set_m128 (__m128 __hi, __m128 __lo)\n" |
13171 | "{\n" |
13172 | " return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);\n" |
13173 | "}\n" |
13174 | "\n" |
13175 | "/// Constructs a 256-bit floating-point vector of [4 x double] by\n" |
13176 | "/// concatenating two 128-bit floating-point vectors of [2 x double].\n" |
13177 | "///\n" |
13178 | "/// \\headerfile <x86intrin.h>\n" |
13179 | "///\n" |
13180 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
13181 | "///\n" |
13182 | "/// \\param __hi\n" |
13183 | "/// A 128-bit floating-point vector of [2 x double] to be copied to the upper\n" |
13184 | "/// 128 bits of the result.\n" |
13185 | "/// \\param __lo\n" |
13186 | "/// A 128-bit floating-point vector of [2 x double] to be copied to the lower\n" |
13187 | "/// 128 bits of the result.\n" |
13188 | "/// \\returns A 256-bit floating-point vector of [4 x double] containing the\n" |
13189 | "/// concatenated result.\n" |
13190 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
13191 | "_mm256_set_m128d (__m128d __hi, __m128d __lo)\n" |
13192 | "{\n" |
13193 | " return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3);\n" |
13194 | "}\n" |
13195 | "\n" |
13196 | "/// Constructs a 256-bit integer vector by concatenating two 128-bit\n" |
13197 | "/// integer vectors.\n" |
13198 | "///\n" |
13199 | "/// \\headerfile <x86intrin.h>\n" |
13200 | "///\n" |
13201 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
13202 | "///\n" |
13203 | "/// \\param __hi\n" |
13204 | "/// A 128-bit integer vector to be copied to the upper 128 bits of the\n" |
13205 | "/// result.\n" |
13206 | "/// \\param __lo\n" |
13207 | "/// A 128-bit integer vector to be copied to the lower 128 bits of the\n" |
13208 | "/// result.\n" |
13209 | "/// \\returns A 256-bit integer vector containing the concatenated result.\n" |
13210 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
13211 | "_mm256_set_m128i (__m128i __hi, __m128i __lo)\n" |
13212 | "{\n" |
13213 | " return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3);\n" |
13214 | "}\n" |
13215 | "\n" |
13216 | "/// Constructs a 256-bit floating-point vector of [8 x float] by\n" |
13217 | "/// concatenating two 128-bit floating-point vectors of [4 x float]. This is\n" |
13218 | "/// similar to _mm256_set_m128, but the order of the input parameters is\n" |
13219 | "/// swapped.\n" |
13220 | "///\n" |
13221 | "/// \\headerfile <x86intrin.h>\n" |
13222 | "///\n" |
13223 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
13224 | "///\n" |
13225 | "/// \\param __lo\n" |
13226 | "/// A 128-bit floating-point vector of [4 x float] to be copied to the lower\n" |
13227 | "/// 128 bits of the result.\n" |
13228 | "/// \\param __hi\n" |
13229 | "/// A 128-bit floating-point vector of [4 x float] to be copied to the upper\n" |
13230 | "/// 128 bits of the result.\n" |
13231 | "/// \\returns A 256-bit floating-point vector of [8 x float] containing the\n" |
13232 | "/// concatenated result.\n" |
13233 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
13234 | "_mm256_setr_m128 (__m128 __lo, __m128 __hi)\n" |
13235 | "{\n" |
13236 | " return _mm256_set_m128(__hi, __lo);\n" |
13237 | "}\n" |
13238 | "\n" |
13239 | "/// Constructs a 256-bit floating-point vector of [4 x double] by\n" |
13240 | "/// concatenating two 128-bit floating-point vectors of [2 x double]. This is\n" |
13241 | "/// similar to _mm256_set_m128d, but the order of the input parameters is\n" |
13242 | "/// swapped.\n" |
13243 | "///\n" |
13244 | "/// \\headerfile <x86intrin.h>\n" |
13245 | "///\n" |
13246 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
13247 | "///\n" |
13248 | "/// \\param __lo\n" |
13249 | "/// A 128-bit floating-point vector of [2 x double] to be copied to the lower\n" |
13250 | "/// 128 bits of the result.\n" |
13251 | "/// \\param __hi\n" |
13252 | "/// A 128-bit floating-point vector of [2 x double] to be copied to the upper\n" |
13253 | "/// 128 bits of the result.\n" |
13254 | "/// \\returns A 256-bit floating-point vector of [4 x double] containing the\n" |
13255 | "/// concatenated result.\n" |
13256 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
13257 | "_mm256_setr_m128d (__m128d __lo, __m128d __hi)\n" |
13258 | "{\n" |
13259 | " return (__m256d)_mm256_set_m128d(__hi, __lo);\n" |
13260 | "}\n" |
13261 | "\n" |
13262 | "/// Constructs a 256-bit integer vector by concatenating two 128-bit\n" |
13263 | "/// integer vectors. This is similar to _mm256_set_m128i, but the order of\n" |
13264 | "/// the input parameters is swapped.\n" |
13265 | "///\n" |
13266 | "/// \\headerfile <x86intrin.h>\n" |
13267 | "///\n" |
13268 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
13269 | "///\n" |
13270 | "/// \\param __lo\n" |
13271 | "/// A 128-bit integer vector to be copied to the lower 128 bits of the\n" |
13272 | "/// result.\n" |
13273 | "/// \\param __hi\n" |
13274 | "/// A 128-bit integer vector to be copied to the upper 128 bits of the\n" |
13275 | "/// result.\n" |
13276 | "/// \\returns A 256-bit integer vector containing the concatenated result.\n" |
13277 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
13278 | "_mm256_setr_m128i (__m128i __lo, __m128i __hi)\n" |
13279 | "{\n" |
13280 | " return (__m256i)_mm256_set_m128i(__hi, __lo);\n" |
13281 | "}\n" |
13282 | "\n" |
13283 | "#undef __DEFAULT_FN_ATTRS\n" |
13284 | "#undef __DEFAULT_FN_ATTRS128\n" |
13285 | "\n" |
13286 | "#endif /* __AVXINTRIN_H */\n" |
13287 | "" } , |
13288 | { "/builtins/bmi2intrin.h" , "/*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------===\n" |
13289 | " *\n" |
13290 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
13291 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
13292 | " * in the Software without restriction, including without limitation the rights\n" |
13293 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
13294 | " * copies of the Software, and to permit persons to whom the Software is\n" |
13295 | " * furnished to do so, subject to the following conditions:\n" |
13296 | " *\n" |
13297 | " * The above copyright notice and this permission notice shall be included in\n" |
13298 | " * all copies or substantial portions of the Software.\n" |
13299 | " *\n" |
13300 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
13301 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
13302 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
13303 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
13304 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
13305 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
13306 | " * THE SOFTWARE.\n" |
13307 | " *\n" |
13308 | " *===-----------------------------------------------------------------------===\n" |
13309 | " */\n" |
13310 | "\n" |
13311 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
13312 | "#error \"Never use <bmi2intrin.h> directly; include <x86intrin.h> instead.\"\n" |
13313 | "#endif\n" |
13314 | "\n" |
13315 | "#ifndef __BMI2INTRIN_H\n" |
13316 | "#define __BMI2INTRIN_H\n" |
13317 | "\n" |
13318 | "/* Define the default attributes for the functions in this file. */\n" |
13319 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"bmi2\")))\n" |
13320 | "\n" |
13321 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
13322 | "_bzhi_u32(unsigned int __X, unsigned int __Y)\n" |
13323 | "{\n" |
13324 | " return __builtin_ia32_bzhi_si(__X, __Y);\n" |
13325 | "}\n" |
13326 | "\n" |
13327 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
13328 | "_pdep_u32(unsigned int __X, unsigned int __Y)\n" |
13329 | "{\n" |
13330 | " return __builtin_ia32_pdep_si(__X, __Y);\n" |
13331 | "}\n" |
13332 | "\n" |
13333 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
13334 | "_pext_u32(unsigned int __X, unsigned int __Y)\n" |
13335 | "{\n" |
13336 | " return __builtin_ia32_pext_si(__X, __Y);\n" |
13337 | "}\n" |
13338 | "\n" |
13339 | "#ifdef __x86_64__\n" |
13340 | "\n" |
13341 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
13342 | "_bzhi_u64(unsigned long long __X, unsigned long long __Y)\n" |
13343 | "{\n" |
13344 | " return __builtin_ia32_bzhi_di(__X, __Y);\n" |
13345 | "}\n" |
13346 | "\n" |
13347 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
13348 | "_pdep_u64(unsigned long long __X, unsigned long long __Y)\n" |
13349 | "{\n" |
13350 | " return __builtin_ia32_pdep_di(__X, __Y);\n" |
13351 | "}\n" |
13352 | "\n" |
13353 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
13354 | "_pext_u64(unsigned long long __X, unsigned long long __Y)\n" |
13355 | "{\n" |
13356 | " return __builtin_ia32_pext_di(__X, __Y);\n" |
13357 | "}\n" |
13358 | "\n" |
13359 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
13360 | "_mulx_u64 (unsigned long long __X, unsigned long long __Y,\n" |
13361 | " unsigned long long *__P)\n" |
13362 | "{\n" |
13363 | " unsigned __int128 __res = (unsigned __int128) __X * __Y;\n" |
13364 | " *__P = (unsigned long long) (__res >> 64);\n" |
13365 | " return (unsigned long long) __res;\n" |
13366 | "}\n" |
13367 | "\n" |
13368 | "#else /* !__x86_64__ */\n" |
13369 | "\n" |
13370 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
13371 | "_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)\n" |
13372 | "{\n" |
13373 | " unsigned long long __res = (unsigned long long) __X * __Y;\n" |
13374 | " *__P = (unsigned int) (__res >> 32);\n" |
13375 | " return (unsigned int) __res;\n" |
13376 | "}\n" |
13377 | "\n" |
13378 | "#endif /* !__x86_64__ */\n" |
13379 | "\n" |
13380 | "#undef __DEFAULT_FN_ATTRS\n" |
13381 | "\n" |
13382 | "#endif /* __BMI2INTRIN_H */\n" |
13383 | "" } , |
13384 | { "/builtins/bmiintrin.h" , "/*===---- bmiintrin.h - BMI intrinsics -------------------------------------===\n" |
13385 | " *\n" |
13386 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
13387 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
13388 | " * in the Software without restriction, including without limitation the rights\n" |
13389 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
13390 | " * copies of the Software, and to permit persons to whom the Software is\n" |
13391 | " * furnished to do so, subject to the following conditions:\n" |
13392 | " *\n" |
13393 | " * The above copyright notice and this permission notice shall be included in\n" |
13394 | " * all copies or substantial portions of the Software.\n" |
13395 | " *\n" |
13396 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
13397 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
13398 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
13399 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
13400 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
13401 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
13402 | " * THE SOFTWARE.\n" |
13403 | " *\n" |
13404 | " *===-----------------------------------------------------------------------===\n" |
13405 | " */\n" |
13406 | "\n" |
13407 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
13408 | "#error \"Never use <bmiintrin.h> directly; include <x86intrin.h> instead.\"\n" |
13409 | "#endif\n" |
13410 | "\n" |
13411 | "#ifndef __BMIINTRIN_H\n" |
13412 | "#define __BMIINTRIN_H\n" |
13413 | "\n" |
13414 | "#define _tzcnt_u16(a) (__tzcnt_u16((a)))\n" |
13415 | "\n" |
13416 | "#define _andn_u32(a, b) (__andn_u32((a), (b)))\n" |
13417 | "\n" |
13418 | "/* _bextr_u32 != __bextr_u32 */\n" |
13419 | "#define _blsi_u32(a) (__blsi_u32((a)))\n" |
13420 | "\n" |
13421 | "#define _blsmsk_u32(a) (__blsmsk_u32((a)))\n" |
13422 | "\n" |
13423 | "#define _blsr_u32(a) (__blsr_u32((a)))\n" |
13424 | "\n" |
13425 | "#define _tzcnt_u32(a) (__tzcnt_u32((a)))\n" |
13426 | "\n" |
13427 | "/* Define the default attributes for the functions in this file. */\n" |
13428 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"bmi\")))\n" |
13429 | "\n" |
13430 | "/* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT\n" |
13431 | " instruction behaves as BSF on non-BMI targets, there is code that expects\n" |
13432 | " to use it as a potentially faster version of BSF. */\n" |
13433 | "#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n" |
13434 | "\n" |
13435 | "/// Counts the number of trailing zero bits in the operand.\n" |
13436 | "///\n" |
13437 | "/// \\headerfile <x86intrin.h>\n" |
13438 | "///\n" |
13439 | "/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n" |
13440 | "///\n" |
13441 | "/// \\param __X\n" |
13442 | "/// An unsigned 16-bit integer whose trailing zeros are to be counted.\n" |
13443 | "/// \\returns An unsigned 16-bit integer containing the number of trailing zero\n" |
13444 | "/// bits in the operand.\n" |
13445 | "static __inline__ unsigned short __RELAXED_FN_ATTRS\n" |
13446 | "__tzcnt_u16(unsigned short __X)\n" |
13447 | "{\n" |
13448 | " return __builtin_ia32_tzcnt_u16(__X);\n" |
13449 | "}\n" |
13450 | "\n" |
13451 | "/// Performs a bitwise AND of the second operand with the one's\n" |
13452 | "/// complement of the first operand.\n" |
13453 | "///\n" |
13454 | "/// \\headerfile <x86intrin.h>\n" |
13455 | "///\n" |
13456 | "/// This intrinsic corresponds to the <c> ANDN </c> instruction.\n" |
13457 | "///\n" |
13458 | "/// \\param __X\n" |
13459 | "/// An unsigned integer containing one of the operands.\n" |
13460 | "/// \\param __Y\n" |
13461 | "/// An unsigned integer containing one of the operands.\n" |
13462 | "/// \\returns An unsigned integer containing the bitwise AND of the second\n" |
13463 | "/// operand with the one's complement of the first operand.\n" |
13464 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
13465 | "__andn_u32(unsigned int __X, unsigned int __Y)\n" |
13466 | "{\n" |
13467 | " return ~__X & __Y;\n" |
13468 | "}\n" |
13469 | "\n" |
13470 | "/* AMD-specified, double-leading-underscore version of BEXTR */\n" |
13471 | "/// Extracts the specified bits from the first operand and returns them\n" |
13472 | "/// in the least significant bits of the result.\n" |
13473 | "///\n" |
13474 | "/// \\headerfile <x86intrin.h>\n" |
13475 | "///\n" |
13476 | "/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n" |
13477 | "///\n" |
13478 | "/// \\param __X\n" |
13479 | "/// An unsigned integer whose bits are to be extracted.\n" |
13480 | "/// \\param __Y\n" |
13481 | "/// An unsigned integer used to specify which bits are extracted. Bits [7:0]\n" |
13482 | "/// specify the index of the least significant bit. Bits [15:8] specify the\n" |
13483 | "/// number of bits to be extracted.\n" |
13484 | "/// \\returns An unsigned integer whose least significant bits contain the\n" |
13485 | "/// extracted bits.\n" |
13486 | "/// \\see _bextr_u32\n" |
13487 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
13488 | "__bextr_u32(unsigned int __X, unsigned int __Y)\n" |
13489 | "{\n" |
13490 | " return __builtin_ia32_bextr_u32(__X, __Y);\n" |
13491 | "}\n" |
13492 | "\n" |
13493 | "/* Intel-specified, single-leading-underscore version of BEXTR */\n" |
13494 | "/// Extracts the specified bits from the first operand and returns them\n" |
13495 | "/// in the least significant bits of the result.\n" |
13496 | "///\n" |
13497 | "/// \\headerfile <x86intrin.h>\n" |
13498 | "///\n" |
13499 | "/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n" |
13500 | "///\n" |
13501 | "/// \\param __X\n" |
13502 | "/// An unsigned integer whose bits are to be extracted.\n" |
13503 | "/// \\param __Y\n" |
13504 | "/// An unsigned integer used to specify the index of the least significant\n" |
13505 | "/// bit for the bits to be extracted. Bits [7:0] specify the index.\n" |
13506 | "/// \\param __Z\n" |
13507 | "/// An unsigned integer used to specify the number of bits to be extracted.\n" |
13508 | "/// Bits [7:0] specify the number of bits.\n" |
13509 | "/// \\returns An unsigned integer whose least significant bits contain the\n" |
13510 | "/// extracted bits.\n" |
13511 | "/// \\see __bextr_u32\n" |
13512 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
13513 | "_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)\n" |
13514 | "{\n" |
13515 | " return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));\n" |
13516 | "}\n" |
13517 | "\n" |
13518 | "/// Clears all bits in the source except for the least significant bit\n" |
13519 | "/// containing a value of 1 and returns the result.\n" |
13520 | "///\n" |
13521 | "/// \\headerfile <x86intrin.h>\n" |
13522 | "///\n" |
13523 | "/// This intrinsic corresponds to the <c> BLSI </c> instruction.\n" |
13524 | "///\n" |
13525 | "/// \\param __X\n" |
13526 | "/// An unsigned integer whose bits are to be cleared.\n" |
13527 | "/// \\returns An unsigned integer containing the result of clearing the bits from\n" |
13528 | "/// the source operand.\n" |
13529 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
13530 | "__blsi_u32(unsigned int __X)\n" |
13531 | "{\n" |
13532 | " return __X & -__X;\n" |
13533 | "}\n" |
13534 | "\n" |
13535 | "/// Creates a mask whose bits are set to 1, using bit 0 up to and\n" |
13536 | "/// including the least significant bit that is set to 1 in the source\n" |
13537 | "/// operand and returns the result.\n" |
13538 | "///\n" |
13539 | "/// \\headerfile <x86intrin.h>\n" |
13540 | "///\n" |
13541 | "/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.\n" |
13542 | "///\n" |
13543 | "/// \\param __X\n" |
13544 | "/// An unsigned integer used to create the mask.\n" |
13545 | "/// \\returns An unsigned integer containing the newly created mask.\n" |
13546 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
13547 | "__blsmsk_u32(unsigned int __X)\n" |
13548 | "{\n" |
13549 | " return __X ^ (__X - 1);\n" |
13550 | "}\n" |
13551 | "\n" |
13552 | "/// Clears the least significant bit that is set to 1 in the source\n" |
13553 | "/// operand and returns the result.\n" |
13554 | "///\n" |
13555 | "/// \\headerfile <x86intrin.h>\n" |
13556 | "///\n" |
13557 | "/// This intrinsic corresponds to the <c> BLSR </c> instruction.\n" |
13558 | "///\n" |
13559 | "/// \\param __X\n" |
13560 | "/// An unsigned integer containing the operand to be cleared.\n" |
13561 | "/// \\returns An unsigned integer containing the result of clearing the source\n" |
13562 | "/// operand.\n" |
13563 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
13564 | "__blsr_u32(unsigned int __X)\n" |
13565 | "{\n" |
13566 | " return __X & (__X - 1);\n" |
13567 | "}\n" |
13568 | "\n" |
13569 | "/// Counts the number of trailing zero bits in the operand.\n" |
13570 | "///\n" |
13571 | "/// \\headerfile <x86intrin.h>\n" |
13572 | "///\n" |
13573 | "/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n" |
13574 | "///\n" |
13575 | "/// \\param __X\n" |
13576 | "/// An unsigned 32-bit integer whose trailing zeros are to be counted.\n" |
13577 | "/// \\returns An unsigned 32-bit integer containing the number of trailing zero\n" |
13578 | "/// bits in the operand.\n" |
13579 | "static __inline__ unsigned int __RELAXED_FN_ATTRS\n" |
13580 | "__tzcnt_u32(unsigned int __X)\n" |
13581 | "{\n" |
13582 | " return __builtin_ia32_tzcnt_u32(__X);\n" |
13583 | "}\n" |
13584 | "\n" |
13585 | "/// Counts the number of trailing zero bits in the operand.\n" |
13586 | "///\n" |
13587 | "/// \\headerfile <x86intrin.h>\n" |
13588 | "///\n" |
13589 | "/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n" |
13590 | "///\n" |
13591 | "/// \\param __X\n" |
13592 | "/// An unsigned 32-bit integer whose trailing zeros are to be counted.\n" |
13593 | "/// \\returns An 32-bit integer containing the number of trailing zero bits in\n" |
13594 | "/// the operand.\n" |
13595 | "static __inline__ int __RELAXED_FN_ATTRS\n" |
13596 | "_mm_tzcnt_32(unsigned int __X)\n" |
13597 | "{\n" |
13598 | " return __builtin_ia32_tzcnt_u32(__X);\n" |
13599 | "}\n" |
13600 | "\n" |
13601 | "#ifdef __x86_64__\n" |
13602 | "\n" |
13603 | "#define _andn_u64(a, b) (__andn_u64((a), (b)))\n" |
13604 | "\n" |
13605 | "/* _bextr_u64 != __bextr_u64 */\n" |
13606 | "#define _blsi_u64(a) (__blsi_u64((a)))\n" |
13607 | "\n" |
13608 | "#define _blsmsk_u64(a) (__blsmsk_u64((a)))\n" |
13609 | "\n" |
13610 | "#define _blsr_u64(a) (__blsr_u64((a)))\n" |
13611 | "\n" |
13612 | "#define _tzcnt_u64(a) (__tzcnt_u64((a)))\n" |
13613 | "\n" |
13614 | "/// Performs a bitwise AND of the second operand with the one's\n" |
13615 | "/// complement of the first operand.\n" |
13616 | "///\n" |
13617 | "/// \\headerfile <x86intrin.h>\n" |
13618 | "///\n" |
13619 | "/// This intrinsic corresponds to the <c> ANDN </c> instruction.\n" |
13620 | "///\n" |
13621 | "/// \\param __X\n" |
13622 | "/// An unsigned 64-bit integer containing one of the operands.\n" |
13623 | "/// \\param __Y\n" |
13624 | "/// An unsigned 64-bit integer containing one of the operands.\n" |
13625 | "/// \\returns An unsigned 64-bit integer containing the bitwise AND of the second\n" |
13626 | "/// operand with the one's complement of the first operand.\n" |
13627 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
13628 | "__andn_u64 (unsigned long long __X, unsigned long long __Y)\n" |
13629 | "{\n" |
13630 | " return ~__X & __Y;\n" |
13631 | "}\n" |
13632 | "\n" |
13633 | "/* AMD-specified, double-leading-underscore version of BEXTR */\n" |
13634 | "/// Extracts the specified bits from the first operand and returns them\n" |
13635 | "/// in the least significant bits of the result.\n" |
13636 | "///\n" |
13637 | "/// \\headerfile <x86intrin.h>\n" |
13638 | "///\n" |
13639 | "/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n" |
13640 | "///\n" |
13641 | "/// \\param __X\n" |
13642 | "/// An unsigned 64-bit integer whose bits are to be extracted.\n" |
13643 | "/// \\param __Y\n" |
13644 | "/// An unsigned 64-bit integer used to specify which bits are extracted. Bits\n" |
13645 | "/// [7:0] specify the index of the least significant bit. Bits [15:8] specify\n" |
13646 | "/// the number of bits to be extracted.\n" |
13647 | "/// \\returns An unsigned 64-bit integer whose least significant bits contain the\n" |
13648 | "/// extracted bits.\n" |
13649 | "/// \\see _bextr_u64\n" |
13650 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
13651 | "__bextr_u64(unsigned long long __X, unsigned long long __Y)\n" |
13652 | "{\n" |
13653 | " return __builtin_ia32_bextr_u64(__X, __Y);\n" |
13654 | "}\n" |
13655 | "\n" |
13656 | "/* Intel-specified, single-leading-underscore version of BEXTR */\n" |
13657 | "/// Extracts the specified bits from the first operand and returns them\n" |
13658 | "/// in the least significant bits of the result.\n" |
13659 | "///\n" |
13660 | "/// \\headerfile <x86intrin.h>\n" |
13661 | "///\n" |
13662 | "/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n" |
13663 | "///\n" |
13664 | "/// \\param __X\n" |
13665 | "/// An unsigned 64-bit integer whose bits are to be extracted.\n" |
13666 | "/// \\param __Y\n" |
13667 | "/// An unsigned integer used to specify the index of the least significant\n" |
13668 | "/// bit for the bits to be extracted. Bits [7:0] specify the index.\n" |
13669 | "/// \\param __Z\n" |
13670 | "/// An unsigned integer used to specify the number of bits to be extracted.\n" |
13671 | "/// Bits [7:0] specify the number of bits.\n" |
13672 | "/// \\returns An unsigned 64-bit integer whose least significant bits contain the\n" |
13673 | "/// extracted bits.\n" |
13674 | "/// \\see __bextr_u64\n" |
13675 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
13676 | "_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)\n" |
13677 | "{\n" |
13678 | " return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));\n" |
13679 | "}\n" |
13680 | "\n" |
13681 | "/// Clears all bits in the source except for the least significant bit\n" |
13682 | "/// containing a value of 1 and returns the result.\n" |
13683 | "///\n" |
13684 | "/// \\headerfile <x86intrin.h>\n" |
13685 | "///\n" |
13686 | "/// This intrinsic corresponds to the <c> BLSI </c> instruction.\n" |
13687 | "///\n" |
13688 | "/// \\param __X\n" |
13689 | "/// An unsigned 64-bit integer whose bits are to be cleared.\n" |
13690 | "/// \\returns An unsigned 64-bit integer containing the result of clearing the\n" |
13691 | "/// bits from the source operand.\n" |
13692 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
13693 | "__blsi_u64(unsigned long long __X)\n" |
13694 | "{\n" |
13695 | " return __X & -__X;\n" |
13696 | "}\n" |
13697 | "\n" |
13698 | "/// Creates a mask whose bits are set to 1, using bit 0 up to and\n" |
13699 | "/// including the least significant bit that is set to 1 in the source\n" |
13700 | "/// operand and returns the result.\n" |
13701 | "///\n" |
13702 | "/// \\headerfile <x86intrin.h>\n" |
13703 | "///\n" |
13704 | "/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.\n" |
13705 | "///\n" |
13706 | "/// \\param __X\n" |
13707 | "/// An unsigned 64-bit integer used to create the mask.\n" |
13708 | "/// \\returns An unsigned 64-bit integer containing the newly created mask.\n" |
13709 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
13710 | "__blsmsk_u64(unsigned long long __X)\n" |
13711 | "{\n" |
13712 | " return __X ^ (__X - 1);\n" |
13713 | "}\n" |
13714 | "\n" |
13715 | "/// Clears the least significant bit that is set to 1 in the source\n" |
13716 | "/// operand and returns the result.\n" |
13717 | "///\n" |
13718 | "/// \\headerfile <x86intrin.h>\n" |
13719 | "///\n" |
13720 | "/// This intrinsic corresponds to the <c> BLSR </c> instruction.\n" |
13721 | "///\n" |
13722 | "/// \\param __X\n" |
13723 | "/// An unsigned 64-bit integer containing the operand to be cleared.\n" |
13724 | "/// \\returns An unsigned 64-bit integer containing the result of clearing the\n" |
13725 | "/// source operand.\n" |
13726 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
13727 | "__blsr_u64(unsigned long long __X)\n" |
13728 | "{\n" |
13729 | " return __X & (__X - 1);\n" |
13730 | "}\n" |
13731 | "\n" |
13732 | "/// Counts the number of trailing zero bits in the operand.\n" |
13733 | "///\n" |
13734 | "/// \\headerfile <x86intrin.h>\n" |
13735 | "///\n" |
13736 | "/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n" |
13737 | "///\n" |
13738 | "/// \\param __X\n" |
13739 | "/// An unsigned 64-bit integer whose trailing zeros are to be counted.\n" |
13740 | "/// \\returns An unsigned 64-bit integer containing the number of trailing zero\n" |
13741 | "/// bits in the operand.\n" |
13742 | "static __inline__ unsigned long long __RELAXED_FN_ATTRS\n" |
13743 | "__tzcnt_u64(unsigned long long __X)\n" |
13744 | "{\n" |
13745 | " return __builtin_ia32_tzcnt_u64(__X);\n" |
13746 | "}\n" |
13747 | "\n" |
13748 | "/// Counts the number of trailing zero bits in the operand.\n" |
13749 | "///\n" |
13750 | "/// \\headerfile <x86intrin.h>\n" |
13751 | "///\n" |
13752 | "/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n" |
13753 | "///\n" |
13754 | "/// \\param __X\n" |
13755 | "/// An unsigned 64-bit integer whose trailing zeros are to be counted.\n" |
13756 | "/// \\returns An 64-bit integer containing the number of trailing zero bits in\n" |
13757 | "/// the operand.\n" |
13758 | "static __inline__ long long __RELAXED_FN_ATTRS\n" |
13759 | "_mm_tzcnt_64(unsigned long long __X)\n" |
13760 | "{\n" |
13761 | " return __builtin_ia32_tzcnt_u64(__X);\n" |
13762 | "}\n" |
13763 | "\n" |
13764 | "#endif /* __x86_64__ */\n" |
13765 | "\n" |
13766 | "#undef __DEFAULT_FN_ATTRS\n" |
13767 | "#undef __RELAXED_FN_ATTRS\n" |
13768 | "\n" |
13769 | "#endif /* __BMIINTRIN_H */\n" |
13770 | "" } , |
13771 | { "/builtins/cetintrin.h" , "/*===---- cetintrin.h - CET intrinsic --------------------------------------===\n" |
13772 | " *\n" |
13773 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
13774 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
13775 | " * in the Software without restriction, including without limitation the rights\n" |
13776 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
13777 | " * copies of the Software, and to permit persons to whom the Software is\n" |
13778 | " * furnished to do so, subject to the following conditions:\n" |
13779 | " *\n" |
13780 | " * The above copyright notice and this permission notice shall be included in\n" |
13781 | " * all copies or substantial portions of the Software.\n" |
13782 | " *\n" |
13783 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
13784 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
13785 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
13786 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
13787 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
13788 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
13789 | " * THE SOFTWARE.\n" |
13790 | " *\n" |
13791 | " *===-----------------------------------------------------------------------===\n" |
13792 | " */\n" |
13793 | "\n" |
13794 | "#ifndef __IMMINTRIN_H\n" |
13795 | "#error \"Never use <cetintrin.h> directly; include <immintrin.h> instead.\"\n" |
13796 | "#endif\n" |
13797 | "\n" |
13798 | "#ifndef __CETINTRIN_H\n" |
13799 | "#define __CETINTRIN_H\n" |
13800 | "\n" |
13801 | "/* Define the default attributes for the functions in this file. */\n" |
13802 | "#define __DEFAULT_FN_ATTRS \\\n" |
13803 | " __attribute__((__always_inline__, __nodebug__, __target__(\"shstk\")))\n" |
13804 | "\n" |
13805 | "static __inline__ void __DEFAULT_FN_ATTRS _incsspd(int __a) {\n" |
13806 | " __builtin_ia32_incsspd(__a);\n" |
13807 | "}\n" |
13808 | "\n" |
13809 | "#ifdef __x86_64__\n" |
13810 | "static __inline__ void __DEFAULT_FN_ATTRS _incsspq(unsigned long long __a) {\n" |
13811 | " __builtin_ia32_incsspq(__a);\n" |
13812 | "}\n" |
13813 | "#endif /* __x86_64__ */\n" |
13814 | "\n" |
13815 | "#ifdef __x86_64__\n" |
13816 | "static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) {\n" |
13817 | " __builtin_ia32_incsspq(__a);\n" |
13818 | "}\n" |
13819 | "#else /* __x86_64__ */\n" |
13820 | "static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) {\n" |
13821 | " __builtin_ia32_incsspd((int)__a);\n" |
13822 | "}\n" |
13823 | "#endif /* __x86_64__ */\n" |
13824 | "\n" |
13825 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdsspd(unsigned int __a) {\n" |
13826 | " return __builtin_ia32_rdsspd(__a);\n" |
13827 | "}\n" |
13828 | "\n" |
13829 | "#ifdef __x86_64__\n" |
13830 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS _rdsspq(unsigned long long __a) {\n" |
13831 | " return __builtin_ia32_rdsspq(__a);\n" |
13832 | "}\n" |
13833 | "#endif /* __x86_64__ */\n" |
13834 | "\n" |
13835 | "#ifdef __x86_64__\n" |
13836 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS _get_ssp(void) {\n" |
13837 | " return __builtin_ia32_rdsspq(0);\n" |
13838 | "}\n" |
13839 | "#else /* __x86_64__ */\n" |
13840 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS _get_ssp(void) {\n" |
13841 | " return __builtin_ia32_rdsspd(0);\n" |
13842 | "}\n" |
13843 | "#endif /* __x86_64__ */\n" |
13844 | "\n" |
13845 | "static __inline__ void __DEFAULT_FN_ATTRS _saveprevssp() {\n" |
13846 | " __builtin_ia32_saveprevssp();\n" |
13847 | "}\n" |
13848 | "\n" |
13849 | "static __inline__ void __DEFAULT_FN_ATTRS _rstorssp(void * __p) {\n" |
13850 | " __builtin_ia32_rstorssp(__p);\n" |
13851 | "}\n" |
13852 | "\n" |
13853 | "static __inline__ void __DEFAULT_FN_ATTRS _wrssd(unsigned int __a, void * __p) {\n" |
13854 | " __builtin_ia32_wrssd(__a, __p);\n" |
13855 | "}\n" |
13856 | "\n" |
13857 | "#ifdef __x86_64__\n" |
13858 | "static __inline__ void __DEFAULT_FN_ATTRS _wrssq(unsigned long long __a, void * __p) {\n" |
13859 | " __builtin_ia32_wrssq(__a, __p);\n" |
13860 | "}\n" |
13861 | "#endif /* __x86_64__ */\n" |
13862 | "\n" |
13863 | "static __inline__ void __DEFAULT_FN_ATTRS _wrussd(unsigned int __a, void * __p) {\n" |
13864 | " __builtin_ia32_wrussd(__a, __p);\n" |
13865 | "}\n" |
13866 | "\n" |
13867 | "#ifdef __x86_64__\n" |
13868 | "static __inline__ void __DEFAULT_FN_ATTRS _wrussq(unsigned long long __a, void * __p) {\n" |
13869 | " __builtin_ia32_wrussq(__a, __p);\n" |
13870 | "}\n" |
13871 | "#endif /* __x86_64__ */\n" |
13872 | "\n" |
13873 | "static __inline__ void __DEFAULT_FN_ATTRS _setssbsy() {\n" |
13874 | " __builtin_ia32_setssbsy();\n" |
13875 | "}\n" |
13876 | "\n" |
13877 | "static __inline__ void __DEFAULT_FN_ATTRS _clrssbsy(void * __p) {\n" |
13878 | " __builtin_ia32_clrssbsy(__p);\n" |
13879 | "}\n" |
13880 | "\n" |
13881 | "#undef __DEFAULT_FN_ATTRS\n" |
13882 | "\n" |
13883 | "#endif /* __CETINTRIN_H */\n" |
13884 | "" } , |
13885 | { "/builtins/cldemoteintrin.h" , "/*===---- cldemoteintrin.h - CLDEMOTE intrinsic ----------------------------===\n" |
13886 | " *\n" |
13887 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
13888 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
13889 | " * in the Software without restriction, including without limitation the rights\n" |
13890 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
13891 | " * copies of the Software, and to permit persons to whom the Software is\n" |
13892 | " * furnished to do so, subject to the following conditions:\n" |
13893 | " *\n" |
13894 | " * The above copyright notice and this permission notice shall be included in\n" |
13895 | " * all copies or substantial portions of the Software.\n" |
13896 | " *\n" |
13897 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
13898 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
13899 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
13900 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
13901 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
13902 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
13903 | " * THE SOFTWARE.\n" |
13904 | " *\n" |
13905 | " *===-----------------------------------------------------------------------===\n" |
13906 | " */\n" |
13907 | "\n" |
13908 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
13909 | "#error \"Never use <cldemoteintrin.h> directly; include <x86intrin.h> instead.\"\n" |
13910 | "#endif\n" |
13911 | "\n" |
13912 | "#ifndef __CLDEMOTEINTRIN_H\n" |
13913 | "#define __CLDEMOTEINTRIN_H\n" |
13914 | "\n" |
13915 | "/* Define the default attributes for the functions in this file. */\n" |
13916 | "#define __DEFAULT_FN_ATTRS \\\n" |
13917 | " __attribute__((__always_inline__, __nodebug__, __target__(\"cldemote\")))\n" |
13918 | "\n" |
13919 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
13920 | "_cldemote(const void * __P) {\n" |
13921 | " __builtin_ia32_cldemote(__P);\n" |
13922 | "}\n" |
13923 | "\n" |
13924 | "#undef __DEFAULT_FN_ATTRS\n" |
13925 | "\n" |
13926 | "#endif\n" |
13927 | "" } , |
13928 | { "/builtins/clflushoptintrin.h" , "/*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------===\n" |
13929 | " *\n" |
13930 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
13931 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
13932 | " * in the Software without restriction, including without limitation the rights\n" |
13933 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
13934 | " * copies of the Software, and to permit persons to whom the Software is\n" |
13935 | " * furnished to do so, subject to the following conditions:\n" |
13936 | " *\n" |
13937 | " * The above copyright notice and this permission notice shall be included in\n" |
13938 | " * all copies or substantial portions of the Software.\n" |
13939 | " *\n" |
13940 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
13941 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
13942 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
13943 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
13944 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
13945 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
13946 | " * THE SOFTWARE.\n" |
13947 | " *\n" |
13948 | " *===-----------------------------------------------------------------------===\n" |
13949 | " */\n" |
13950 | "\n" |
13951 | "#ifndef __IMMINTRIN_H\n" |
13952 | "#error \"Never use <clflushoptintrin.h> directly; include <immintrin.h> instead.\"\n" |
13953 | "#endif\n" |
13954 | "\n" |
13955 | "#ifndef __CLFLUSHOPTINTRIN_H\n" |
13956 | "#define __CLFLUSHOPTINTRIN_H\n" |
13957 | "\n" |
13958 | "/* Define the default attributes for the functions in this file. */\n" |
13959 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"clflushopt\")))\n" |
13960 | "\n" |
13961 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
13962 | "_mm_clflushopt(void const * __m) {\n" |
13963 | " __builtin_ia32_clflushopt(__m);\n" |
13964 | "}\n" |
13965 | "\n" |
13966 | "#undef __DEFAULT_FN_ATTRS\n" |
13967 | "\n" |
13968 | "#endif\n" |
13969 | "" } , |
13970 | { "/builtins/clwbintrin.h" , "/*===---- clwbintrin.h - CLWB intrinsic ------------------------------------===\n" |
13971 | " *\n" |
13972 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
13973 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
13974 | " * in the Software without restriction, including without limitation the rights\n" |
13975 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
13976 | " * copies of the Software, and to permit persons to whom the Software is\n" |
13977 | " * furnished to do so, subject to the following conditions:\n" |
13978 | " *\n" |
13979 | " * The above copyright notice and this permission notice shall be included in\n" |
13980 | " * all copies or substantial portions of the Software.\n" |
13981 | " *\n" |
13982 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
13983 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
13984 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
13985 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
13986 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
13987 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
13988 | " * THE SOFTWARE.\n" |
13989 | " *\n" |
13990 | " *===-----------------------------------------------------------------------===\n" |
13991 | " */\n" |
13992 | "\n" |
13993 | "#ifndef __IMMINTRIN_H\n" |
13994 | "#error \"Never use <clwbintrin.h> directly; include <immintrin.h> instead.\"\n" |
13995 | "#endif\n" |
13996 | "\n" |
13997 | "#ifndef __CLWBINTRIN_H\n" |
13998 | "#define __CLWBINTRIN_H\n" |
13999 | "\n" |
14000 | "/* Define the default attributes for the functions in this file. */\n" |
14001 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"clwb\")))\n" |
14002 | "\n" |
14003 | "/// Writes back to memory the cache line (if modified) that contains the\n" |
14004 | "/// linear address specified in \\a __p from any level of the cache hierarchy in\n" |
14005 | "/// the cache coherence domain\n" |
14006 | "///\n" |
14007 | "/// \\headerfile <immintrin.h>\n" |
14008 | "///\n" |
14009 | "/// This intrinsic corresponds to the <c> CLWB </c> instruction.\n" |
14010 | "///\n" |
14011 | "/// \\param __p\n" |
14012 | "/// A pointer to the memory location used to identify the cache line to be\n" |
14013 | "/// written back.\n" |
14014 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
14015 | "_mm_clwb(void const *__p) {\n" |
14016 | " __builtin_ia32_clwb(__p);\n" |
14017 | "}\n" |
14018 | "\n" |
14019 | "#undef __DEFAULT_FN_ATTRS\n" |
14020 | "\n" |
14021 | "#endif\n" |
14022 | "" } , |
14023 | { "/builtins/clzerointrin.h" , "/*===----------------------- clzerointrin.h - CLZERO ----------------------===\n" |
14024 | " *\n" |
14025 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
14026 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
14027 | " * in the Software without restriction, including without limitation the rights\n" |
14028 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
14029 | " * copies of the Software, and to permit persons to whom the Software is\n" |
14030 | " * furnished to do so, subject to the following conditions:\n" |
14031 | " *\n" |
14032 | " * The above copyright notice and this permission notice shall be included in\n" |
14033 | " * all copies or substantial portions of the Software.\n" |
14034 | " *\n" |
14035 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
14036 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
14037 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
14038 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
14039 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
14040 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
14041 | " * THE SOFTWARE.\n" |
14042 | " *\n" |
14043 | " *===-----------------------------------------------------------------------===\n" |
14044 | " */\n" |
14045 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
14046 | "#error \"Never use <clzerointrin.h> directly; include <x86intrin.h> instead.\"\n" |
14047 | "#endif\n" |
14048 | "\n" |
14049 | "#ifndef __CLZEROINTRIN_H\n" |
14050 | "#define __CLZEROINTRIN_H\n" |
14051 | "\n" |
14052 | "/* Define the default attributes for the functions in this file. */\n" |
14053 | "#define __DEFAULT_FN_ATTRS \\\n" |
14054 | " __attribute__((__always_inline__, __nodebug__, __target__(\"clzero\")))\n" |
14055 | "\n" |
14056 | "/// Loads the cache line address and zero's out the cacheline\n" |
14057 | "///\n" |
14058 | "/// \\headerfile <clzerointrin.h>\n" |
14059 | "///\n" |
14060 | "/// This intrinsic corresponds to the <c> CLZERO </c> instruction.\n" |
14061 | "///\n" |
14062 | "/// \\param __line\n" |
14063 | "/// A pointer to a cacheline which needs to be zeroed out.\n" |
14064 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
14065 | "_mm_clzero (void * __line)\n" |
14066 | "{\n" |
14067 | " __builtin_ia32_clzero ((void *)__line);\n" |
14068 | "}\n" |
14069 | "\n" |
14070 | "#undef __DEFAULT_FN_ATTRS\n" |
14071 | "\n" |
14072 | "#endif /* __CLZEROINTRIN_H */\n" |
14073 | "" } , |
14074 | { "/builtins/cpuid.h" , "/*===---- cpuid.h - X86 cpu model detection --------------------------------===\n" |
14075 | " *\n" |
14076 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
14077 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
14078 | " * in the Software without restriction, including without limitation the rights\n" |
14079 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
14080 | " * copies of the Software, and to permit persons to whom the Software is\n" |
14081 | " * furnished to do so, subject to the following conditions:\n" |
14082 | " *\n" |
14083 | " * The above copyright notice and this permission notice shall be included in\n" |
14084 | " * all copies or substantial portions of the Software.\n" |
14085 | " *\n" |
14086 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
14087 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
14088 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
14089 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
14090 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
14091 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
14092 | " * THE SOFTWARE.\n" |
14093 | " *\n" |
14094 | " *===-----------------------------------------------------------------------===\n" |
14095 | " */\n" |
14096 | "\n" |
14097 | "#if !(__x86_64__ || __i386__)\n" |
14098 | "#error this header is for x86 only\n" |
14099 | "#endif\n" |
14100 | "\n" |
14101 | "/* Responses identification request with %eax 0 */\n" |
14102 | "/* AMD: \"AuthenticAMD\" */\n" |
14103 | "#define signature_AMD_ebx 0x68747541\n" |
14104 | "#define signature_AMD_edx 0x69746e65\n" |
14105 | "#define signature_AMD_ecx 0x444d4163\n" |
14106 | "/* CENTAUR: \"CentaurHauls\" */\n" |
14107 | "#define signature_CENTAUR_ebx 0x746e6543\n" |
14108 | "#define signature_CENTAUR_edx 0x48727561\n" |
14109 | "#define signature_CENTAUR_ecx 0x736c7561\n" |
14110 | "/* CYRIX: \"CyrixInstead\" */\n" |
14111 | "#define signature_CYRIX_ebx 0x69727943\n" |
14112 | "#define signature_CYRIX_edx 0x736e4978\n" |
14113 | "#define signature_CYRIX_ecx 0x64616574\n" |
14114 | "/* INTEL: \"GenuineIntel\" */\n" |
14115 | "#define signature_INTEL_ebx 0x756e6547\n" |
14116 | "#define signature_INTEL_edx 0x49656e69\n" |
14117 | "#define signature_INTEL_ecx 0x6c65746e\n" |
14118 | "/* TM1: \"TransmetaCPU\" */\n" |
14119 | "#define signature_TM1_ebx 0x6e617254\n" |
14120 | "#define signature_TM1_edx 0x74656d73\n" |
14121 | "#define signature_TM1_ecx 0x55504361\n" |
14122 | "/* TM2: \"GenuineTMx86\" */\n" |
14123 | "#define signature_TM2_ebx 0x756e6547\n" |
14124 | "#define signature_TM2_edx 0x54656e69\n" |
14125 | "#define signature_TM2_ecx 0x3638784d\n" |
14126 | "/* NSC: \"Geode by NSC\" */\n" |
14127 | "#define signature_NSC_ebx 0x646f6547\n" |
14128 | "#define signature_NSC_edx 0x43534e20\n" |
14129 | "#define signature_NSC_ecx 0x79622065\n" |
14130 | "/* NEXGEN: \"NexGenDriven\" */\n" |
14131 | "#define signature_NEXGEN_ebx 0x4778654e\n" |
14132 | "#define signature_NEXGEN_edx 0x72446e65\n" |
14133 | "#define signature_NEXGEN_ecx 0x6e657669\n" |
14134 | "/* RISE: \"RiseRiseRise\" */\n" |
14135 | "#define signature_RISE_ebx 0x65736952\n" |
14136 | "#define signature_RISE_edx 0x65736952\n" |
14137 | "#define signature_RISE_ecx 0x65736952\n" |
14138 | "/* SIS: \"SiS SiS SiS \" */\n" |
14139 | "#define signature_SIS_ebx 0x20536953\n" |
14140 | "#define signature_SIS_edx 0x20536953\n" |
14141 | "#define signature_SIS_ecx 0x20536953\n" |
14142 | "/* UMC: \"UMC UMC UMC \" */\n" |
14143 | "#define signature_UMC_ebx 0x20434d55\n" |
14144 | "#define signature_UMC_edx 0x20434d55\n" |
14145 | "#define signature_UMC_ecx 0x20434d55\n" |
14146 | "/* VIA: \"VIA VIA VIA \" */\n" |
14147 | "#define signature_VIA_ebx 0x20414956\n" |
14148 | "#define signature_VIA_edx 0x20414956\n" |
14149 | "#define signature_VIA_ecx 0x20414956\n" |
14150 | "/* VORTEX: \"Vortex86 SoC\" */\n" |
14151 | "#define signature_VORTEX_ebx 0x74726f56\n" |
14152 | "#define signature_VORTEX_edx 0x36387865\n" |
14153 | "#define signature_VORTEX_ecx 0x436f5320\n" |
14154 | "\n" |
14155 | "/* Features in %ecx for leaf 1 */\n" |
14156 | "#define bit_SSE3 0x00000001\n" |
14157 | "#define bit_PCLMULQDQ 0x00000002\n" |
14158 | "#define bit_PCLMUL bit_PCLMULQDQ /* for gcc compat */\n" |
14159 | "#define bit_DTES64 0x00000004\n" |
14160 | "#define bit_MONITOR 0x00000008\n" |
14161 | "#define bit_DSCPL 0x00000010\n" |
14162 | "#define bit_VMX 0x00000020\n" |
14163 | "#define bit_SMX 0x00000040\n" |
14164 | "#define bit_EIST 0x00000080\n" |
14165 | "#define bit_TM2 0x00000100\n" |
14166 | "#define bit_SSSE3 0x00000200\n" |
14167 | "#define bit_CNXTID 0x00000400\n" |
14168 | "#define bit_FMA 0x00001000\n" |
14169 | "#define bit_CMPXCHG16B 0x00002000\n" |
14170 | "#define bit_xTPR 0x00004000\n" |
14171 | "#define bit_PDCM 0x00008000\n" |
14172 | "#define bit_PCID 0x00020000\n" |
14173 | "#define bit_DCA 0x00040000\n" |
14174 | "#define bit_SSE41 0x00080000\n" |
14175 | "#define bit_SSE4_1 bit_SSE41 /* for gcc compat */\n" |
14176 | "#define bit_SSE42 0x00100000\n" |
14177 | "#define bit_SSE4_2 bit_SSE42 /* for gcc compat */\n" |
14178 | "#define bit_x2APIC 0x00200000\n" |
14179 | "#define bit_MOVBE 0x00400000\n" |
14180 | "#define bit_POPCNT 0x00800000\n" |
14181 | "#define bit_TSCDeadline 0x01000000\n" |
14182 | "#define bit_AESNI 0x02000000\n" |
14183 | "#define bit_AES bit_AESNI /* for gcc compat */\n" |
14184 | "#define bit_XSAVE 0x04000000\n" |
14185 | "#define bit_OSXSAVE 0x08000000\n" |
14186 | "#define bit_AVX 0x10000000\n" |
14187 | "#define bit_F16C 0x20000000\n" |
14188 | "#define bit_RDRND 0x40000000\n" |
14189 | "\n" |
14190 | "/* Features in %edx for leaf 1 */\n" |
14191 | "#define bit_FPU 0x00000001\n" |
14192 | "#define bit_VME 0x00000002\n" |
14193 | "#define bit_DE 0x00000004\n" |
14194 | "#define bit_PSE 0x00000008\n" |
14195 | "#define bit_TSC 0x00000010\n" |
14196 | "#define bit_MSR 0x00000020\n" |
14197 | "#define bit_PAE 0x00000040\n" |
14198 | "#define bit_MCE 0x00000080\n" |
14199 | "#define bit_CX8 0x00000100\n" |
14200 | "#define bit_CMPXCHG8B bit_CX8 /* for gcc compat */\n" |
14201 | "#define bit_APIC 0x00000200\n" |
14202 | "#define bit_SEP 0x00000800\n" |
14203 | "#define bit_MTRR 0x00001000\n" |
14204 | "#define bit_PGE 0x00002000\n" |
14205 | "#define bit_MCA 0x00004000\n" |
14206 | "#define bit_CMOV 0x00008000\n" |
14207 | "#define bit_PAT 0x00010000\n" |
14208 | "#define bit_PSE36 0x00020000\n" |
14209 | "#define bit_PSN 0x00040000\n" |
14210 | "#define bit_CLFSH 0x00080000\n" |
14211 | "#define bit_DS 0x00200000\n" |
14212 | "#define bit_ACPI 0x00400000\n" |
14213 | "#define bit_MMX 0x00800000\n" |
14214 | "#define bit_FXSR 0x01000000\n" |
14215 | "#define bit_FXSAVE bit_FXSR /* for gcc compat */\n" |
14216 | "#define bit_SSE 0x02000000\n" |
14217 | "#define bit_SSE2 0x04000000\n" |
14218 | "#define bit_SS 0x08000000\n" |
14219 | "#define bit_HTT 0x10000000\n" |
14220 | "#define bit_TM 0x20000000\n" |
14221 | "#define bit_PBE 0x80000000\n" |
14222 | "\n" |
14223 | "/* Features in %ebx for leaf 7 sub-leaf 0 */\n" |
14224 | "#define bit_FSGSBASE 0x00000001\n" |
14225 | "#define bit_SGX 0x00000004\n" |
14226 | "#define bit_BMI 0x00000008\n" |
14227 | "#define bit_HLE 0x00000010\n" |
14228 | "#define bit_AVX2 0x00000020\n" |
14229 | "#define bit_SMEP 0x00000080\n" |
14230 | "#define bit_BMI2 0x00000100\n" |
14231 | "#define bit_ENH_MOVSB 0x00000200\n" |
14232 | "#define bit_INVPCID 0x00000400\n" |
14233 | "#define bit_RTM 0x00000800\n" |
14234 | "#define bit_MPX 0x00004000\n" |
14235 | "#define bit_AVX512F 0x00010000\n" |
14236 | "#define bit_AVX512DQ 0x00020000\n" |
14237 | "#define bit_RDSEED 0x00040000\n" |
14238 | "#define bit_ADX 0x00080000\n" |
14239 | "#define bit_AVX512IFMA 0x00200000\n" |
14240 | "#define bit_CLFLUSHOPT 0x00800000\n" |
14241 | "#define bit_CLWB 0x01000000\n" |
14242 | "#define bit_AVX512PF 0x04000000\n" |
14243 | "#define bit_AVX512ER 0x08000000\n" |
14244 | "#define bit_AVX512CD 0x10000000\n" |
14245 | "#define bit_SHA 0x20000000\n" |
14246 | "#define bit_AVX512BW 0x40000000\n" |
14247 | "#define bit_AVX512VL 0x80000000\n" |
14248 | "\n" |
14249 | "/* Features in %ecx for leaf 7 sub-leaf 0 */\n" |
14250 | "#define bit_PREFTCHWT1 0x00000001\n" |
14251 | "#define bit_AVX512VBMI 0x00000002\n" |
14252 | "#define bit_PKU 0x00000004\n" |
14253 | "#define bit_OSPKE 0x00000010\n" |
14254 | "#define bit_WAITPKG 0x00000020\n" |
14255 | "#define bit_AVX512VBMI2 0x00000040\n" |
14256 | "#define bit_SHSTK 0x00000080\n" |
14257 | "#define bit_GFNI 0x00000100\n" |
14258 | "#define bit_VAES 0x00000200\n" |
14259 | "#define bit_VPCLMULQDQ 0x00000400\n" |
14260 | "#define bit_AVX512VNNI 0x00000800\n" |
14261 | "#define bit_AVX512BITALG 0x00001000\n" |
14262 | "#define bit_AVX512VPOPCNTDQ 0x00004000\n" |
14263 | "#define bit_RDPID 0x00400000\n" |
14264 | "#define bit_CLDEMOTE 0x02000000\n" |
14265 | "#define bit_MOVDIRI 0x08000000\n" |
14266 | "#define bit_MOVDIR64B 0x10000000\n" |
14267 | "\n" |
14268 | "/* Features in %edx for leaf 7 sub-leaf 0 */\n" |
14269 | "#define bit_AVX5124VNNIW 0x00000004\n" |
14270 | "#define bit_AVX5124FMAPS 0x00000008\n" |
14271 | "#define bit_PCONFIG 0x00040000\n" |
14272 | "#define bit_IBT 0x00100000\n" |
14273 | "\n" |
14274 | "/* Features in %eax for leaf 13 sub-leaf 1 */\n" |
14275 | "#define bit_XSAVEOPT 0x00000001\n" |
14276 | "#define bit_XSAVEC 0x00000002\n" |
14277 | "#define bit_XSAVES 0x00000008\n" |
14278 | "\n" |
14279 | "/* Features in %eax for leaf 0x14 sub-leaf 0 */\n" |
14280 | "#define bit_PTWRITE 0x00000010\n" |
14281 | "\n" |
14282 | "/* Features in %ecx for leaf 0x80000001 */\n" |
14283 | "#define bit_LAHF_LM 0x00000001\n" |
14284 | "#define bit_ABM 0x00000020\n" |
14285 | "#define bit_LZCNT bit_ABM /* for gcc compat */\n" |
14286 | "#define bit_SSE4a 0x00000040\n" |
14287 | "#define bit_PRFCHW 0x00000100\n" |
14288 | "#define bit_XOP 0x00000800\n" |
14289 | "#define bit_LWP 0x00008000\n" |
14290 | "#define bit_FMA4 0x00010000\n" |
14291 | "#define bit_TBM 0x00200000\n" |
14292 | "#define bit_MWAITX 0x20000000\n" |
14293 | "\n" |
14294 | "/* Features in %edx for leaf 0x80000001 */\n" |
14295 | "#define bit_MMXEXT 0x00400000\n" |
14296 | "#define bit_LM 0x20000000\n" |
14297 | "#define bit_3DNOWP 0x40000000\n" |
14298 | "#define bit_3DNOW 0x80000000\n" |
14299 | "\n" |
14300 | "/* Features in %ebx for leaf 0x80000008 */\n" |
14301 | "#define bit_CLZERO 0x00000001\n" |
14302 | "#define bit_WBNOINVD 0x00000200\n" |
14303 | "\n" |
14304 | "\n" |
14305 | "#if __i386__\n" |
14306 | "#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \\\n" |
14307 | " __asm(\"cpuid\" : \"=a\"(__eax), \"=b\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n" |
14308 | " : \"0\"(__leaf))\n" |
14309 | "\n" |
14310 | "#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \\\n" |
14311 | " __asm(\"cpuid\" : \"=a\"(__eax), \"=b\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n" |
14312 | " : \"0\"(__leaf), \"2\"(__count))\n" |
14313 | "#else\n" |
14314 | "/* x86-64 uses %rbx as the base register, so preserve it. */\n" |
14315 | "#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \\\n" |
14316 | " __asm(\" xchgq %%rbx,%q1\\n\" \\\n" |
14317 | " \" cpuid\\n\" \\\n" |
14318 | " \" xchgq %%rbx,%q1\" \\\n" |
14319 | " : \"=a\"(__eax), \"=r\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n" |
14320 | " : \"0\"(__leaf))\n" |
14321 | "\n" |
14322 | "#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \\\n" |
14323 | " __asm(\" xchgq %%rbx,%q1\\n\" \\\n" |
14324 | " \" cpuid\\n\" \\\n" |
14325 | " \" xchgq %%rbx,%q1\" \\\n" |
14326 | " : \"=a\"(__eax), \"=r\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n" |
14327 | " : \"0\"(__leaf), \"2\"(__count))\n" |
14328 | "#endif\n" |
14329 | "\n" |
14330 | "static __inline int __get_cpuid_max (unsigned int __leaf, unsigned int *__sig)\n" |
14331 | "{\n" |
14332 | " unsigned int __eax, __ebx, __ecx, __edx;\n" |
14333 | "#if __i386__\n" |
14334 | " int __cpuid_supported;\n" |
14335 | "\n" |
14336 | " __asm(\" pushfl\\n\"\n" |
14337 | " \" popl %%eax\\n\"\n" |
14338 | " \" movl %%eax,%%ecx\\n\"\n" |
14339 | " \" xorl $0x00200000,%%eax\\n\"\n" |
14340 | " \" pushl %%eax\\n\"\n" |
14341 | " \" popfl\\n\"\n" |
14342 | " \" pushfl\\n\"\n" |
14343 | " \" popl %%eax\\n\"\n" |
14344 | " \" movl $0,%0\\n\"\n" |
14345 | " \" cmpl %%eax,%%ecx\\n\"\n" |
14346 | " \" je 1f\\n\"\n" |
14347 | " \" movl $1,%0\\n\"\n" |
14348 | " \"1:\"\n" |
14349 | " : \"=r\" (__cpuid_supported) : : \"eax\", \"ecx\");\n" |
14350 | " if (!__cpuid_supported)\n" |
14351 | " return 0;\n" |
14352 | "#endif\n" |
14353 | "\n" |
14354 | " __cpuid(__leaf, __eax, __ebx, __ecx, __edx);\n" |
14355 | " if (__sig)\n" |
14356 | " *__sig = __ebx;\n" |
14357 | " return __eax;\n" |
14358 | "}\n" |
14359 | "\n" |
14360 | "static __inline int __get_cpuid (unsigned int __leaf, unsigned int *__eax,\n" |
14361 | " unsigned int *__ebx, unsigned int *__ecx,\n" |
14362 | " unsigned int *__edx)\n" |
14363 | "{\n" |
14364 | " unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0);\n" |
14365 | "\n" |
14366 | " if (__max_leaf == 0 || __max_leaf < __leaf)\n" |
14367 | " return 0;\n" |
14368 | "\n" |
14369 | " __cpuid(__leaf, *__eax, *__ebx, *__ecx, *__edx);\n" |
14370 | " return 1;\n" |
14371 | "}\n" |
14372 | "\n" |
14373 | "static __inline int __get_cpuid_count (unsigned int __leaf,\n" |
14374 | " unsigned int __subleaf,\n" |
14375 | " unsigned int *__eax, unsigned int *__ebx,\n" |
14376 | " unsigned int *__ecx, unsigned int *__edx)\n" |
14377 | "{\n" |
14378 | " unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0);\n" |
14379 | "\n" |
14380 | " if (__max_leaf == 0 || __max_leaf < __leaf)\n" |
14381 | " return 0;\n" |
14382 | "\n" |
14383 | " __cpuid_count(__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);\n" |
14384 | " return 1;\n" |
14385 | "}\n" |
14386 | "" } , |
14387 | { "/builtins/emmintrin.h" , "/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===\n" |
14388 | " *\n" |
14389 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
14390 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
14391 | " * in the Software without restriction, including without limitation the rights\n" |
14392 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
14393 | " * copies of the Software, and to permit persons to whom the Software is\n" |
14394 | " * furnished to do so, subject to the following conditions:\n" |
14395 | " *\n" |
14396 | " * The above copyright notice and this permission notice shall be included in\n" |
14397 | " * all copies or substantial portions of the Software.\n" |
14398 | " *\n" |
14399 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
14400 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
14401 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
14402 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
14403 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
14404 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
14405 | " * THE SOFTWARE.\n" |
14406 | " *\n" |
14407 | " *===-----------------------------------------------------------------------===\n" |
14408 | " */\n" |
14409 | "\n" |
14410 | "#ifndef __EMMINTRIN_H\n" |
14411 | "#define __EMMINTRIN_H\n" |
14412 | "\n" |
14413 | "#include <xmmintrin.h>\n" |
14414 | "\n" |
14415 | "typedef double __m128d __attribute__((__vector_size__(16)));\n" |
14416 | "typedef long long __m128i __attribute__((__vector_size__(16)));\n" |
14417 | "\n" |
14418 | "/* Type defines. */\n" |
14419 | "typedef double __v2df __attribute__ ((__vector_size__ (16)));\n" |
14420 | "typedef long long __v2di __attribute__ ((__vector_size__ (16)));\n" |
14421 | "typedef short __v8hi __attribute__((__vector_size__(16)));\n" |
14422 | "typedef char __v16qi __attribute__((__vector_size__(16)));\n" |
14423 | "\n" |
14424 | "/* Unsigned types */\n" |
14425 | "typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));\n" |
14426 | "typedef unsigned short __v8hu __attribute__((__vector_size__(16)));\n" |
14427 | "typedef unsigned char __v16qu __attribute__((__vector_size__(16)));\n" |
14428 | "\n" |
14429 | "/* We need an explicitly signed variant for char. Note that this shouldn't\n" |
14430 | " * appear in the interface though. */\n" |
14431 | "typedef signed char __v16qs __attribute__((__vector_size__(16)));\n" |
14432 | "\n" |
14433 | "/* Define the default attributes for the functions in this file. */\n" |
14434 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse2\"), __min_vector_width__(128)))\n" |
14435 | "#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__(\"mmx,sse2\"), __min_vector_width__(64)))\n" |
14436 | "\n" |
14437 | "/// Adds lower double-precision values in both operands and returns the\n" |
14438 | "/// sum in the lower 64 bits of the result. The upper 64 bits of the result\n" |
14439 | "/// are copied from the upper double-precision value of the first operand.\n" |
14440 | "///\n" |
14441 | "/// \\headerfile <x86intrin.h>\n" |
14442 | "///\n" |
14443 | "/// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction.\n" |
14444 | "///\n" |
14445 | "/// \\param __a\n" |
14446 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
14447 | "/// \\param __b\n" |
14448 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
14449 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
14450 | "/// sum of the lower 64 bits of both operands. The upper 64 bits are copied\n" |
14451 | "/// from the upper 64 bits of the first source operand.\n" |
14452 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14453 | "_mm_add_sd(__m128d __a, __m128d __b)\n" |
14454 | "{\n" |
14455 | " __a[0] += __b[0];\n" |
14456 | " return __a;\n" |
14457 | "}\n" |
14458 | "\n" |
14459 | "/// Adds two 128-bit vectors of [2 x double].\n" |
14460 | "///\n" |
14461 | "/// \\headerfile <x86intrin.h>\n" |
14462 | "///\n" |
14463 | "/// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction.\n" |
14464 | "///\n" |
14465 | "/// \\param __a\n" |
14466 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
14467 | "/// \\param __b\n" |
14468 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
14469 | "/// \\returns A 128-bit vector of [2 x double] containing the sums of both\n" |
14470 | "/// operands.\n" |
14471 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14472 | "_mm_add_pd(__m128d __a, __m128d __b)\n" |
14473 | "{\n" |
14474 | " return (__m128d)((__v2df)__a + (__v2df)__b);\n" |
14475 | "}\n" |
14476 | "\n" |
14477 | "/// Subtracts the lower double-precision value of the second operand\n" |
14478 | "/// from the lower double-precision value of the first operand and returns\n" |
14479 | "/// the difference in the lower 64 bits of the result. The upper 64 bits of\n" |
14480 | "/// the result are copied from the upper double-precision value of the first\n" |
14481 | "/// operand.\n" |
14482 | "///\n" |
14483 | "/// \\headerfile <x86intrin.h>\n" |
14484 | "///\n" |
14485 | "/// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction.\n" |
14486 | "///\n" |
14487 | "/// \\param __a\n" |
14488 | "/// A 128-bit vector of [2 x double] containing the minuend.\n" |
14489 | "/// \\param __b\n" |
14490 | "/// A 128-bit vector of [2 x double] containing the subtrahend.\n" |
14491 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
14492 | "/// difference of the lower 64 bits of both operands. The upper 64 bits are\n" |
14493 | "/// copied from the upper 64 bits of the first source operand.\n" |
14494 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14495 | "_mm_sub_sd(__m128d __a, __m128d __b)\n" |
14496 | "{\n" |
14497 | " __a[0] -= __b[0];\n" |
14498 | " return __a;\n" |
14499 | "}\n" |
14500 | "\n" |
14501 | "/// Subtracts two 128-bit vectors of [2 x double].\n" |
14502 | "///\n" |
14503 | "/// \\headerfile <x86intrin.h>\n" |
14504 | "///\n" |
14505 | "/// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction.\n" |
14506 | "///\n" |
14507 | "/// \\param __a\n" |
14508 | "/// A 128-bit vector of [2 x double] containing the minuend.\n" |
14509 | "/// \\param __b\n" |
14510 | "/// A 128-bit vector of [2 x double] containing the subtrahend.\n" |
14511 | "/// \\returns A 128-bit vector of [2 x double] containing the differences between\n" |
14512 | "/// both operands.\n" |
14513 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14514 | "_mm_sub_pd(__m128d __a, __m128d __b)\n" |
14515 | "{\n" |
14516 | " return (__m128d)((__v2df)__a - (__v2df)__b);\n" |
14517 | "}\n" |
14518 | "\n" |
14519 | "/// Multiplies lower double-precision values in both operands and returns\n" |
14520 | "/// the product in the lower 64 bits of the result. The upper 64 bits of the\n" |
14521 | "/// result are copied from the upper double-precision value of the first\n" |
14522 | "/// operand.\n" |
14523 | "///\n" |
14524 | "/// \\headerfile <x86intrin.h>\n" |
14525 | "///\n" |
14526 | "/// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction.\n" |
14527 | "///\n" |
14528 | "/// \\param __a\n" |
14529 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
14530 | "/// \\param __b\n" |
14531 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
14532 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
14533 | "/// product of the lower 64 bits of both operands. The upper 64 bits are\n" |
14534 | "/// copied from the upper 64 bits of the first source operand.\n" |
14535 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14536 | "_mm_mul_sd(__m128d __a, __m128d __b)\n" |
14537 | "{\n" |
14538 | " __a[0] *= __b[0];\n" |
14539 | " return __a;\n" |
14540 | "}\n" |
14541 | "\n" |
14542 | "/// Multiplies two 128-bit vectors of [2 x double].\n" |
14543 | "///\n" |
14544 | "/// \\headerfile <x86intrin.h>\n" |
14545 | "///\n" |
14546 | "/// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction.\n" |
14547 | "///\n" |
14548 | "/// \\param __a\n" |
14549 | "/// A 128-bit vector of [2 x double] containing one of the operands.\n" |
14550 | "/// \\param __b\n" |
14551 | "/// A 128-bit vector of [2 x double] containing one of the operands.\n" |
14552 | "/// \\returns A 128-bit vector of [2 x double] containing the products of both\n" |
14553 | "/// operands.\n" |
14554 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14555 | "_mm_mul_pd(__m128d __a, __m128d __b)\n" |
14556 | "{\n" |
14557 | " return (__m128d)((__v2df)__a * (__v2df)__b);\n" |
14558 | "}\n" |
14559 | "\n" |
14560 | "/// Divides the lower double-precision value of the first operand by the\n" |
14561 | "/// lower double-precision value of the second operand and returns the\n" |
14562 | "/// quotient in the lower 64 bits of the result. The upper 64 bits of the\n" |
14563 | "/// result are copied from the upper double-precision value of the first\n" |
14564 | "/// operand.\n" |
14565 | "///\n" |
14566 | "/// \\headerfile <x86intrin.h>\n" |
14567 | "///\n" |
14568 | "/// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction.\n" |
14569 | "///\n" |
14570 | "/// \\param __a\n" |
14571 | "/// A 128-bit vector of [2 x double] containing the dividend.\n" |
14572 | "/// \\param __b\n" |
14573 | "/// A 128-bit vector of [2 x double] containing divisor.\n" |
14574 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
14575 | "/// quotient of the lower 64 bits of both operands. The upper 64 bits are\n" |
14576 | "/// copied from the upper 64 bits of the first source operand.\n" |
14577 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14578 | "_mm_div_sd(__m128d __a, __m128d __b)\n" |
14579 | "{\n" |
14580 | " __a[0] /= __b[0];\n" |
14581 | " return __a;\n" |
14582 | "}\n" |
14583 | "\n" |
14584 | "/// Performs an element-by-element division of two 128-bit vectors of\n" |
14585 | "/// [2 x double].\n" |
14586 | "///\n" |
14587 | "/// \\headerfile <x86intrin.h>\n" |
14588 | "///\n" |
14589 | "/// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction.\n" |
14590 | "///\n" |
14591 | "/// \\param __a\n" |
14592 | "/// A 128-bit vector of [2 x double] containing the dividend.\n" |
14593 | "/// \\param __b\n" |
14594 | "/// A 128-bit vector of [2 x double] containing the divisor.\n" |
14595 | "/// \\returns A 128-bit vector of [2 x double] containing the quotients of both\n" |
14596 | "/// operands.\n" |
14597 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14598 | "_mm_div_pd(__m128d __a, __m128d __b)\n" |
14599 | "{\n" |
14600 | " return (__m128d)((__v2df)__a / (__v2df)__b);\n" |
14601 | "}\n" |
14602 | "\n" |
14603 | "/// Calculates the square root of the lower double-precision value of\n" |
14604 | "/// the second operand and returns it in the lower 64 bits of the result.\n" |
14605 | "/// The upper 64 bits of the result are copied from the upper\n" |
14606 | "/// double-precision value of the first operand.\n" |
14607 | "///\n" |
14608 | "/// \\headerfile <x86intrin.h>\n" |
14609 | "///\n" |
14610 | "/// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction.\n" |
14611 | "///\n" |
14612 | "/// \\param __a\n" |
14613 | "/// A 128-bit vector of [2 x double] containing one of the operands. The\n" |
14614 | "/// upper 64 bits of this operand are copied to the upper 64 bits of the\n" |
14615 | "/// result.\n" |
14616 | "/// \\param __b\n" |
14617 | "/// A 128-bit vector of [2 x double] containing one of the operands. The\n" |
14618 | "/// square root is calculated using the lower 64 bits of this operand.\n" |
14619 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
14620 | "/// square root of the lower 64 bits of operand \\a __b, and whose upper 64\n" |
14621 | "/// bits are copied from the upper 64 bits of operand \\a __a.\n" |
14622 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14623 | "_mm_sqrt_sd(__m128d __a, __m128d __b)\n" |
14624 | "{\n" |
14625 | " __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b);\n" |
14626 | " return __extension__ (__m128d) { __c[0], __a[1] };\n" |
14627 | "}\n" |
14628 | "\n" |
14629 | "/// Calculates the square root of the each of two values stored in a\n" |
14630 | "/// 128-bit vector of [2 x double].\n" |
14631 | "///\n" |
14632 | "/// \\headerfile <x86intrin.h>\n" |
14633 | "///\n" |
14634 | "/// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction.\n" |
14635 | "///\n" |
14636 | "/// \\param __a\n" |
14637 | "/// A 128-bit vector of [2 x double].\n" |
14638 | "/// \\returns A 128-bit vector of [2 x double] containing the square roots of the\n" |
14639 | "/// values in the operand.\n" |
14640 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14641 | "_mm_sqrt_pd(__m128d __a)\n" |
14642 | "{\n" |
14643 | " return __builtin_ia32_sqrtpd((__v2df)__a);\n" |
14644 | "}\n" |
14645 | "\n" |
14646 | "/// Compares lower 64-bit double-precision values of both operands, and\n" |
14647 | "/// returns the lesser of the pair of values in the lower 64-bits of the\n" |
14648 | "/// result. The upper 64 bits of the result are copied from the upper\n" |
14649 | "/// double-precision value of the first operand.\n" |
14650 | "///\n" |
14651 | "/// \\headerfile <x86intrin.h>\n" |
14652 | "///\n" |
14653 | "/// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction.\n" |
14654 | "///\n" |
14655 | "/// \\param __a\n" |
14656 | "/// A 128-bit vector of [2 x double] containing one of the operands. The\n" |
14657 | "/// lower 64 bits of this operand are used in the comparison.\n" |
14658 | "/// \\param __b\n" |
14659 | "/// A 128-bit vector of [2 x double] containing one of the operands. The\n" |
14660 | "/// lower 64 bits of this operand are used in the comparison.\n" |
14661 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
14662 | "/// minimum value between both operands. The upper 64 bits are copied from\n" |
14663 | "/// the upper 64 bits of the first source operand.\n" |
14664 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14665 | "_mm_min_sd(__m128d __a, __m128d __b)\n" |
14666 | "{\n" |
14667 | " return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);\n" |
14668 | "}\n" |
14669 | "\n" |
14670 | "/// Performs element-by-element comparison of the two 128-bit vectors of\n" |
14671 | "/// [2 x double] and returns the vector containing the lesser of each pair of\n" |
14672 | "/// values.\n" |
14673 | "///\n" |
14674 | "/// \\headerfile <x86intrin.h>\n" |
14675 | "///\n" |
14676 | "/// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction.\n" |
14677 | "///\n" |
14678 | "/// \\param __a\n" |
14679 | "/// A 128-bit vector of [2 x double] containing one of the operands.\n" |
14680 | "/// \\param __b\n" |
14681 | "/// A 128-bit vector of [2 x double] containing one of the operands.\n" |
14682 | "/// \\returns A 128-bit vector of [2 x double] containing the minimum values\n" |
14683 | "/// between both operands.\n" |
14684 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14685 | "_mm_min_pd(__m128d __a, __m128d __b)\n" |
14686 | "{\n" |
14687 | " return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);\n" |
14688 | "}\n" |
14689 | "\n" |
14690 | "/// Compares lower 64-bit double-precision values of both operands, and\n" |
14691 | "/// returns the greater of the pair of values in the lower 64-bits of the\n" |
14692 | "/// result. The upper 64 bits of the result are copied from the upper\n" |
14693 | "/// double-precision value of the first operand.\n" |
14694 | "///\n" |
14695 | "/// \\headerfile <x86intrin.h>\n" |
14696 | "///\n" |
14697 | "/// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction.\n" |
14698 | "///\n" |
14699 | "/// \\param __a\n" |
14700 | "/// A 128-bit vector of [2 x double] containing one of the operands. The\n" |
14701 | "/// lower 64 bits of this operand are used in the comparison.\n" |
14702 | "/// \\param __b\n" |
14703 | "/// A 128-bit vector of [2 x double] containing one of the operands. The\n" |
14704 | "/// lower 64 bits of this operand are used in the comparison.\n" |
14705 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
14706 | "/// maximum value between both operands. The upper 64 bits are copied from\n" |
14707 | "/// the upper 64 bits of the first source operand.\n" |
14708 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14709 | "_mm_max_sd(__m128d __a, __m128d __b)\n" |
14710 | "{\n" |
14711 | " return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);\n" |
14712 | "}\n" |
14713 | "\n" |
14714 | "/// Performs element-by-element comparison of the two 128-bit vectors of\n" |
14715 | "/// [2 x double] and returns the vector containing the greater of each pair\n" |
14716 | "/// of values.\n" |
14717 | "///\n" |
14718 | "/// \\headerfile <x86intrin.h>\n" |
14719 | "///\n" |
14720 | "/// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction.\n" |
14721 | "///\n" |
14722 | "/// \\param __a\n" |
14723 | "/// A 128-bit vector of [2 x double] containing one of the operands.\n" |
14724 | "/// \\param __b\n" |
14725 | "/// A 128-bit vector of [2 x double] containing one of the operands.\n" |
14726 | "/// \\returns A 128-bit vector of [2 x double] containing the maximum values\n" |
14727 | "/// between both operands.\n" |
14728 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14729 | "_mm_max_pd(__m128d __a, __m128d __b)\n" |
14730 | "{\n" |
14731 | " return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);\n" |
14732 | "}\n" |
14733 | "\n" |
14734 | "/// Performs a bitwise AND of two 128-bit vectors of [2 x double].\n" |
14735 | "///\n" |
14736 | "/// \\headerfile <x86intrin.h>\n" |
14737 | "///\n" |
14738 | "/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.\n" |
14739 | "///\n" |
14740 | "/// \\param __a\n" |
14741 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
14742 | "/// \\param __b\n" |
14743 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
14744 | "/// \\returns A 128-bit vector of [2 x double] containing the bitwise AND of the\n" |
14745 | "/// values between both operands.\n" |
14746 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14747 | "_mm_and_pd(__m128d __a, __m128d __b)\n" |
14748 | "{\n" |
14749 | " return (__m128d)((__v2du)__a & (__v2du)__b);\n" |
14750 | "}\n" |
14751 | "\n" |
14752 | "/// Performs a bitwise AND of two 128-bit vectors of [2 x double], using\n" |
14753 | "/// the one's complement of the values contained in the first source operand.\n" |
14754 | "///\n" |
14755 | "/// \\headerfile <x86intrin.h>\n" |
14756 | "///\n" |
14757 | "/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.\n" |
14758 | "///\n" |
14759 | "/// \\param __a\n" |
14760 | "/// A 128-bit vector of [2 x double] containing the left source operand. The\n" |
14761 | "/// one's complement of this value is used in the bitwise AND.\n" |
14762 | "/// \\param __b\n" |
14763 | "/// A 128-bit vector of [2 x double] containing the right source operand.\n" |
14764 | "/// \\returns A 128-bit vector of [2 x double] containing the bitwise AND of the\n" |
14765 | "/// values in the second operand and the one's complement of the first\n" |
14766 | "/// operand.\n" |
14767 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14768 | "_mm_andnot_pd(__m128d __a, __m128d __b)\n" |
14769 | "{\n" |
14770 | " return (__m128d)(~(__v2du)__a & (__v2du)__b);\n" |
14771 | "}\n" |
14772 | "\n" |
14773 | "/// Performs a bitwise OR of two 128-bit vectors of [2 x double].\n" |
14774 | "///\n" |
14775 | "/// \\headerfile <x86intrin.h>\n" |
14776 | "///\n" |
14777 | "/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.\n" |
14778 | "///\n" |
14779 | "/// \\param __a\n" |
14780 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
14781 | "/// \\param __b\n" |
14782 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
14783 | "/// \\returns A 128-bit vector of [2 x double] containing the bitwise OR of the\n" |
14784 | "/// values between both operands.\n" |
14785 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14786 | "_mm_or_pd(__m128d __a, __m128d __b)\n" |
14787 | "{\n" |
14788 | " return (__m128d)((__v2du)__a | (__v2du)__b);\n" |
14789 | "}\n" |
14790 | "\n" |
14791 | "/// Performs a bitwise XOR of two 128-bit vectors of [2 x double].\n" |
14792 | "///\n" |
14793 | "/// \\headerfile <x86intrin.h>\n" |
14794 | "///\n" |
14795 | "/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.\n" |
14796 | "///\n" |
14797 | "/// \\param __a\n" |
14798 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
14799 | "/// \\param __b\n" |
14800 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
14801 | "/// \\returns A 128-bit vector of [2 x double] containing the bitwise XOR of the\n" |
14802 | "/// values between both operands.\n" |
14803 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14804 | "_mm_xor_pd(__m128d __a, __m128d __b)\n" |
14805 | "{\n" |
14806 | " return (__m128d)((__v2du)__a ^ (__v2du)__b);\n" |
14807 | "}\n" |
14808 | "\n" |
14809 | "/// Compares each of the corresponding double-precision values of the\n" |
14810 | "/// 128-bit vectors of [2 x double] for equality. Each comparison yields 0x0\n" |
14811 | "/// for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
14812 | "///\n" |
14813 | "/// \\headerfile <x86intrin.h>\n" |
14814 | "///\n" |
14815 | "/// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction.\n" |
14816 | "///\n" |
14817 | "/// \\param __a\n" |
14818 | "/// A 128-bit vector of [2 x double].\n" |
14819 | "/// \\param __b\n" |
14820 | "/// A 128-bit vector of [2 x double].\n" |
14821 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
14822 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14823 | "_mm_cmpeq_pd(__m128d __a, __m128d __b)\n" |
14824 | "{\n" |
14825 | " return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);\n" |
14826 | "}\n" |
14827 | "\n" |
14828 | "/// Compares each of the corresponding double-precision values of the\n" |
14829 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
14830 | "/// operand are less than those in the second operand. Each comparison\n" |
14831 | "/// yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
14832 | "///\n" |
14833 | "/// \\headerfile <x86intrin.h>\n" |
14834 | "///\n" |
14835 | "/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.\n" |
14836 | "///\n" |
14837 | "/// \\param __a\n" |
14838 | "/// A 128-bit vector of [2 x double].\n" |
14839 | "/// \\param __b\n" |
14840 | "/// A 128-bit vector of [2 x double].\n" |
14841 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
14842 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14843 | "_mm_cmplt_pd(__m128d __a, __m128d __b)\n" |
14844 | "{\n" |
14845 | " return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);\n" |
14846 | "}\n" |
14847 | "\n" |
14848 | "/// Compares each of the corresponding double-precision values of the\n" |
14849 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
14850 | "/// operand are less than or equal to those in the second operand.\n" |
14851 | "///\n" |
14852 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
14853 | "///\n" |
14854 | "/// \\headerfile <x86intrin.h>\n" |
14855 | "///\n" |
14856 | "/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.\n" |
14857 | "///\n" |
14858 | "/// \\param __a\n" |
14859 | "/// A 128-bit vector of [2 x double].\n" |
14860 | "/// \\param __b\n" |
14861 | "/// A 128-bit vector of [2 x double].\n" |
14862 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
14863 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14864 | "_mm_cmple_pd(__m128d __a, __m128d __b)\n" |
14865 | "{\n" |
14866 | " return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);\n" |
14867 | "}\n" |
14868 | "\n" |
14869 | "/// Compares each of the corresponding double-precision values of the\n" |
14870 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
14871 | "/// operand are greater than those in the second operand.\n" |
14872 | "///\n" |
14873 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
14874 | "///\n" |
14875 | "/// \\headerfile <x86intrin.h>\n" |
14876 | "///\n" |
14877 | "/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.\n" |
14878 | "///\n" |
14879 | "/// \\param __a\n" |
14880 | "/// A 128-bit vector of [2 x double].\n" |
14881 | "/// \\param __b\n" |
14882 | "/// A 128-bit vector of [2 x double].\n" |
14883 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
14884 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14885 | "_mm_cmpgt_pd(__m128d __a, __m128d __b)\n" |
14886 | "{\n" |
14887 | " return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);\n" |
14888 | "}\n" |
14889 | "\n" |
14890 | "/// Compares each of the corresponding double-precision values of the\n" |
14891 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
14892 | "/// operand are greater than or equal to those in the second operand.\n" |
14893 | "///\n" |
14894 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
14895 | "///\n" |
14896 | "/// \\headerfile <x86intrin.h>\n" |
14897 | "///\n" |
14898 | "/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.\n" |
14899 | "///\n" |
14900 | "/// \\param __a\n" |
14901 | "/// A 128-bit vector of [2 x double].\n" |
14902 | "/// \\param __b\n" |
14903 | "/// A 128-bit vector of [2 x double].\n" |
14904 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
14905 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14906 | "_mm_cmpge_pd(__m128d __a, __m128d __b)\n" |
14907 | "{\n" |
14908 | " return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);\n" |
14909 | "}\n" |
14910 | "\n" |
14911 | "/// Compares each of the corresponding double-precision values of the\n" |
14912 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
14913 | "/// operand are ordered with respect to those in the second operand.\n" |
14914 | "///\n" |
14915 | "/// A pair of double-precision values are \"ordered\" with respect to each\n" |
14916 | "/// other if neither value is a NaN. Each comparison yields 0x0 for false,\n" |
14917 | "/// 0xFFFFFFFFFFFFFFFF for true.\n" |
14918 | "///\n" |
14919 | "/// \\headerfile <x86intrin.h>\n" |
14920 | "///\n" |
14921 | "/// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction.\n" |
14922 | "///\n" |
14923 | "/// \\param __a\n" |
14924 | "/// A 128-bit vector of [2 x double].\n" |
14925 | "/// \\param __b\n" |
14926 | "/// A 128-bit vector of [2 x double].\n" |
14927 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
14928 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14929 | "_mm_cmpord_pd(__m128d __a, __m128d __b)\n" |
14930 | "{\n" |
14931 | " return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);\n" |
14932 | "}\n" |
14933 | "\n" |
14934 | "/// Compares each of the corresponding double-precision values of the\n" |
14935 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
14936 | "/// operand are unordered with respect to those in the second operand.\n" |
14937 | "///\n" |
14938 | "/// A pair of double-precision values are \"unordered\" with respect to each\n" |
14939 | "/// other if one or both values are NaN. Each comparison yields 0x0 for\n" |
14940 | "/// false, 0xFFFFFFFFFFFFFFFF for true.\n" |
14941 | "///\n" |
14942 | "/// \\headerfile <x86intrin.h>\n" |
14943 | "///\n" |
14944 | "/// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c>\n" |
14945 | "/// instruction.\n" |
14946 | "///\n" |
14947 | "/// \\param __a\n" |
14948 | "/// A 128-bit vector of [2 x double].\n" |
14949 | "/// \\param __b\n" |
14950 | "/// A 128-bit vector of [2 x double].\n" |
14951 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
14952 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14953 | "_mm_cmpunord_pd(__m128d __a, __m128d __b)\n" |
14954 | "{\n" |
14955 | " return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);\n" |
14956 | "}\n" |
14957 | "\n" |
14958 | "/// Compares each of the corresponding double-precision values of the\n" |
14959 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
14960 | "/// operand are unequal to those in the second operand.\n" |
14961 | "///\n" |
14962 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
14963 | "///\n" |
14964 | "/// \\headerfile <x86intrin.h>\n" |
14965 | "///\n" |
14966 | "/// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction.\n" |
14967 | "///\n" |
14968 | "/// \\param __a\n" |
14969 | "/// A 128-bit vector of [2 x double].\n" |
14970 | "/// \\param __b\n" |
14971 | "/// A 128-bit vector of [2 x double].\n" |
14972 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
14973 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14974 | "_mm_cmpneq_pd(__m128d __a, __m128d __b)\n" |
14975 | "{\n" |
14976 | " return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);\n" |
14977 | "}\n" |
14978 | "\n" |
14979 | "/// Compares each of the corresponding double-precision values of the\n" |
14980 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
14981 | "/// operand are not less than those in the second operand.\n" |
14982 | "///\n" |
14983 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
14984 | "///\n" |
14985 | "/// \\headerfile <x86intrin.h>\n" |
14986 | "///\n" |
14987 | "/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.\n" |
14988 | "///\n" |
14989 | "/// \\param __a\n" |
14990 | "/// A 128-bit vector of [2 x double].\n" |
14991 | "/// \\param __b\n" |
14992 | "/// A 128-bit vector of [2 x double].\n" |
14993 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
14994 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
14995 | "_mm_cmpnlt_pd(__m128d __a, __m128d __b)\n" |
14996 | "{\n" |
14997 | " return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);\n" |
14998 | "}\n" |
14999 | "\n" |
15000 | "/// Compares each of the corresponding double-precision values of the\n" |
15001 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
15002 | "/// operand are not less than or equal to those in the second operand.\n" |
15003 | "///\n" |
15004 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
15005 | "///\n" |
15006 | "/// \\headerfile <x86intrin.h>\n" |
15007 | "///\n" |
15008 | "/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.\n" |
15009 | "///\n" |
15010 | "/// \\param __a\n" |
15011 | "/// A 128-bit vector of [2 x double].\n" |
15012 | "/// \\param __b\n" |
15013 | "/// A 128-bit vector of [2 x double].\n" |
15014 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
15015 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15016 | "_mm_cmpnle_pd(__m128d __a, __m128d __b)\n" |
15017 | "{\n" |
15018 | " return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);\n" |
15019 | "}\n" |
15020 | "\n" |
15021 | "/// Compares each of the corresponding double-precision values of the\n" |
15022 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
15023 | "/// operand are not greater than those in the second operand.\n" |
15024 | "///\n" |
15025 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
15026 | "///\n" |
15027 | "/// \\headerfile <x86intrin.h>\n" |
15028 | "///\n" |
15029 | "/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.\n" |
15030 | "///\n" |
15031 | "/// \\param __a\n" |
15032 | "/// A 128-bit vector of [2 x double].\n" |
15033 | "/// \\param __b\n" |
15034 | "/// A 128-bit vector of [2 x double].\n" |
15035 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
15036 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15037 | "_mm_cmpngt_pd(__m128d __a, __m128d __b)\n" |
15038 | "{\n" |
15039 | " return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);\n" |
15040 | "}\n" |
15041 | "\n" |
15042 | "/// Compares each of the corresponding double-precision values of the\n" |
15043 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
15044 | "/// operand are not greater than or equal to those in the second operand.\n" |
15045 | "///\n" |
15046 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
15047 | "///\n" |
15048 | "/// \\headerfile <x86intrin.h>\n" |
15049 | "///\n" |
15050 | "/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.\n" |
15051 | "///\n" |
15052 | "/// \\param __a\n" |
15053 | "/// A 128-bit vector of [2 x double].\n" |
15054 | "/// \\param __b\n" |
15055 | "/// A 128-bit vector of [2 x double].\n" |
15056 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
15057 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15058 | "_mm_cmpnge_pd(__m128d __a, __m128d __b)\n" |
15059 | "{\n" |
15060 | " return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);\n" |
15061 | "}\n" |
15062 | "\n" |
15063 | "/// Compares the lower double-precision floating-point values in each of\n" |
15064 | "/// the two 128-bit floating-point vectors of [2 x double] for equality.\n" |
15065 | "///\n" |
15066 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
15067 | "///\n" |
15068 | "/// \\headerfile <x86intrin.h>\n" |
15069 | "///\n" |
15070 | "/// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction.\n" |
15071 | "///\n" |
15072 | "/// \\param __a\n" |
15073 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15074 | "/// compared to the lower double-precision value of \\a __b.\n" |
15075 | "/// \\param __b\n" |
15076 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15077 | "/// compared to the lower double-precision value of \\a __a.\n" |
15078 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
15079 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
15080 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15081 | "_mm_cmpeq_sd(__m128d __a, __m128d __b)\n" |
15082 | "{\n" |
15083 | " return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);\n" |
15084 | "}\n" |
15085 | "\n" |
15086 | "/// Compares the lower double-precision floating-point values in each of\n" |
15087 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15088 | "/// the value in the first parameter is less than the corresponding value in\n" |
15089 | "/// the second parameter.\n" |
15090 | "///\n" |
15091 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
15092 | "///\n" |
15093 | "/// \\headerfile <x86intrin.h>\n" |
15094 | "///\n" |
15095 | "/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.\n" |
15096 | "///\n" |
15097 | "/// \\param __a\n" |
15098 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15099 | "/// compared to the lower double-precision value of \\a __b.\n" |
15100 | "/// \\param __b\n" |
15101 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15102 | "/// compared to the lower double-precision value of \\a __a.\n" |
15103 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
15104 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
15105 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15106 | "_mm_cmplt_sd(__m128d __a, __m128d __b)\n" |
15107 | "{\n" |
15108 | " return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);\n" |
15109 | "}\n" |
15110 | "\n" |
15111 | "/// Compares the lower double-precision floating-point values in each of\n" |
15112 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15113 | "/// the value in the first parameter is less than or equal to the\n" |
15114 | "/// corresponding value in the second parameter.\n" |
15115 | "///\n" |
15116 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
15117 | "///\n" |
15118 | "/// \\headerfile <x86intrin.h>\n" |
15119 | "///\n" |
15120 | "/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.\n" |
15121 | "///\n" |
15122 | "/// \\param __a\n" |
15123 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15124 | "/// compared to the lower double-precision value of \\a __b.\n" |
15125 | "/// \\param __b\n" |
15126 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15127 | "/// compared to the lower double-precision value of \\a __a.\n" |
15128 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
15129 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
15130 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15131 | "_mm_cmple_sd(__m128d __a, __m128d __b)\n" |
15132 | "{\n" |
15133 | " return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);\n" |
15134 | "}\n" |
15135 | "\n" |
15136 | "/// Compares the lower double-precision floating-point values in each of\n" |
15137 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15138 | "/// the value in the first parameter is greater than the corresponding value\n" |
15139 | "/// in the second parameter.\n" |
15140 | "///\n" |
15141 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
15142 | "///\n" |
15143 | "/// \\headerfile <x86intrin.h>\n" |
15144 | "///\n" |
15145 | "/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.\n" |
15146 | "///\n" |
15147 | "/// \\param __a\n" |
15148 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15149 | "/// compared to the lower double-precision value of \\a __b.\n" |
15150 | "/// \\param __b\n" |
15151 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15152 | "/// compared to the lower double-precision value of \\a __a.\n" |
15153 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
15154 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
15155 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15156 | "_mm_cmpgt_sd(__m128d __a, __m128d __b)\n" |
15157 | "{\n" |
15158 | " __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);\n" |
15159 | " return __extension__ (__m128d) { __c[0], __a[1] };\n" |
15160 | "}\n" |
15161 | "\n" |
15162 | "/// Compares the lower double-precision floating-point values in each of\n" |
15163 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15164 | "/// the value in the first parameter is greater than or equal to the\n" |
15165 | "/// corresponding value in the second parameter.\n" |
15166 | "///\n" |
15167 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
15168 | "///\n" |
15169 | "/// \\headerfile <x86intrin.h>\n" |
15170 | "///\n" |
15171 | "/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.\n" |
15172 | "///\n" |
15173 | "/// \\param __a\n" |
15174 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15175 | "/// compared to the lower double-precision value of \\a __b.\n" |
15176 | "/// \\param __b\n" |
15177 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15178 | "/// compared to the lower double-precision value of \\a __a.\n" |
15179 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
15180 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
15181 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15182 | "_mm_cmpge_sd(__m128d __a, __m128d __b)\n" |
15183 | "{\n" |
15184 | " __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);\n" |
15185 | " return __extension__ (__m128d) { __c[0], __a[1] };\n" |
15186 | "}\n" |
15187 | "\n" |
15188 | "/// Compares the lower double-precision floating-point values in each of\n" |
15189 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15190 | "/// the value in the first parameter is \"ordered\" with respect to the\n" |
15191 | "/// corresponding value in the second parameter.\n" |
15192 | "///\n" |
15193 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair\n" |
15194 | "/// of double-precision values are \"ordered\" with respect to each other if\n" |
15195 | "/// neither value is a NaN.\n" |
15196 | "///\n" |
15197 | "/// \\headerfile <x86intrin.h>\n" |
15198 | "///\n" |
15199 | "/// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction.\n" |
15200 | "///\n" |
15201 | "/// \\param __a\n" |
15202 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15203 | "/// compared to the lower double-precision value of \\a __b.\n" |
15204 | "/// \\param __b\n" |
15205 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15206 | "/// compared to the lower double-precision value of \\a __a.\n" |
15207 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
15208 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
15209 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15210 | "_mm_cmpord_sd(__m128d __a, __m128d __b)\n" |
15211 | "{\n" |
15212 | " return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);\n" |
15213 | "}\n" |
15214 | "\n" |
15215 | "/// Compares the lower double-precision floating-point values in each of\n" |
15216 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15217 | "/// the value in the first parameter is \"unordered\" with respect to the\n" |
15218 | "/// corresponding value in the second parameter.\n" |
15219 | "///\n" |
15220 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair\n" |
15221 | "/// of double-precision values are \"unordered\" with respect to each other if\n" |
15222 | "/// one or both values are NaN.\n" |
15223 | "///\n" |
15224 | "/// \\headerfile <x86intrin.h>\n" |
15225 | "///\n" |
15226 | "/// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c>\n" |
15227 | "/// instruction.\n" |
15228 | "///\n" |
15229 | "/// \\param __a\n" |
15230 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15231 | "/// compared to the lower double-precision value of \\a __b.\n" |
15232 | "/// \\param __b\n" |
15233 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15234 | "/// compared to the lower double-precision value of \\a __a.\n" |
15235 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
15236 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
15237 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15238 | "_mm_cmpunord_sd(__m128d __a, __m128d __b)\n" |
15239 | "{\n" |
15240 | " return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);\n" |
15241 | "}\n" |
15242 | "\n" |
15243 | "/// Compares the lower double-precision floating-point values in each of\n" |
15244 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15245 | "/// the value in the first parameter is unequal to the corresponding value in\n" |
15246 | "/// the second parameter.\n" |
15247 | "///\n" |
15248 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
15249 | "///\n" |
15250 | "/// \\headerfile <x86intrin.h>\n" |
15251 | "///\n" |
15252 | "/// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction.\n" |
15253 | "///\n" |
15254 | "/// \\param __a\n" |
15255 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15256 | "/// compared to the lower double-precision value of \\a __b.\n" |
15257 | "/// \\param __b\n" |
15258 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15259 | "/// compared to the lower double-precision value of \\a __a.\n" |
15260 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
15261 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
15262 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15263 | "_mm_cmpneq_sd(__m128d __a, __m128d __b)\n" |
15264 | "{\n" |
15265 | " return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);\n" |
15266 | "}\n" |
15267 | "\n" |
15268 | "/// Compares the lower double-precision floating-point values in each of\n" |
15269 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15270 | "/// the value in the first parameter is not less than the corresponding\n" |
15271 | "/// value in the second parameter.\n" |
15272 | "///\n" |
15273 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
15274 | "///\n" |
15275 | "/// \\headerfile <x86intrin.h>\n" |
15276 | "///\n" |
15277 | "/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.\n" |
15278 | "///\n" |
15279 | "/// \\param __a\n" |
15280 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15281 | "/// compared to the lower double-precision value of \\a __b.\n" |
15282 | "/// \\param __b\n" |
15283 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15284 | "/// compared to the lower double-precision value of \\a __a.\n" |
15285 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
15286 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
15287 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15288 | "_mm_cmpnlt_sd(__m128d __a, __m128d __b)\n" |
15289 | "{\n" |
15290 | " return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);\n" |
15291 | "}\n" |
15292 | "\n" |
15293 | "/// Compares the lower double-precision floating-point values in each of\n" |
15294 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15295 | "/// the value in the first parameter is not less than or equal to the\n" |
15296 | "/// corresponding value in the second parameter.\n" |
15297 | "///\n" |
15298 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
15299 | "///\n" |
15300 | "/// \\headerfile <x86intrin.h>\n" |
15301 | "///\n" |
15302 | "/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.\n" |
15303 | "///\n" |
15304 | "/// \\param __a\n" |
15305 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15306 | "/// compared to the lower double-precision value of \\a __b.\n" |
15307 | "/// \\param __b\n" |
15308 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15309 | "/// compared to the lower double-precision value of \\a __a.\n" |
15310 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
15311 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
15312 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15313 | "_mm_cmpnle_sd(__m128d __a, __m128d __b)\n" |
15314 | "{\n" |
15315 | " return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);\n" |
15316 | "}\n" |
15317 | "\n" |
15318 | "/// Compares the lower double-precision floating-point values in each of\n" |
15319 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15320 | "/// the value in the first parameter is not greater than the corresponding\n" |
15321 | "/// value in the second parameter.\n" |
15322 | "///\n" |
15323 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
15324 | "///\n" |
15325 | "/// \\headerfile <x86intrin.h>\n" |
15326 | "///\n" |
15327 | "/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.\n" |
15328 | "///\n" |
15329 | "/// \\param __a\n" |
15330 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15331 | "/// compared to the lower double-precision value of \\a __b.\n" |
15332 | "/// \\param __b\n" |
15333 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15334 | "/// compared to the lower double-precision value of \\a __a.\n" |
15335 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
15336 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
15337 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15338 | "_mm_cmpngt_sd(__m128d __a, __m128d __b)\n" |
15339 | "{\n" |
15340 | " __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);\n" |
15341 | " return __extension__ (__m128d) { __c[0], __a[1] };\n" |
15342 | "}\n" |
15343 | "\n" |
15344 | "/// Compares the lower double-precision floating-point values in each of\n" |
15345 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15346 | "/// the value in the first parameter is not greater than or equal to the\n" |
15347 | "/// corresponding value in the second parameter.\n" |
15348 | "///\n" |
15349 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
15350 | "///\n" |
15351 | "/// \\headerfile <x86intrin.h>\n" |
15352 | "///\n" |
15353 | "/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.\n" |
15354 | "///\n" |
15355 | "/// \\param __a\n" |
15356 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15357 | "/// compared to the lower double-precision value of \\a __b.\n" |
15358 | "/// \\param __b\n" |
15359 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15360 | "/// compared to the lower double-precision value of \\a __a.\n" |
15361 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
15362 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
15363 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15364 | "_mm_cmpnge_sd(__m128d __a, __m128d __b)\n" |
15365 | "{\n" |
15366 | " __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);\n" |
15367 | " return __extension__ (__m128d) { __c[0], __a[1] };\n" |
15368 | "}\n" |
15369 | "\n" |
15370 | "/// Compares the lower double-precision floating-point values in each of\n" |
15371 | "/// the two 128-bit floating-point vectors of [2 x double] for equality.\n" |
15372 | "///\n" |
15373 | "/// The comparison yields 0 for false, 1 for true. If either of the two\n" |
15374 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15375 | "///\n" |
15376 | "/// \\headerfile <x86intrin.h>\n" |
15377 | "///\n" |
15378 | "/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n" |
15379 | "///\n" |
15380 | "/// \\param __a\n" |
15381 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15382 | "/// compared to the lower double-precision value of \\a __b.\n" |
15383 | "/// \\param __b\n" |
15384 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15385 | "/// compared to the lower double-precision value of \\a __a.\n" |
15386 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
15387 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15388 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
15389 | "_mm_comieq_sd(__m128d __a, __m128d __b)\n" |
15390 | "{\n" |
15391 | " return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);\n" |
15392 | "}\n" |
15393 | "\n" |
15394 | "/// Compares the lower double-precision floating-point values in each of\n" |
15395 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15396 | "/// the value in the first parameter is less than the corresponding value in\n" |
15397 | "/// the second parameter.\n" |
15398 | "///\n" |
15399 | "/// The comparison yields 0 for false, 1 for true. If either of the two\n" |
15400 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15401 | "///\n" |
15402 | "/// \\headerfile <x86intrin.h>\n" |
15403 | "///\n" |
15404 | "/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n" |
15405 | "///\n" |
15406 | "/// \\param __a\n" |
15407 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15408 | "/// compared to the lower double-precision value of \\a __b.\n" |
15409 | "/// \\param __b\n" |
15410 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15411 | "/// compared to the lower double-precision value of \\a __a.\n" |
15412 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
15413 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15414 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
15415 | "_mm_comilt_sd(__m128d __a, __m128d __b)\n" |
15416 | "{\n" |
15417 | " return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);\n" |
15418 | "}\n" |
15419 | "\n" |
15420 | "/// Compares the lower double-precision floating-point values in each of\n" |
15421 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15422 | "/// the value in the first parameter is less than or equal to the\n" |
15423 | "/// corresponding value in the second parameter.\n" |
15424 | "///\n" |
15425 | "/// The comparison yields 0 for false, 1 for true. If either of the two\n" |
15426 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15427 | "///\n" |
15428 | "/// \\headerfile <x86intrin.h>\n" |
15429 | "///\n" |
15430 | "/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n" |
15431 | "///\n" |
15432 | "/// \\param __a\n" |
15433 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15434 | "/// compared to the lower double-precision value of \\a __b.\n" |
15435 | "/// \\param __b\n" |
15436 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15437 | "/// compared to the lower double-precision value of \\a __a.\n" |
15438 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
15439 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15440 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
15441 | "_mm_comile_sd(__m128d __a, __m128d __b)\n" |
15442 | "{\n" |
15443 | " return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);\n" |
15444 | "}\n" |
15445 | "\n" |
15446 | "/// Compares the lower double-precision floating-point values in each of\n" |
15447 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15448 | "/// the value in the first parameter is greater than the corresponding value\n" |
15449 | "/// in the second parameter.\n" |
15450 | "///\n" |
15451 | "/// The comparison yields 0 for false, 1 for true. If either of the two\n" |
15452 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15453 | "///\n" |
15454 | "/// \\headerfile <x86intrin.h>\n" |
15455 | "///\n" |
15456 | "/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n" |
15457 | "///\n" |
15458 | "/// \\param __a\n" |
15459 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15460 | "/// compared to the lower double-precision value of \\a __b.\n" |
15461 | "/// \\param __b\n" |
15462 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15463 | "/// compared to the lower double-precision value of \\a __a.\n" |
15464 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
15465 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15466 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
15467 | "_mm_comigt_sd(__m128d __a, __m128d __b)\n" |
15468 | "{\n" |
15469 | " return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);\n" |
15470 | "}\n" |
15471 | "\n" |
15472 | "/// Compares the lower double-precision floating-point values in each of\n" |
15473 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15474 | "/// the value in the first parameter is greater than or equal to the\n" |
15475 | "/// corresponding value in the second parameter.\n" |
15476 | "///\n" |
15477 | "/// The comparison yields 0 for false, 1 for true. If either of the two\n" |
15478 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15479 | "///\n" |
15480 | "/// \\headerfile <x86intrin.h>\n" |
15481 | "///\n" |
15482 | "/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n" |
15483 | "///\n" |
15484 | "/// \\param __a\n" |
15485 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15486 | "/// compared to the lower double-precision value of \\a __b.\n" |
15487 | "/// \\param __b\n" |
15488 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15489 | "/// compared to the lower double-precision value of \\a __a.\n" |
15490 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
15491 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15492 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
15493 | "_mm_comige_sd(__m128d __a, __m128d __b)\n" |
15494 | "{\n" |
15495 | " return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);\n" |
15496 | "}\n" |
15497 | "\n" |
15498 | "/// Compares the lower double-precision floating-point values in each of\n" |
15499 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15500 | "/// the value in the first parameter is unequal to the corresponding value in\n" |
15501 | "/// the second parameter.\n" |
15502 | "///\n" |
15503 | "/// The comparison yields 0 for false, 1 for true. If either of the two\n" |
15504 | "/// lower double-precision values is NaN, 1 is returned.\n" |
15505 | "///\n" |
15506 | "/// \\headerfile <x86intrin.h>\n" |
15507 | "///\n" |
15508 | "/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n" |
15509 | "///\n" |
15510 | "/// \\param __a\n" |
15511 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15512 | "/// compared to the lower double-precision value of \\a __b.\n" |
15513 | "/// \\param __b\n" |
15514 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15515 | "/// compared to the lower double-precision value of \\a __a.\n" |
15516 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
15517 | "/// lower double-precision values is NaN, 1 is returned.\n" |
15518 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
15519 | "_mm_comineq_sd(__m128d __a, __m128d __b)\n" |
15520 | "{\n" |
15521 | " return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);\n" |
15522 | "}\n" |
15523 | "\n" |
15524 | "/// Compares the lower double-precision floating-point values in each of\n" |
15525 | "/// the two 128-bit floating-point vectors of [2 x double] for equality. The\n" |
15526 | "/// comparison yields 0 for false, 1 for true.\n" |
15527 | "///\n" |
15528 | "/// If either of the two lower double-precision values is NaN, 0 is returned.\n" |
15529 | "///\n" |
15530 | "/// \\headerfile <x86intrin.h>\n" |
15531 | "///\n" |
15532 | "/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n" |
15533 | "///\n" |
15534 | "/// \\param __a\n" |
15535 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15536 | "/// compared to the lower double-precision value of \\a __b.\n" |
15537 | "/// \\param __b\n" |
15538 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15539 | "/// compared to the lower double-precision value of \\a __a.\n" |
15540 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
15541 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15542 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
15543 | "_mm_ucomieq_sd(__m128d __a, __m128d __b)\n" |
15544 | "{\n" |
15545 | " return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);\n" |
15546 | "}\n" |
15547 | "\n" |
15548 | "/// Compares the lower double-precision floating-point values in each of\n" |
15549 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15550 | "/// the value in the first parameter is less than the corresponding value in\n" |
15551 | "/// the second parameter.\n" |
15552 | "///\n" |
15553 | "/// The comparison yields 0 for false, 1 for true. If either of the two lower\n" |
15554 | "/// double-precision values is NaN, 0 is returned.\n" |
15555 | "///\n" |
15556 | "/// \\headerfile <x86intrin.h>\n" |
15557 | "///\n" |
15558 | "/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n" |
15559 | "///\n" |
15560 | "/// \\param __a\n" |
15561 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15562 | "/// compared to the lower double-precision value of \\a __b.\n" |
15563 | "/// \\param __b\n" |
15564 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15565 | "/// compared to the lower double-precision value of \\a __a.\n" |
15566 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
15567 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15568 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
15569 | "_mm_ucomilt_sd(__m128d __a, __m128d __b)\n" |
15570 | "{\n" |
15571 | " return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);\n" |
15572 | "}\n" |
15573 | "\n" |
15574 | "/// Compares the lower double-precision floating-point values in each of\n" |
15575 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15576 | "/// the value in the first parameter is less than or equal to the\n" |
15577 | "/// corresponding value in the second parameter.\n" |
15578 | "///\n" |
15579 | "/// The comparison yields 0 for false, 1 for true. If either of the two lower\n" |
15580 | "/// double-precision values is NaN, 0 is returned.\n" |
15581 | "///\n" |
15582 | "/// \\headerfile <x86intrin.h>\n" |
15583 | "///\n" |
15584 | "/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n" |
15585 | "///\n" |
15586 | "/// \\param __a\n" |
15587 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15588 | "/// compared to the lower double-precision value of \\a __b.\n" |
15589 | "/// \\param __b\n" |
15590 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15591 | "/// compared to the lower double-precision value of \\a __a.\n" |
15592 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
15593 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15594 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
15595 | "_mm_ucomile_sd(__m128d __a, __m128d __b)\n" |
15596 | "{\n" |
15597 | " return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);\n" |
15598 | "}\n" |
15599 | "\n" |
15600 | "/// Compares the lower double-precision floating-point values in each of\n" |
15601 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15602 | "/// the value in the first parameter is greater than the corresponding value\n" |
15603 | "/// in the second parameter.\n" |
15604 | "///\n" |
15605 | "/// The comparison yields 0 for false, 1 for true. If either of the two lower\n" |
15606 | "/// double-precision values is NaN, 0 is returned.\n" |
15607 | "///\n" |
15608 | "/// \\headerfile <x86intrin.h>\n" |
15609 | "///\n" |
15610 | "/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n" |
15611 | "///\n" |
15612 | "/// \\param __a\n" |
15613 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15614 | "/// compared to the lower double-precision value of \\a __b.\n" |
15615 | "/// \\param __b\n" |
15616 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15617 | "/// compared to the lower double-precision value of \\a __a.\n" |
15618 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
15619 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15620 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
15621 | "_mm_ucomigt_sd(__m128d __a, __m128d __b)\n" |
15622 | "{\n" |
15623 | " return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);\n" |
15624 | "}\n" |
15625 | "\n" |
15626 | "/// Compares the lower double-precision floating-point values in each of\n" |
15627 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15628 | "/// the value in the first parameter is greater than or equal to the\n" |
15629 | "/// corresponding value in the second parameter.\n" |
15630 | "///\n" |
15631 | "/// The comparison yields 0 for false, 1 for true. If either of the two\n" |
15632 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15633 | "///\n" |
15634 | "/// \\headerfile <x86intrin.h>\n" |
15635 | "///\n" |
15636 | "/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n" |
15637 | "///\n" |
15638 | "/// \\param __a\n" |
15639 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15640 | "/// compared to the lower double-precision value of \\a __b.\n" |
15641 | "/// \\param __b\n" |
15642 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15643 | "/// compared to the lower double-precision value of \\a __a.\n" |
15644 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
15645 | "/// lower double-precision values is NaN, 0 is returned.\n" |
15646 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
15647 | "_mm_ucomige_sd(__m128d __a, __m128d __b)\n" |
15648 | "{\n" |
15649 | " return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);\n" |
15650 | "}\n" |
15651 | "\n" |
15652 | "/// Compares the lower double-precision floating-point values in each of\n" |
15653 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
15654 | "/// the value in the first parameter is unequal to the corresponding value in\n" |
15655 | "/// the second parameter.\n" |
15656 | "///\n" |
15657 | "/// The comparison yields 0 for false, 1 for true. If either of the two lower\n" |
15658 | "/// double-precision values is NaN, 1 is returned.\n" |
15659 | "///\n" |
15660 | "/// \\headerfile <x86intrin.h>\n" |
15661 | "///\n" |
15662 | "/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n" |
15663 | "///\n" |
15664 | "/// \\param __a\n" |
15665 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15666 | "/// compared to the lower double-precision value of \\a __b.\n" |
15667 | "/// \\param __b\n" |
15668 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
15669 | "/// compared to the lower double-precision value of \\a __a.\n" |
15670 | "/// \\returns An integer containing the comparison result. If either of the two\n" |
15671 | "/// lower double-precision values is NaN, 1 is returned.\n" |
15672 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
15673 | "_mm_ucomineq_sd(__m128d __a, __m128d __b)\n" |
15674 | "{\n" |
15675 | " return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);\n" |
15676 | "}\n" |
15677 | "\n" |
15678 | "/// Converts the two double-precision floating-point elements of a\n" |
15679 | "/// 128-bit vector of [2 x double] into two single-precision floating-point\n" |
15680 | "/// values, returned in the lower 64 bits of a 128-bit vector of [4 x float].\n" |
15681 | "/// The upper 64 bits of the result vector are set to zero.\n" |
15682 | "///\n" |
15683 | "/// \\headerfile <x86intrin.h>\n" |
15684 | "///\n" |
15685 | "/// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction.\n" |
15686 | "///\n" |
15687 | "/// \\param __a\n" |
15688 | "/// A 128-bit vector of [2 x double].\n" |
15689 | "/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n" |
15690 | "/// converted values. The upper 64 bits are set to zero.\n" |
15691 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
15692 | "_mm_cvtpd_ps(__m128d __a)\n" |
15693 | "{\n" |
15694 | " return __builtin_ia32_cvtpd2ps((__v2df)__a);\n" |
15695 | "}\n" |
15696 | "\n" |
15697 | "/// Converts the lower two single-precision floating-point elements of a\n" |
15698 | "/// 128-bit vector of [4 x float] into two double-precision floating-point\n" |
15699 | "/// values, returned in a 128-bit vector of [2 x double]. The upper two\n" |
15700 | "/// elements of the input vector are unused.\n" |
15701 | "///\n" |
15702 | "/// \\headerfile <x86intrin.h>\n" |
15703 | "///\n" |
15704 | "/// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction.\n" |
15705 | "///\n" |
15706 | "/// \\param __a\n" |
15707 | "/// A 128-bit vector of [4 x float]. The lower two single-precision\n" |
15708 | "/// floating-point elements are converted to double-precision values. The\n" |
15709 | "/// upper two elements are unused.\n" |
15710 | "/// \\returns A 128-bit vector of [2 x double] containing the converted values.\n" |
15711 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15712 | "_mm_cvtps_pd(__m128 __a)\n" |
15713 | "{\n" |
15714 | " return (__m128d) __builtin_convertvector(\n" |
15715 | " __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);\n" |
15716 | "}\n" |
15717 | "\n" |
15718 | "/// Converts the lower two integer elements of a 128-bit vector of\n" |
15719 | "/// [4 x i32] into two double-precision floating-point values, returned in a\n" |
15720 | "/// 128-bit vector of [2 x double].\n" |
15721 | "///\n" |
15722 | "/// The upper two elements of the input vector are unused.\n" |
15723 | "///\n" |
15724 | "/// \\headerfile <x86intrin.h>\n" |
15725 | "///\n" |
15726 | "/// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction.\n" |
15727 | "///\n" |
15728 | "/// \\param __a\n" |
15729 | "/// A 128-bit integer vector of [4 x i32]. The lower two integer elements are\n" |
15730 | "/// converted to double-precision values.\n" |
15731 | "///\n" |
15732 | "/// The upper two elements are unused.\n" |
15733 | "/// \\returns A 128-bit vector of [2 x double] containing the converted values.\n" |
15734 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15735 | "_mm_cvtepi32_pd(__m128i __a)\n" |
15736 | "{\n" |
15737 | " return (__m128d) __builtin_convertvector(\n" |
15738 | " __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);\n" |
15739 | "}\n" |
15740 | "\n" |
15741 | "/// Converts the two double-precision floating-point elements of a\n" |
15742 | "/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n" |
15743 | "/// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper\n" |
15744 | "/// 64 bits of the result vector are set to zero.\n" |
15745 | "///\n" |
15746 | "/// \\headerfile <x86intrin.h>\n" |
15747 | "///\n" |
15748 | "/// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction.\n" |
15749 | "///\n" |
15750 | "/// \\param __a\n" |
15751 | "/// A 128-bit vector of [2 x double].\n" |
15752 | "/// \\returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the\n" |
15753 | "/// converted values. The upper 64 bits are set to zero.\n" |
15754 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
15755 | "_mm_cvtpd_epi32(__m128d __a)\n" |
15756 | "{\n" |
15757 | " return __builtin_ia32_cvtpd2dq((__v2df)__a);\n" |
15758 | "}\n" |
15759 | "\n" |
15760 | "/// Converts the low-order element of a 128-bit vector of [2 x double]\n" |
15761 | "/// into a 32-bit signed integer value.\n" |
15762 | "///\n" |
15763 | "/// \\headerfile <x86intrin.h>\n" |
15764 | "///\n" |
15765 | "/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.\n" |
15766 | "///\n" |
15767 | "/// \\param __a\n" |
15768 | "/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n" |
15769 | "/// conversion.\n" |
15770 | "/// \\returns A 32-bit signed integer containing the converted value.\n" |
15771 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
15772 | "_mm_cvtsd_si32(__m128d __a)\n" |
15773 | "{\n" |
15774 | " return __builtin_ia32_cvtsd2si((__v2df)__a);\n" |
15775 | "}\n" |
15776 | "\n" |
15777 | "/// Converts the lower double-precision floating-point element of a\n" |
15778 | "/// 128-bit vector of [2 x double], in the second parameter, into a\n" |
15779 | "/// single-precision floating-point value, returned in the lower 32 bits of a\n" |
15780 | "/// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are\n" |
15781 | "/// copied from the upper 96 bits of the first parameter.\n" |
15782 | "///\n" |
15783 | "/// \\headerfile <x86intrin.h>\n" |
15784 | "///\n" |
15785 | "/// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction.\n" |
15786 | "///\n" |
15787 | "/// \\param __a\n" |
15788 | "/// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are\n" |
15789 | "/// copied to the upper 96 bits of the result.\n" |
15790 | "/// \\param __b\n" |
15791 | "/// A 128-bit vector of [2 x double]. The lower double-precision\n" |
15792 | "/// floating-point element is used in the conversion.\n" |
15793 | "/// \\returns A 128-bit vector of [4 x float]. The lower 32 bits contain the\n" |
15794 | "/// converted value from the second parameter. The upper 96 bits are copied\n" |
15795 | "/// from the upper 96 bits of the first parameter.\n" |
15796 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
15797 | "_mm_cvtsd_ss(__m128 __a, __m128d __b)\n" |
15798 | "{\n" |
15799 | " return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);\n" |
15800 | "}\n" |
15801 | "\n" |
15802 | "/// Converts a 32-bit signed integer value, in the second parameter, into\n" |
15803 | "/// a double-precision floating-point value, returned in the lower 64 bits of\n" |
15804 | "/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector\n" |
15805 | "/// are copied from the upper 64 bits of the first parameter.\n" |
15806 | "///\n" |
15807 | "/// \\headerfile <x86intrin.h>\n" |
15808 | "///\n" |
15809 | "/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.\n" |
15810 | "///\n" |
15811 | "/// \\param __a\n" |
15812 | "/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are\n" |
15813 | "/// copied to the upper 64 bits of the result.\n" |
15814 | "/// \\param __b\n" |
15815 | "/// A 32-bit signed integer containing the value to be converted.\n" |
15816 | "/// \\returns A 128-bit vector of [2 x double]. The lower 64 bits contain the\n" |
15817 | "/// converted value from the second parameter. The upper 64 bits are copied\n" |
15818 | "/// from the upper 64 bits of the first parameter.\n" |
15819 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15820 | "_mm_cvtsi32_sd(__m128d __a, int __b)\n" |
15821 | "{\n" |
15822 | " __a[0] = __b;\n" |
15823 | " return __a;\n" |
15824 | "}\n" |
15825 | "\n" |
15826 | "/// Converts the lower single-precision floating-point element of a\n" |
15827 | "/// 128-bit vector of [4 x float], in the second parameter, into a\n" |
15828 | "/// double-precision floating-point value, returned in the lower 64 bits of\n" |
15829 | "/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector\n" |
15830 | "/// are copied from the upper 64 bits of the first parameter.\n" |
15831 | "///\n" |
15832 | "/// \\headerfile <x86intrin.h>\n" |
15833 | "///\n" |
15834 | "/// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction.\n" |
15835 | "///\n" |
15836 | "/// \\param __a\n" |
15837 | "/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are\n" |
15838 | "/// copied to the upper 64 bits of the result.\n" |
15839 | "/// \\param __b\n" |
15840 | "/// A 128-bit vector of [4 x float]. The lower single-precision\n" |
15841 | "/// floating-point element is used in the conversion.\n" |
15842 | "/// \\returns A 128-bit vector of [2 x double]. The lower 64 bits contain the\n" |
15843 | "/// converted value from the second parameter. The upper 64 bits are copied\n" |
15844 | "/// from the upper 64 bits of the first parameter.\n" |
15845 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15846 | "_mm_cvtss_sd(__m128d __a, __m128 __b)\n" |
15847 | "{\n" |
15848 | " __a[0] = __b[0];\n" |
15849 | " return __a;\n" |
15850 | "}\n" |
15851 | "\n" |
15852 | "/// Converts the two double-precision floating-point elements of a\n" |
15853 | "/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n" |
15854 | "/// returned in the lower 64 bits of a 128-bit vector of [4 x i32].\n" |
15855 | "///\n" |
15856 | "/// If the result of either conversion is inexact, the result is truncated\n" |
15857 | "/// (rounded towards zero) regardless of the current MXCSR setting. The upper\n" |
15858 | "/// 64 bits of the result vector are set to zero.\n" |
15859 | "///\n" |
15860 | "/// \\headerfile <x86intrin.h>\n" |
15861 | "///\n" |
15862 | "/// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c>\n" |
15863 | "/// instruction.\n" |
15864 | "///\n" |
15865 | "/// \\param __a\n" |
15866 | "/// A 128-bit vector of [2 x double].\n" |
15867 | "/// \\returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the\n" |
15868 | "/// converted values. The upper 64 bits are set to zero.\n" |
15869 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
15870 | "_mm_cvttpd_epi32(__m128d __a)\n" |
15871 | "{\n" |
15872 | " return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a);\n" |
15873 | "}\n" |
15874 | "\n" |
15875 | "/// Converts the low-order element of a [2 x double] vector into a 32-bit\n" |
15876 | "/// signed integer value, truncating the result when it is inexact.\n" |
15877 | "///\n" |
15878 | "/// \\headerfile <x86intrin.h>\n" |
15879 | "///\n" |
15880 | "/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>\n" |
15881 | "/// instruction.\n" |
15882 | "///\n" |
15883 | "/// \\param __a\n" |
15884 | "/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n" |
15885 | "/// conversion.\n" |
15886 | "/// \\returns A 32-bit signed integer containing the converted value.\n" |
15887 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
15888 | "_mm_cvttsd_si32(__m128d __a)\n" |
15889 | "{\n" |
15890 | " return __builtin_ia32_cvttsd2si((__v2df)__a);\n" |
15891 | "}\n" |
15892 | "\n" |
15893 | "/// Converts the two double-precision floating-point elements of a\n" |
15894 | "/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n" |
15895 | "/// returned in a 64-bit vector of [2 x i32].\n" |
15896 | "///\n" |
15897 | "/// \\headerfile <x86intrin.h>\n" |
15898 | "///\n" |
15899 | "/// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction.\n" |
15900 | "///\n" |
15901 | "/// \\param __a\n" |
15902 | "/// A 128-bit vector of [2 x double].\n" |
15903 | "/// \\returns A 64-bit vector of [2 x i32] containing the converted values.\n" |
15904 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
15905 | "_mm_cvtpd_pi32(__m128d __a)\n" |
15906 | "{\n" |
15907 | " return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);\n" |
15908 | "}\n" |
15909 | "\n" |
15910 | "/// Converts the two double-precision floating-point elements of a\n" |
15911 | "/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n" |
15912 | "/// returned in a 64-bit vector of [2 x i32].\n" |
15913 | "///\n" |
15914 | "/// If the result of either conversion is inexact, the result is truncated\n" |
15915 | "/// (rounded towards zero) regardless of the current MXCSR setting.\n" |
15916 | "///\n" |
15917 | "/// \\headerfile <x86intrin.h>\n" |
15918 | "///\n" |
15919 | "/// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction.\n" |
15920 | "///\n" |
15921 | "/// \\param __a\n" |
15922 | "/// A 128-bit vector of [2 x double].\n" |
15923 | "/// \\returns A 64-bit vector of [2 x i32] containing the converted values.\n" |
15924 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
15925 | "_mm_cvttpd_pi32(__m128d __a)\n" |
15926 | "{\n" |
15927 | " return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);\n" |
15928 | "}\n" |
15929 | "\n" |
15930 | "/// Converts the two signed 32-bit integer elements of a 64-bit vector of\n" |
15931 | "/// [2 x i32] into two double-precision floating-point values, returned in a\n" |
15932 | "/// 128-bit vector of [2 x double].\n" |
15933 | "///\n" |
15934 | "/// \\headerfile <x86intrin.h>\n" |
15935 | "///\n" |
15936 | "/// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction.\n" |
15937 | "///\n" |
15938 | "/// \\param __a\n" |
15939 | "/// A 64-bit vector of [2 x i32].\n" |
15940 | "/// \\returns A 128-bit vector of [2 x double] containing the converted values.\n" |
15941 | "static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX\n" |
15942 | "_mm_cvtpi32_pd(__m64 __a)\n" |
15943 | "{\n" |
15944 | " return __builtin_ia32_cvtpi2pd((__v2si)__a);\n" |
15945 | "}\n" |
15946 | "\n" |
15947 | "/// Returns the low-order element of a 128-bit vector of [2 x double] as\n" |
15948 | "/// a double-precision floating-point value.\n" |
15949 | "///\n" |
15950 | "/// \\headerfile <x86intrin.h>\n" |
15951 | "///\n" |
15952 | "/// This intrinsic has no corresponding instruction.\n" |
15953 | "///\n" |
15954 | "/// \\param __a\n" |
15955 | "/// A 128-bit vector of [2 x double]. The lower 64 bits are returned.\n" |
15956 | "/// \\returns A double-precision floating-point value copied from the lower 64\n" |
15957 | "/// bits of \\a __a.\n" |
15958 | "static __inline__ double __DEFAULT_FN_ATTRS\n" |
15959 | "_mm_cvtsd_f64(__m128d __a)\n" |
15960 | "{\n" |
15961 | " return __a[0];\n" |
15962 | "}\n" |
15963 | "\n" |
15964 | "/// Loads a 128-bit floating-point vector of [2 x double] from an aligned\n" |
15965 | "/// memory location.\n" |
15966 | "///\n" |
15967 | "/// \\headerfile <x86intrin.h>\n" |
15968 | "///\n" |
15969 | "/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.\n" |
15970 | "///\n" |
15971 | "/// \\param __dp\n" |
15972 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
15973 | "/// location has to be 16-byte aligned.\n" |
15974 | "/// \\returns A 128-bit vector of [2 x double] containing the loaded values.\n" |
15975 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15976 | "_mm_load_pd(double const *__dp)\n" |
15977 | "{\n" |
15978 | " return *(__m128d*)__dp;\n" |
15979 | "}\n" |
15980 | "\n" |
15981 | "/// Loads a double-precision floating-point value from a specified memory\n" |
15982 | "/// location and duplicates it to both vector elements of a 128-bit vector of\n" |
15983 | "/// [2 x double].\n" |
15984 | "///\n" |
15985 | "/// \\headerfile <x86intrin.h>\n" |
15986 | "///\n" |
15987 | "/// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction.\n" |
15988 | "///\n" |
15989 | "/// \\param __dp\n" |
15990 | "/// A pointer to a memory location containing a double-precision value.\n" |
15991 | "/// \\returns A 128-bit vector of [2 x double] containing the loaded and\n" |
15992 | "/// duplicated values.\n" |
15993 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
15994 | "_mm_load1_pd(double const *__dp)\n" |
15995 | "{\n" |
15996 | " struct __mm_load1_pd_struct {\n" |
15997 | " double __u;\n" |
15998 | " } __attribute__((__packed__, __may_alias__));\n" |
15999 | " double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;\n" |
16000 | " return __extension__ (__m128d){ __u, __u };\n" |
16001 | "}\n" |
16002 | "\n" |
16003 | "#define _mm_load_pd1(dp) _mm_load1_pd(dp)\n" |
16004 | "\n" |
16005 | "/// Loads two double-precision values, in reverse order, from an aligned\n" |
16006 | "/// memory location into a 128-bit vector of [2 x double].\n" |
16007 | "///\n" |
16008 | "/// \\headerfile <x86intrin.h>\n" |
16009 | "///\n" |
16010 | "/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction +\n" |
16011 | "/// needed shuffling instructions. In AVX mode, the shuffling may be combined\n" |
16012 | "/// with the \\c VMOVAPD, resulting in only a \\c VPERMILPD instruction.\n" |
16013 | "///\n" |
16014 | "/// \\param __dp\n" |
16015 | "/// A 16-byte aligned pointer to an array of double-precision values to be\n" |
16016 | "/// loaded in reverse order.\n" |
16017 | "/// \\returns A 128-bit vector of [2 x double] containing the reversed loaded\n" |
16018 | "/// values.\n" |
16019 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
16020 | "_mm_loadr_pd(double const *__dp)\n" |
16021 | "{\n" |
16022 | " __m128d __u = *(__m128d*)__dp;\n" |
16023 | " return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0);\n" |
16024 | "}\n" |
16025 | "\n" |
16026 | "/// Loads a 128-bit floating-point vector of [2 x double] from an\n" |
16027 | "/// unaligned memory location.\n" |
16028 | "///\n" |
16029 | "/// \\headerfile <x86intrin.h>\n" |
16030 | "///\n" |
16031 | "/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.\n" |
16032 | "///\n" |
16033 | "/// \\param __dp\n" |
16034 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
16035 | "/// location does not have to be aligned.\n" |
16036 | "/// \\returns A 128-bit vector of [2 x double] containing the loaded values.\n" |
16037 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
16038 | "_mm_loadu_pd(double const *__dp)\n" |
16039 | "{\n" |
16040 | " struct __loadu_pd {\n" |
16041 | " __m128d __v;\n" |
16042 | " } __attribute__((__packed__, __may_alias__));\n" |
16043 | " return ((struct __loadu_pd*)__dp)->__v;\n" |
16044 | "}\n" |
16045 | "\n" |
16046 | "/// Loads a 64-bit integer value to the low element of a 128-bit integer\n" |
16047 | "/// vector and clears the upper element.\n" |
16048 | "///\n" |
16049 | "/// \\headerfile <x86intrin.h>\n" |
16050 | "///\n" |
16051 | "/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n" |
16052 | "///\n" |
16053 | "/// \\param __a\n" |
16054 | "/// A pointer to a 64-bit memory location. The address of the memory\n" |
16055 | "/// location does not have to be aligned.\n" |
16056 | "/// \\returns A 128-bit vector of [2 x i64] containing the loaded value.\n" |
16057 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16058 | "_mm_loadu_si64(void const *__a)\n" |
16059 | "{\n" |
16060 | " struct __loadu_si64 {\n" |
16061 | " long long __v;\n" |
16062 | " } __attribute__((__packed__, __may_alias__));\n" |
16063 | " long long __u = ((struct __loadu_si64*)__a)->__v;\n" |
16064 | " return __extension__ (__m128i)(__v2di){__u, 0LL};\n" |
16065 | "}\n" |
16066 | "\n" |
16067 | "/// Loads a 32-bit integer value to the low element of a 128-bit integer\n" |
16068 | "/// vector and clears the upper element.\n" |
16069 | "///\n" |
16070 | "/// \\headerfile <x86intrin.h>\n" |
16071 | "///\n" |
16072 | "/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n" |
16073 | "///\n" |
16074 | "/// \\param __a\n" |
16075 | "/// A pointer to a 32-bit memory location. The address of the memory\n" |
16076 | "/// location does not have to be aligned.\n" |
16077 | "/// \\returns A 128-bit vector of [4 x i32] containing the loaded value.\n" |
16078 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16079 | "_mm_loadu_si32(void const *__a)\n" |
16080 | "{\n" |
16081 | " struct __loadu_si32 {\n" |
16082 | " int __v;\n" |
16083 | " } __attribute__((__packed__, __may_alias__));\n" |
16084 | " int __u = ((struct __loadu_si32*)__a)->__v;\n" |
16085 | " return __extension__ (__m128i)(__v4si){__u, 0, 0, 0};\n" |
16086 | "}\n" |
16087 | "\n" |
16088 | "/// Loads a 16-bit integer value to the low element of a 128-bit integer\n" |
16089 | "/// vector and clears the upper element.\n" |
16090 | "///\n" |
16091 | "/// \\headerfile <x86intrin.h>\n" |
16092 | "///\n" |
16093 | "/// This intrinsic does not correspond to a specific instruction.\n" |
16094 | "///\n" |
16095 | "/// \\param __a\n" |
16096 | "/// A pointer to a 16-bit memory location. The address of the memory\n" |
16097 | "/// location does not have to be aligned.\n" |
16098 | "/// \\returns A 128-bit vector of [8 x i16] containing the loaded value.\n" |
16099 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16100 | "_mm_loadu_si16(void const *__a)\n" |
16101 | "{\n" |
16102 | " struct __loadu_si16 {\n" |
16103 | " short __v;\n" |
16104 | " } __attribute__((__packed__, __may_alias__));\n" |
16105 | " short __u = ((struct __loadu_si16*)__a)->__v;\n" |
16106 | " return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};\n" |
16107 | "}\n" |
16108 | "\n" |
16109 | "/// Loads a 64-bit double-precision value to the low element of a\n" |
16110 | "/// 128-bit integer vector and clears the upper element.\n" |
16111 | "///\n" |
16112 | "/// \\headerfile <x86intrin.h>\n" |
16113 | "///\n" |
16114 | "/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.\n" |
16115 | "///\n" |
16116 | "/// \\param __dp\n" |
16117 | "/// A pointer to a memory location containing a double-precision value.\n" |
16118 | "/// The address of the memory location does not have to be aligned.\n" |
16119 | "/// \\returns A 128-bit vector of [2 x double] containing the loaded value.\n" |
16120 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
16121 | "_mm_load_sd(double const *__dp)\n" |
16122 | "{\n" |
16123 | " struct __mm_load_sd_struct {\n" |
16124 | " double __u;\n" |
16125 | " } __attribute__((__packed__, __may_alias__));\n" |
16126 | " double __u = ((struct __mm_load_sd_struct*)__dp)->__u;\n" |
16127 | " return __extension__ (__m128d){ __u, 0 };\n" |
16128 | "}\n" |
16129 | "\n" |
16130 | "/// Loads a double-precision value into the high-order bits of a 128-bit\n" |
16131 | "/// vector of [2 x double]. The low-order bits are copied from the low-order\n" |
16132 | "/// bits of the first operand.\n" |
16133 | "///\n" |
16134 | "/// \\headerfile <x86intrin.h>\n" |
16135 | "///\n" |
16136 | "/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.\n" |
16137 | "///\n" |
16138 | "/// \\param __a\n" |
16139 | "/// A 128-bit vector of [2 x double]. \\n\n" |
16140 | "/// Bits [63:0] are written to bits [63:0] of the result.\n" |
16141 | "/// \\param __dp\n" |
16142 | "/// A pointer to a 64-bit memory location containing a double-precision\n" |
16143 | "/// floating-point value that is loaded. The loaded value is written to bits\n" |
16144 | "/// [127:64] of the result. The address of the memory location does not have\n" |
16145 | "/// to be aligned.\n" |
16146 | "/// \\returns A 128-bit vector of [2 x double] containing the moved values.\n" |
16147 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
16148 | "_mm_loadh_pd(__m128d __a, double const *__dp)\n" |
16149 | "{\n" |
16150 | " struct __mm_loadh_pd_struct {\n" |
16151 | " double __u;\n" |
16152 | " } __attribute__((__packed__, __may_alias__));\n" |
16153 | " double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;\n" |
16154 | " return __extension__ (__m128d){ __a[0], __u };\n" |
16155 | "}\n" |
16156 | "\n" |
16157 | "/// Loads a double-precision value into the low-order bits of a 128-bit\n" |
16158 | "/// vector of [2 x double]. The high-order bits are copied from the\n" |
16159 | "/// high-order bits of the first operand.\n" |
16160 | "///\n" |
16161 | "/// \\headerfile <x86intrin.h>\n" |
16162 | "///\n" |
16163 | "/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.\n" |
16164 | "///\n" |
16165 | "/// \\param __a\n" |
16166 | "/// A 128-bit vector of [2 x double]. \\n\n" |
16167 | "/// Bits [127:64] are written to bits [127:64] of the result.\n" |
16168 | "/// \\param __dp\n" |
16169 | "/// A pointer to a 64-bit memory location containing a double-precision\n" |
16170 | "/// floating-point value that is loaded. The loaded value is written to bits\n" |
16171 | "/// [63:0] of the result. The address of the memory location does not have to\n" |
16172 | "/// be aligned.\n" |
16173 | "/// \\returns A 128-bit vector of [2 x double] containing the moved values.\n" |
16174 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
16175 | "_mm_loadl_pd(__m128d __a, double const *__dp)\n" |
16176 | "{\n" |
16177 | " struct __mm_loadl_pd_struct {\n" |
16178 | " double __u;\n" |
16179 | " } __attribute__((__packed__, __may_alias__));\n" |
16180 | " double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;\n" |
16181 | " return __extension__ (__m128d){ __u, __a[1] };\n" |
16182 | "}\n" |
16183 | "\n" |
16184 | "/// Constructs a 128-bit floating-point vector of [2 x double] with\n" |
16185 | "/// unspecified content. This could be used as an argument to another\n" |
16186 | "/// intrinsic function where the argument is required but the value is not\n" |
16187 | "/// actually used.\n" |
16188 | "///\n" |
16189 | "/// \\headerfile <x86intrin.h>\n" |
16190 | "///\n" |
16191 | "/// This intrinsic has no corresponding instruction.\n" |
16192 | "///\n" |
16193 | "/// \\returns A 128-bit floating-point vector of [2 x double] with unspecified\n" |
16194 | "/// content.\n" |
16195 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
16196 | "_mm_undefined_pd(void)\n" |
16197 | "{\n" |
16198 | " return (__m128d)__builtin_ia32_undef128();\n" |
16199 | "}\n" |
16200 | "\n" |
16201 | "/// Constructs a 128-bit floating-point vector of [2 x double]. The lower\n" |
16202 | "/// 64 bits of the vector are initialized with the specified double-precision\n" |
16203 | "/// floating-point value. The upper 64 bits are set to zero.\n" |
16204 | "///\n" |
16205 | "/// \\headerfile <x86intrin.h>\n" |
16206 | "///\n" |
16207 | "/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n" |
16208 | "///\n" |
16209 | "/// \\param __w\n" |
16210 | "/// A double-precision floating-point value used to initialize the lower 64\n" |
16211 | "/// bits of the result.\n" |
16212 | "/// \\returns An initialized 128-bit floating-point vector of [2 x double]. The\n" |
16213 | "/// lower 64 bits contain the value of the parameter. The upper 64 bits are\n" |
16214 | "/// set to zero.\n" |
16215 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
16216 | "_mm_set_sd(double __w)\n" |
16217 | "{\n" |
16218 | " return __extension__ (__m128d){ __w, 0 };\n" |
16219 | "}\n" |
16220 | "\n" |
16221 | "/// Constructs a 128-bit floating-point vector of [2 x double], with each\n" |
16222 | "/// of the two double-precision floating-point vector elements set to the\n" |
16223 | "/// specified double-precision floating-point value.\n" |
16224 | "///\n" |
16225 | "/// \\headerfile <x86intrin.h>\n" |
16226 | "///\n" |
16227 | "/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.\n" |
16228 | "///\n" |
16229 | "/// \\param __w\n" |
16230 | "/// A double-precision floating-point value used to initialize each vector\n" |
16231 | "/// element of the result.\n" |
16232 | "/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n" |
16233 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
16234 | "_mm_set1_pd(double __w)\n" |
16235 | "{\n" |
16236 | " return __extension__ (__m128d){ __w, __w };\n" |
16237 | "}\n" |
16238 | "\n" |
16239 | "/// Constructs a 128-bit floating-point vector of [2 x double], with each\n" |
16240 | "/// of the two double-precision floating-point vector elements set to the\n" |
16241 | "/// specified double-precision floating-point value.\n" |
16242 | "///\n" |
16243 | "/// \\headerfile <x86intrin.h>\n" |
16244 | "///\n" |
16245 | "/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.\n" |
16246 | "///\n" |
16247 | "/// \\param __w\n" |
16248 | "/// A double-precision floating-point value used to initialize each vector\n" |
16249 | "/// element of the result.\n" |
16250 | "/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n" |
16251 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
16252 | "_mm_set_pd1(double __w)\n" |
16253 | "{\n" |
16254 | " return _mm_set1_pd(__w);\n" |
16255 | "}\n" |
16256 | "\n" |
16257 | "/// Constructs a 128-bit floating-point vector of [2 x double]\n" |
16258 | "/// initialized with the specified double-precision floating-point values.\n" |
16259 | "///\n" |
16260 | "/// \\headerfile <x86intrin.h>\n" |
16261 | "///\n" |
16262 | "/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n" |
16263 | "///\n" |
16264 | "/// \\param __w\n" |
16265 | "/// A double-precision floating-point value used to initialize the upper 64\n" |
16266 | "/// bits of the result.\n" |
16267 | "/// \\param __x\n" |
16268 | "/// A double-precision floating-point value used to initialize the lower 64\n" |
16269 | "/// bits of the result.\n" |
16270 | "/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n" |
16271 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
16272 | "_mm_set_pd(double __w, double __x)\n" |
16273 | "{\n" |
16274 | " return __extension__ (__m128d){ __x, __w };\n" |
16275 | "}\n" |
16276 | "\n" |
16277 | "/// Constructs a 128-bit floating-point vector of [2 x double],\n" |
16278 | "/// initialized in reverse order with the specified double-precision\n" |
16279 | "/// floating-point values.\n" |
16280 | "///\n" |
16281 | "/// \\headerfile <x86intrin.h>\n" |
16282 | "///\n" |
16283 | "/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n" |
16284 | "///\n" |
16285 | "/// \\param __w\n" |
16286 | "/// A double-precision floating-point value used to initialize the lower 64\n" |
16287 | "/// bits of the result.\n" |
16288 | "/// \\param __x\n" |
16289 | "/// A double-precision floating-point value used to initialize the upper 64\n" |
16290 | "/// bits of the result.\n" |
16291 | "/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n" |
16292 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
16293 | "_mm_setr_pd(double __w, double __x)\n" |
16294 | "{\n" |
16295 | " return __extension__ (__m128d){ __w, __x };\n" |
16296 | "}\n" |
16297 | "\n" |
16298 | "/// Constructs a 128-bit floating-point vector of [2 x double]\n" |
16299 | "/// initialized to zero.\n" |
16300 | "///\n" |
16301 | "/// \\headerfile <x86intrin.h>\n" |
16302 | "///\n" |
16303 | "/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.\n" |
16304 | "///\n" |
16305 | "/// \\returns An initialized 128-bit floating-point vector of [2 x double] with\n" |
16306 | "/// all elements set to zero.\n" |
16307 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
16308 | "_mm_setzero_pd(void)\n" |
16309 | "{\n" |
16310 | " return __extension__ (__m128d){ 0, 0 };\n" |
16311 | "}\n" |
16312 | "\n" |
16313 | "/// Constructs a 128-bit floating-point vector of [2 x double]. The lower\n" |
16314 | "/// 64 bits are set to the lower 64 bits of the second parameter. The upper\n" |
16315 | "/// 64 bits are set to the upper 64 bits of the first parameter.\n" |
16316 | "///\n" |
16317 | "/// \\headerfile <x86intrin.h>\n" |
16318 | "///\n" |
16319 | "/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.\n" |
16320 | "///\n" |
16321 | "/// \\param __a\n" |
16322 | "/// A 128-bit vector of [2 x double]. The upper 64 bits are written to the\n" |
16323 | "/// upper 64 bits of the result.\n" |
16324 | "/// \\param __b\n" |
16325 | "/// A 128-bit vector of [2 x double]. The lower 64 bits are written to the\n" |
16326 | "/// lower 64 bits of the result.\n" |
16327 | "/// \\returns A 128-bit vector of [2 x double] containing the moved values.\n" |
16328 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
16329 | "_mm_move_sd(__m128d __a, __m128d __b)\n" |
16330 | "{\n" |
16331 | " __a[0] = __b[0];\n" |
16332 | " return __a;\n" |
16333 | "}\n" |
16334 | "\n" |
16335 | "/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a\n" |
16336 | "/// memory location.\n" |
16337 | "///\n" |
16338 | "/// \\headerfile <x86intrin.h>\n" |
16339 | "///\n" |
16340 | "/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.\n" |
16341 | "///\n" |
16342 | "/// \\param __dp\n" |
16343 | "/// A pointer to a 64-bit memory location.\n" |
16344 | "/// \\param __a\n" |
16345 | "/// A 128-bit vector of [2 x double] containing the value to be stored.\n" |
16346 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
16347 | "_mm_store_sd(double *__dp, __m128d __a)\n" |
16348 | "{\n" |
16349 | " struct __mm_store_sd_struct {\n" |
16350 | " double __u;\n" |
16351 | " } __attribute__((__packed__, __may_alias__));\n" |
16352 | " ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];\n" |
16353 | "}\n" |
16354 | "\n" |
16355 | "/// Moves packed double-precision values from a 128-bit vector of\n" |
16356 | "/// [2 x double] to a memory location.\n" |
16357 | "///\n" |
16358 | "/// \\headerfile <x86intrin.h>\n" |
16359 | "///\n" |
16360 | "/// This intrinsic corresponds to the <c>VMOVAPD / MOVAPS</c> instruction.\n" |
16361 | "///\n" |
16362 | "/// \\param __dp\n" |
16363 | "/// A pointer to an aligned memory location that can store two\n" |
16364 | "/// double-precision values.\n" |
16365 | "/// \\param __a\n" |
16366 | "/// A packed 128-bit vector of [2 x double] containing the values to be\n" |
16367 | "/// moved.\n" |
16368 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
16369 | "_mm_store_pd(double *__dp, __m128d __a)\n" |
16370 | "{\n" |
16371 | " *(__m128d*)__dp = __a;\n" |
16372 | "}\n" |
16373 | "\n" |
16374 | "/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to\n" |
16375 | "/// the upper and lower 64 bits of a memory location.\n" |
16376 | "///\n" |
16377 | "/// \\headerfile <x86intrin.h>\n" |
16378 | "///\n" |
16379 | "/// This intrinsic corresponds to the\n" |
16380 | "/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.\n" |
16381 | "///\n" |
16382 | "/// \\param __dp\n" |
16383 | "/// A pointer to a memory location that can store two double-precision\n" |
16384 | "/// values.\n" |
16385 | "/// \\param __a\n" |
16386 | "/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each\n" |
16387 | "/// of the values in \\a __dp.\n" |
16388 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
16389 | "_mm_store1_pd(double *__dp, __m128d __a)\n" |
16390 | "{\n" |
16391 | " __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);\n" |
16392 | " _mm_store_pd(__dp, __a);\n" |
16393 | "}\n" |
16394 | "\n" |
16395 | "/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to\n" |
16396 | "/// the upper and lower 64 bits of a memory location.\n" |
16397 | "///\n" |
16398 | "/// \\headerfile <x86intrin.h>\n" |
16399 | "///\n" |
16400 | "/// This intrinsic corresponds to the\n" |
16401 | "/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.\n" |
16402 | "///\n" |
16403 | "/// \\param __dp\n" |
16404 | "/// A pointer to a memory location that can store two double-precision\n" |
16405 | "/// values.\n" |
16406 | "/// \\param __a\n" |
16407 | "/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each\n" |
16408 | "/// of the values in \\a __dp.\n" |
16409 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
16410 | "_mm_store_pd1(double *__dp, __m128d __a)\n" |
16411 | "{\n" |
16412 | " _mm_store1_pd(__dp, __a);\n" |
16413 | "}\n" |
16414 | "\n" |
16415 | "/// Stores a 128-bit vector of [2 x double] into an unaligned memory\n" |
16416 | "/// location.\n" |
16417 | "///\n" |
16418 | "/// \\headerfile <x86intrin.h>\n" |
16419 | "///\n" |
16420 | "/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.\n" |
16421 | "///\n" |
16422 | "/// \\param __dp\n" |
16423 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
16424 | "/// location does not have to be aligned.\n" |
16425 | "/// \\param __a\n" |
16426 | "/// A 128-bit vector of [2 x double] containing the values to be stored.\n" |
16427 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
16428 | "_mm_storeu_pd(double *__dp, __m128d __a)\n" |
16429 | "{\n" |
16430 | " struct __storeu_pd {\n" |
16431 | " __m128d __v;\n" |
16432 | " } __attribute__((__packed__, __may_alias__));\n" |
16433 | " ((struct __storeu_pd*)__dp)->__v = __a;\n" |
16434 | "}\n" |
16435 | "\n" |
16436 | "/// Stores two double-precision values, in reverse order, from a 128-bit\n" |
16437 | "/// vector of [2 x double] to a 16-byte aligned memory location.\n" |
16438 | "///\n" |
16439 | "/// \\headerfile <x86intrin.h>\n" |
16440 | "///\n" |
16441 | "/// This intrinsic corresponds to a shuffling instruction followed by a\n" |
16442 | "/// <c> VMOVAPD / MOVAPD </c> instruction.\n" |
16443 | "///\n" |
16444 | "/// \\param __dp\n" |
16445 | "/// A pointer to a 16-byte aligned memory location that can store two\n" |
16446 | "/// double-precision values.\n" |
16447 | "/// \\param __a\n" |
16448 | "/// A 128-bit vector of [2 x double] containing the values to be reversed and\n" |
16449 | "/// stored.\n" |
16450 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
16451 | "_mm_storer_pd(double *__dp, __m128d __a)\n" |
16452 | "{\n" |
16453 | " __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0);\n" |
16454 | " *(__m128d *)__dp = __a;\n" |
16455 | "}\n" |
16456 | "\n" |
16457 | "/// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a\n" |
16458 | "/// memory location.\n" |
16459 | "///\n" |
16460 | "/// \\headerfile <x86intrin.h>\n" |
16461 | "///\n" |
16462 | "/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.\n" |
16463 | "///\n" |
16464 | "/// \\param __dp\n" |
16465 | "/// A pointer to a 64-bit memory location.\n" |
16466 | "/// \\param __a\n" |
16467 | "/// A 128-bit vector of [2 x double] containing the value to be stored.\n" |
16468 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
16469 | "_mm_storeh_pd(double *__dp, __m128d __a)\n" |
16470 | "{\n" |
16471 | " struct __mm_storeh_pd_struct {\n" |
16472 | " double __u;\n" |
16473 | " } __attribute__((__packed__, __may_alias__));\n" |
16474 | " ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];\n" |
16475 | "}\n" |
16476 | "\n" |
16477 | "/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a\n" |
16478 | "/// memory location.\n" |
16479 | "///\n" |
16480 | "/// \\headerfile <x86intrin.h>\n" |
16481 | "///\n" |
16482 | "/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.\n" |
16483 | "///\n" |
16484 | "/// \\param __dp\n" |
16485 | "/// A pointer to a 64-bit memory location.\n" |
16486 | "/// \\param __a\n" |
16487 | "/// A 128-bit vector of [2 x double] containing the value to be stored.\n" |
16488 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
16489 | "_mm_storel_pd(double *__dp, __m128d __a)\n" |
16490 | "{\n" |
16491 | " struct __mm_storeh_pd_struct {\n" |
16492 | " double __u;\n" |
16493 | " } __attribute__((__packed__, __may_alias__));\n" |
16494 | " ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];\n" |
16495 | "}\n" |
16496 | "\n" |
16497 | "/// Adds the corresponding elements of two 128-bit vectors of [16 x i8],\n" |
16498 | "/// saving the lower 8 bits of each sum in the corresponding element of a\n" |
16499 | "/// 128-bit result vector of [16 x i8].\n" |
16500 | "///\n" |
16501 | "/// The integer elements of both parameters can be either signed or unsigned.\n" |
16502 | "///\n" |
16503 | "/// \\headerfile <x86intrin.h>\n" |
16504 | "///\n" |
16505 | "/// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction.\n" |
16506 | "///\n" |
16507 | "/// \\param __a\n" |
16508 | "/// A 128-bit vector of [16 x i8].\n" |
16509 | "/// \\param __b\n" |
16510 | "/// A 128-bit vector of [16 x i8].\n" |
16511 | "/// \\returns A 128-bit vector of [16 x i8] containing the sums of both\n" |
16512 | "/// parameters.\n" |
16513 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16514 | "_mm_add_epi8(__m128i __a, __m128i __b)\n" |
16515 | "{\n" |
16516 | " return (__m128i)((__v16qu)__a + (__v16qu)__b);\n" |
16517 | "}\n" |
16518 | "\n" |
16519 | "/// Adds the corresponding elements of two 128-bit vectors of [8 x i16],\n" |
16520 | "/// saving the lower 16 bits of each sum in the corresponding element of a\n" |
16521 | "/// 128-bit result vector of [8 x i16].\n" |
16522 | "///\n" |
16523 | "/// The integer elements of both parameters can be either signed or unsigned.\n" |
16524 | "///\n" |
16525 | "/// \\headerfile <x86intrin.h>\n" |
16526 | "///\n" |
16527 | "/// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction.\n" |
16528 | "///\n" |
16529 | "/// \\param __a\n" |
16530 | "/// A 128-bit vector of [8 x i16].\n" |
16531 | "/// \\param __b\n" |
16532 | "/// A 128-bit vector of [8 x i16].\n" |
16533 | "/// \\returns A 128-bit vector of [8 x i16] containing the sums of both\n" |
16534 | "/// parameters.\n" |
16535 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16536 | "_mm_add_epi16(__m128i __a, __m128i __b)\n" |
16537 | "{\n" |
16538 | " return (__m128i)((__v8hu)__a + (__v8hu)__b);\n" |
16539 | "}\n" |
16540 | "\n" |
16541 | "/// Adds the corresponding elements of two 128-bit vectors of [4 x i32],\n" |
16542 | "/// saving the lower 32 bits of each sum in the corresponding element of a\n" |
16543 | "/// 128-bit result vector of [4 x i32].\n" |
16544 | "///\n" |
16545 | "/// The integer elements of both parameters can be either signed or unsigned.\n" |
16546 | "///\n" |
16547 | "/// \\headerfile <x86intrin.h>\n" |
16548 | "///\n" |
16549 | "/// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction.\n" |
16550 | "///\n" |
16551 | "/// \\param __a\n" |
16552 | "/// A 128-bit vector of [4 x i32].\n" |
16553 | "/// \\param __b\n" |
16554 | "/// A 128-bit vector of [4 x i32].\n" |
16555 | "/// \\returns A 128-bit vector of [4 x i32] containing the sums of both\n" |
16556 | "/// parameters.\n" |
16557 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16558 | "_mm_add_epi32(__m128i __a, __m128i __b)\n" |
16559 | "{\n" |
16560 | " return (__m128i)((__v4su)__a + (__v4su)__b);\n" |
16561 | "}\n" |
16562 | "\n" |
16563 | "/// Adds two signed or unsigned 64-bit integer values, returning the\n" |
16564 | "/// lower 64 bits of the sum.\n" |
16565 | "///\n" |
16566 | "/// \\headerfile <x86intrin.h>\n" |
16567 | "///\n" |
16568 | "/// This intrinsic corresponds to the <c> PADDQ </c> instruction.\n" |
16569 | "///\n" |
16570 | "/// \\param __a\n" |
16571 | "/// A 64-bit integer.\n" |
16572 | "/// \\param __b\n" |
16573 | "/// A 64-bit integer.\n" |
16574 | "/// \\returns A 64-bit integer containing the sum of both parameters.\n" |
16575 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
16576 | "_mm_add_si64(__m64 __a, __m64 __b)\n" |
16577 | "{\n" |
16578 | " return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);\n" |
16579 | "}\n" |
16580 | "\n" |
16581 | "/// Adds the corresponding elements of two 128-bit vectors of [2 x i64],\n" |
16582 | "/// saving the lower 64 bits of each sum in the corresponding element of a\n" |
16583 | "/// 128-bit result vector of [2 x i64].\n" |
16584 | "///\n" |
16585 | "/// The integer elements of both parameters can be either signed or unsigned.\n" |
16586 | "///\n" |
16587 | "/// \\headerfile <x86intrin.h>\n" |
16588 | "///\n" |
16589 | "/// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction.\n" |
16590 | "///\n" |
16591 | "/// \\param __a\n" |
16592 | "/// A 128-bit vector of [2 x i64].\n" |
16593 | "/// \\param __b\n" |
16594 | "/// A 128-bit vector of [2 x i64].\n" |
16595 | "/// \\returns A 128-bit vector of [2 x i64] containing the sums of both\n" |
16596 | "/// parameters.\n" |
16597 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16598 | "_mm_add_epi64(__m128i __a, __m128i __b)\n" |
16599 | "{\n" |
16600 | " return (__m128i)((__v2du)__a + (__v2du)__b);\n" |
16601 | "}\n" |
16602 | "\n" |
16603 | "/// Adds, with saturation, the corresponding elements of two 128-bit\n" |
16604 | "/// signed [16 x i8] vectors, saving each sum in the corresponding element of\n" |
16605 | "/// a 128-bit result vector of [16 x i8]. Positive sums greater than 0x7F are\n" |
16606 | "/// saturated to 0x7F. Negative sums less than 0x80 are saturated to 0x80.\n" |
16607 | "///\n" |
16608 | "/// \\headerfile <x86intrin.h>\n" |
16609 | "///\n" |
16610 | "/// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction.\n" |
16611 | "///\n" |
16612 | "/// \\param __a\n" |
16613 | "/// A 128-bit signed [16 x i8] vector.\n" |
16614 | "/// \\param __b\n" |
16615 | "/// A 128-bit signed [16 x i8] vector.\n" |
16616 | "/// \\returns A 128-bit signed [16 x i8] vector containing the saturated sums of\n" |
16617 | "/// both parameters.\n" |
16618 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16619 | "_mm_adds_epi8(__m128i __a, __m128i __b)\n" |
16620 | "{\n" |
16621 | " return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);\n" |
16622 | "}\n" |
16623 | "\n" |
16624 | "/// Adds, with saturation, the corresponding elements of two 128-bit\n" |
16625 | "/// signed [8 x i16] vectors, saving each sum in the corresponding element of\n" |
16626 | "/// a 128-bit result vector of [8 x i16]. Positive sums greater than 0x7FFF\n" |
16627 | "/// are saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to\n" |
16628 | "/// 0x8000.\n" |
16629 | "///\n" |
16630 | "/// \\headerfile <x86intrin.h>\n" |
16631 | "///\n" |
16632 | "/// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction.\n" |
16633 | "///\n" |
16634 | "/// \\param __a\n" |
16635 | "/// A 128-bit signed [8 x i16] vector.\n" |
16636 | "/// \\param __b\n" |
16637 | "/// A 128-bit signed [8 x i16] vector.\n" |
16638 | "/// \\returns A 128-bit signed [8 x i16] vector containing the saturated sums of\n" |
16639 | "/// both parameters.\n" |
16640 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16641 | "_mm_adds_epi16(__m128i __a, __m128i __b)\n" |
16642 | "{\n" |
16643 | " return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);\n" |
16644 | "}\n" |
16645 | "\n" |
16646 | "/// Adds, with saturation, the corresponding elements of two 128-bit\n" |
16647 | "/// unsigned [16 x i8] vectors, saving each sum in the corresponding element\n" |
16648 | "/// of a 128-bit result vector of [16 x i8]. Positive sums greater than 0xFF\n" |
16649 | "/// are saturated to 0xFF. Negative sums are saturated to 0x00.\n" |
16650 | "///\n" |
16651 | "/// \\headerfile <x86intrin.h>\n" |
16652 | "///\n" |
16653 | "/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.\n" |
16654 | "///\n" |
16655 | "/// \\param __a\n" |
16656 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
16657 | "/// \\param __b\n" |
16658 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
16659 | "/// \\returns A 128-bit unsigned [16 x i8] vector containing the saturated sums\n" |
16660 | "/// of both parameters.\n" |
16661 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16662 | "_mm_adds_epu8(__m128i __a, __m128i __b)\n" |
16663 | "{\n" |
16664 | " return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);\n" |
16665 | "}\n" |
16666 | "\n" |
16667 | "/// Adds, with saturation, the corresponding elements of two 128-bit\n" |
16668 | "/// unsigned [8 x i16] vectors, saving each sum in the corresponding element\n" |
16669 | "/// of a 128-bit result vector of [8 x i16]. Positive sums greater than\n" |
16670 | "/// 0xFFFF are saturated to 0xFFFF. Negative sums are saturated to 0x0000.\n" |
16671 | "///\n" |
16672 | "/// \\headerfile <x86intrin.h>\n" |
16673 | "///\n" |
16674 | "/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.\n" |
16675 | "///\n" |
16676 | "/// \\param __a\n" |
16677 | "/// A 128-bit unsigned [8 x i16] vector.\n" |
16678 | "/// \\param __b\n" |
16679 | "/// A 128-bit unsigned [8 x i16] vector.\n" |
16680 | "/// \\returns A 128-bit unsigned [8 x i16] vector containing the saturated sums\n" |
16681 | "/// of both parameters.\n" |
16682 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16683 | "_mm_adds_epu16(__m128i __a, __m128i __b)\n" |
16684 | "{\n" |
16685 | " return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);\n" |
16686 | "}\n" |
16687 | "\n" |
16688 | "/// Computes the rounded avarages of corresponding elements of two\n" |
16689 | "/// 128-bit unsigned [16 x i8] vectors, saving each result in the\n" |
16690 | "/// corresponding element of a 128-bit result vector of [16 x i8].\n" |
16691 | "///\n" |
16692 | "/// \\headerfile <x86intrin.h>\n" |
16693 | "///\n" |
16694 | "/// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction.\n" |
16695 | "///\n" |
16696 | "/// \\param __a\n" |
16697 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
16698 | "/// \\param __b\n" |
16699 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
16700 | "/// \\returns A 128-bit unsigned [16 x i8] vector containing the rounded\n" |
16701 | "/// averages of both parameters.\n" |
16702 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16703 | "_mm_avg_epu8(__m128i __a, __m128i __b)\n" |
16704 | "{\n" |
16705 | " typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));\n" |
16706 | " return (__m128i)__builtin_convertvector(\n" |
16707 | " ((__builtin_convertvector((__v16qu)__a, __v16hu) +\n" |
16708 | " __builtin_convertvector((__v16qu)__b, __v16hu)) + 1)\n" |
16709 | " >> 1, __v16qu);\n" |
16710 | "}\n" |
16711 | "\n" |
16712 | "/// Computes the rounded avarages of corresponding elements of two\n" |
16713 | "/// 128-bit unsigned [8 x i16] vectors, saving each result in the\n" |
16714 | "/// corresponding element of a 128-bit result vector of [8 x i16].\n" |
16715 | "///\n" |
16716 | "/// \\headerfile <x86intrin.h>\n" |
16717 | "///\n" |
16718 | "/// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction.\n" |
16719 | "///\n" |
16720 | "/// \\param __a\n" |
16721 | "/// A 128-bit unsigned [8 x i16] vector.\n" |
16722 | "/// \\param __b\n" |
16723 | "/// A 128-bit unsigned [8 x i16] vector.\n" |
16724 | "/// \\returns A 128-bit unsigned [8 x i16] vector containing the rounded\n" |
16725 | "/// averages of both parameters.\n" |
16726 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16727 | "_mm_avg_epu16(__m128i __a, __m128i __b)\n" |
16728 | "{\n" |
16729 | " typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));\n" |
16730 | " return (__m128i)__builtin_convertvector(\n" |
16731 | " ((__builtin_convertvector((__v8hu)__a, __v8su) +\n" |
16732 | " __builtin_convertvector((__v8hu)__b, __v8su)) + 1)\n" |
16733 | " >> 1, __v8hu);\n" |
16734 | "}\n" |
16735 | "\n" |
16736 | "/// Multiplies the corresponding elements of two 128-bit signed [8 x i16]\n" |
16737 | "/// vectors, producing eight intermediate 32-bit signed integer products, and\n" |
16738 | "/// adds the consecutive pairs of 32-bit products to form a 128-bit signed\n" |
16739 | "/// [4 x i32] vector.\n" |
16740 | "///\n" |
16741 | "/// For example, bits [15:0] of both parameters are multiplied producing a\n" |
16742 | "/// 32-bit product, bits [31:16] of both parameters are multiplied producing\n" |
16743 | "/// a 32-bit product, and the sum of those two products becomes bits [31:0]\n" |
16744 | "/// of the result.\n" |
16745 | "///\n" |
16746 | "/// \\headerfile <x86intrin.h>\n" |
16747 | "///\n" |
16748 | "/// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction.\n" |
16749 | "///\n" |
16750 | "/// \\param __a\n" |
16751 | "/// A 128-bit signed [8 x i16] vector.\n" |
16752 | "/// \\param __b\n" |
16753 | "/// A 128-bit signed [8 x i16] vector.\n" |
16754 | "/// \\returns A 128-bit signed [4 x i32] vector containing the sums of products\n" |
16755 | "/// of both parameters.\n" |
16756 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16757 | "_mm_madd_epi16(__m128i __a, __m128i __b)\n" |
16758 | "{\n" |
16759 | " return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);\n" |
16760 | "}\n" |
16761 | "\n" |
16762 | "/// Compares corresponding elements of two 128-bit signed [8 x i16]\n" |
16763 | "/// vectors, saving the greater value from each comparison in the\n" |
16764 | "/// corresponding element of a 128-bit result vector of [8 x i16].\n" |
16765 | "///\n" |
16766 | "/// \\headerfile <x86intrin.h>\n" |
16767 | "///\n" |
16768 | "/// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction.\n" |
16769 | "///\n" |
16770 | "/// \\param __a\n" |
16771 | "/// A 128-bit signed [8 x i16] vector.\n" |
16772 | "/// \\param __b\n" |
16773 | "/// A 128-bit signed [8 x i16] vector.\n" |
16774 | "/// \\returns A 128-bit signed [8 x i16] vector containing the greater value of\n" |
16775 | "/// each comparison.\n" |
16776 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16777 | "_mm_max_epi16(__m128i __a, __m128i __b)\n" |
16778 | "{\n" |
16779 | " return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);\n" |
16780 | "}\n" |
16781 | "\n" |
16782 | "/// Compares corresponding elements of two 128-bit unsigned [16 x i8]\n" |
16783 | "/// vectors, saving the greater value from each comparison in the\n" |
16784 | "/// corresponding element of a 128-bit result vector of [16 x i8].\n" |
16785 | "///\n" |
16786 | "/// \\headerfile <x86intrin.h>\n" |
16787 | "///\n" |
16788 | "/// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction.\n" |
16789 | "///\n" |
16790 | "/// \\param __a\n" |
16791 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
16792 | "/// \\param __b\n" |
16793 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
16794 | "/// \\returns A 128-bit unsigned [16 x i8] vector containing the greater value of\n" |
16795 | "/// each comparison.\n" |
16796 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16797 | "_mm_max_epu8(__m128i __a, __m128i __b)\n" |
16798 | "{\n" |
16799 | " return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);\n" |
16800 | "}\n" |
16801 | "\n" |
16802 | "/// Compares corresponding elements of two 128-bit signed [8 x i16]\n" |
16803 | "/// vectors, saving the smaller value from each comparison in the\n" |
16804 | "/// corresponding element of a 128-bit result vector of [8 x i16].\n" |
16805 | "///\n" |
16806 | "/// \\headerfile <x86intrin.h>\n" |
16807 | "///\n" |
16808 | "/// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction.\n" |
16809 | "///\n" |
16810 | "/// \\param __a\n" |
16811 | "/// A 128-bit signed [8 x i16] vector.\n" |
16812 | "/// \\param __b\n" |
16813 | "/// A 128-bit signed [8 x i16] vector.\n" |
16814 | "/// \\returns A 128-bit signed [8 x i16] vector containing the smaller value of\n" |
16815 | "/// each comparison.\n" |
16816 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16817 | "_mm_min_epi16(__m128i __a, __m128i __b)\n" |
16818 | "{\n" |
16819 | " return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);\n" |
16820 | "}\n" |
16821 | "\n" |
16822 | "/// Compares corresponding elements of two 128-bit unsigned [16 x i8]\n" |
16823 | "/// vectors, saving the smaller value from each comparison in the\n" |
16824 | "/// corresponding element of a 128-bit result vector of [16 x i8].\n" |
16825 | "///\n" |
16826 | "/// \\headerfile <x86intrin.h>\n" |
16827 | "///\n" |
16828 | "/// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction.\n" |
16829 | "///\n" |
16830 | "/// \\param __a\n" |
16831 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
16832 | "/// \\param __b\n" |
16833 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
16834 | "/// \\returns A 128-bit unsigned [16 x i8] vector containing the smaller value of\n" |
16835 | "/// each comparison.\n" |
16836 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16837 | "_mm_min_epu8(__m128i __a, __m128i __b)\n" |
16838 | "{\n" |
16839 | " return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);\n" |
16840 | "}\n" |
16841 | "\n" |
16842 | "/// Multiplies the corresponding elements of two signed [8 x i16]\n" |
16843 | "/// vectors, saving the upper 16 bits of each 32-bit product in the\n" |
16844 | "/// corresponding element of a 128-bit signed [8 x i16] result vector.\n" |
16845 | "///\n" |
16846 | "/// \\headerfile <x86intrin.h>\n" |
16847 | "///\n" |
16848 | "/// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction.\n" |
16849 | "///\n" |
16850 | "/// \\param __a\n" |
16851 | "/// A 128-bit signed [8 x i16] vector.\n" |
16852 | "/// \\param __b\n" |
16853 | "/// A 128-bit signed [8 x i16] vector.\n" |
16854 | "/// \\returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of\n" |
16855 | "/// each of the eight 32-bit products.\n" |
16856 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16857 | "_mm_mulhi_epi16(__m128i __a, __m128i __b)\n" |
16858 | "{\n" |
16859 | " return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);\n" |
16860 | "}\n" |
16861 | "\n" |
16862 | "/// Multiplies the corresponding elements of two unsigned [8 x i16]\n" |
16863 | "/// vectors, saving the upper 16 bits of each 32-bit product in the\n" |
16864 | "/// corresponding element of a 128-bit unsigned [8 x i16] result vector.\n" |
16865 | "///\n" |
16866 | "/// \\headerfile <x86intrin.h>\n" |
16867 | "///\n" |
16868 | "/// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction.\n" |
16869 | "///\n" |
16870 | "/// \\param __a\n" |
16871 | "/// A 128-bit unsigned [8 x i16] vector.\n" |
16872 | "/// \\param __b\n" |
16873 | "/// A 128-bit unsigned [8 x i16] vector.\n" |
16874 | "/// \\returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits\n" |
16875 | "/// of each of the eight 32-bit products.\n" |
16876 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16877 | "_mm_mulhi_epu16(__m128i __a, __m128i __b)\n" |
16878 | "{\n" |
16879 | " return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);\n" |
16880 | "}\n" |
16881 | "\n" |
16882 | "/// Multiplies the corresponding elements of two signed [8 x i16]\n" |
16883 | "/// vectors, saving the lower 16 bits of each 32-bit product in the\n" |
16884 | "/// corresponding element of a 128-bit signed [8 x i16] result vector.\n" |
16885 | "///\n" |
16886 | "/// \\headerfile <x86intrin.h>\n" |
16887 | "///\n" |
16888 | "/// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction.\n" |
16889 | "///\n" |
16890 | "/// \\param __a\n" |
16891 | "/// A 128-bit signed [8 x i16] vector.\n" |
16892 | "/// \\param __b\n" |
16893 | "/// A 128-bit signed [8 x i16] vector.\n" |
16894 | "/// \\returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of\n" |
16895 | "/// each of the eight 32-bit products.\n" |
16896 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16897 | "_mm_mullo_epi16(__m128i __a, __m128i __b)\n" |
16898 | "{\n" |
16899 | " return (__m128i)((__v8hu)__a * (__v8hu)__b);\n" |
16900 | "}\n" |
16901 | "\n" |
16902 | "/// Multiplies 32-bit unsigned integer values contained in the lower bits\n" |
16903 | "/// of the two 64-bit integer vectors and returns the 64-bit unsigned\n" |
16904 | "/// product.\n" |
16905 | "///\n" |
16906 | "/// \\headerfile <x86intrin.h>\n" |
16907 | "///\n" |
16908 | "/// This intrinsic corresponds to the <c> PMULUDQ </c> instruction.\n" |
16909 | "///\n" |
16910 | "/// \\param __a\n" |
16911 | "/// A 64-bit integer containing one of the source operands.\n" |
16912 | "/// \\param __b\n" |
16913 | "/// A 64-bit integer containing one of the source operands.\n" |
16914 | "/// \\returns A 64-bit integer vector containing the product of both operands.\n" |
16915 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
16916 | "_mm_mul_su32(__m64 __a, __m64 __b)\n" |
16917 | "{\n" |
16918 | " return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);\n" |
16919 | "}\n" |
16920 | "\n" |
16921 | "/// Multiplies 32-bit unsigned integer values contained in the lower\n" |
16922 | "/// bits of the corresponding elements of two [2 x i64] vectors, and returns\n" |
16923 | "/// the 64-bit products in the corresponding elements of a [2 x i64] vector.\n" |
16924 | "///\n" |
16925 | "/// \\headerfile <x86intrin.h>\n" |
16926 | "///\n" |
16927 | "/// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction.\n" |
16928 | "///\n" |
16929 | "/// \\param __a\n" |
16930 | "/// A [2 x i64] vector containing one of the source operands.\n" |
16931 | "/// \\param __b\n" |
16932 | "/// A [2 x i64] vector containing one of the source operands.\n" |
16933 | "/// \\returns A [2 x i64] vector containing the product of both operands.\n" |
16934 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16935 | "_mm_mul_epu32(__m128i __a, __m128i __b)\n" |
16936 | "{\n" |
16937 | " return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);\n" |
16938 | "}\n" |
16939 | "\n" |
16940 | "/// Computes the absolute differences of corresponding 8-bit integer\n" |
16941 | "/// values in two 128-bit vectors. Sums the first 8 absolute differences, and\n" |
16942 | "/// separately sums the second 8 absolute differences. Packs these two\n" |
16943 | "/// unsigned 16-bit integer sums into the upper and lower elements of a\n" |
16944 | "/// [2 x i64] vector.\n" |
16945 | "///\n" |
16946 | "/// \\headerfile <x86intrin.h>\n" |
16947 | "///\n" |
16948 | "/// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction.\n" |
16949 | "///\n" |
16950 | "/// \\param __a\n" |
16951 | "/// A 128-bit integer vector containing one of the source operands.\n" |
16952 | "/// \\param __b\n" |
16953 | "/// A 128-bit integer vector containing one of the source operands.\n" |
16954 | "/// \\returns A [2 x i64] vector containing the sums of the sets of absolute\n" |
16955 | "/// differences between both operands.\n" |
16956 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16957 | "_mm_sad_epu8(__m128i __a, __m128i __b)\n" |
16958 | "{\n" |
16959 | " return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);\n" |
16960 | "}\n" |
16961 | "\n" |
16962 | "/// Subtracts the corresponding 8-bit integer values in the operands.\n" |
16963 | "///\n" |
16964 | "/// \\headerfile <x86intrin.h>\n" |
16965 | "///\n" |
16966 | "/// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction.\n" |
16967 | "///\n" |
16968 | "/// \\param __a\n" |
16969 | "/// A 128-bit integer vector containing the minuends.\n" |
16970 | "/// \\param __b\n" |
16971 | "/// A 128-bit integer vector containing the subtrahends.\n" |
16972 | "/// \\returns A 128-bit integer vector containing the differences of the values\n" |
16973 | "/// in the operands.\n" |
16974 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16975 | "_mm_sub_epi8(__m128i __a, __m128i __b)\n" |
16976 | "{\n" |
16977 | " return (__m128i)((__v16qu)__a - (__v16qu)__b);\n" |
16978 | "}\n" |
16979 | "\n" |
16980 | "/// Subtracts the corresponding 16-bit integer values in the operands.\n" |
16981 | "///\n" |
16982 | "/// \\headerfile <x86intrin.h>\n" |
16983 | "///\n" |
16984 | "/// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction.\n" |
16985 | "///\n" |
16986 | "/// \\param __a\n" |
16987 | "/// A 128-bit integer vector containing the minuends.\n" |
16988 | "/// \\param __b\n" |
16989 | "/// A 128-bit integer vector containing the subtrahends.\n" |
16990 | "/// \\returns A 128-bit integer vector containing the differences of the values\n" |
16991 | "/// in the operands.\n" |
16992 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
16993 | "_mm_sub_epi16(__m128i __a, __m128i __b)\n" |
16994 | "{\n" |
16995 | " return (__m128i)((__v8hu)__a - (__v8hu)__b);\n" |
16996 | "}\n" |
16997 | "\n" |
16998 | "/// Subtracts the corresponding 32-bit integer values in the operands.\n" |
16999 | "///\n" |
17000 | "/// \\headerfile <x86intrin.h>\n" |
17001 | "///\n" |
17002 | "/// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction.\n" |
17003 | "///\n" |
17004 | "/// \\param __a\n" |
17005 | "/// A 128-bit integer vector containing the minuends.\n" |
17006 | "/// \\param __b\n" |
17007 | "/// A 128-bit integer vector containing the subtrahends.\n" |
17008 | "/// \\returns A 128-bit integer vector containing the differences of the values\n" |
17009 | "/// in the operands.\n" |
17010 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17011 | "_mm_sub_epi32(__m128i __a, __m128i __b)\n" |
17012 | "{\n" |
17013 | " return (__m128i)((__v4su)__a - (__v4su)__b);\n" |
17014 | "}\n" |
17015 | "\n" |
17016 | "/// Subtracts signed or unsigned 64-bit integer values and writes the\n" |
17017 | "/// difference to the corresponding bits in the destination.\n" |
17018 | "///\n" |
17019 | "/// \\headerfile <x86intrin.h>\n" |
17020 | "///\n" |
17021 | "/// This intrinsic corresponds to the <c> PSUBQ </c> instruction.\n" |
17022 | "///\n" |
17023 | "/// \\param __a\n" |
17024 | "/// A 64-bit integer vector containing the minuend.\n" |
17025 | "/// \\param __b\n" |
17026 | "/// A 64-bit integer vector containing the subtrahend.\n" |
17027 | "/// \\returns A 64-bit integer vector containing the difference of the values in\n" |
17028 | "/// the operands.\n" |
17029 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
17030 | "_mm_sub_si64(__m64 __a, __m64 __b)\n" |
17031 | "{\n" |
17032 | " return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b);\n" |
17033 | "}\n" |
17034 | "\n" |
17035 | "/// Subtracts the corresponding elements of two [2 x i64] vectors.\n" |
17036 | "///\n" |
17037 | "/// \\headerfile <x86intrin.h>\n" |
17038 | "///\n" |
17039 | "/// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction.\n" |
17040 | "///\n" |
17041 | "/// \\param __a\n" |
17042 | "/// A 128-bit integer vector containing the minuends.\n" |
17043 | "/// \\param __b\n" |
17044 | "/// A 128-bit integer vector containing the subtrahends.\n" |
17045 | "/// \\returns A 128-bit integer vector containing the differences of the values\n" |
17046 | "/// in the operands.\n" |
17047 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17048 | "_mm_sub_epi64(__m128i __a, __m128i __b)\n" |
17049 | "{\n" |
17050 | " return (__m128i)((__v2du)__a - (__v2du)__b);\n" |
17051 | "}\n" |
17052 | "\n" |
17053 | "/// Subtracts corresponding 8-bit signed integer values in the input and\n" |
17054 | "/// returns the differences in the corresponding bytes in the destination.\n" |
17055 | "/// Differences greater than 0x7F are saturated to 0x7F, and differences less\n" |
17056 | "/// than 0x80 are saturated to 0x80.\n" |
17057 | "///\n" |
17058 | "/// \\headerfile <x86intrin.h>\n" |
17059 | "///\n" |
17060 | "/// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction.\n" |
17061 | "///\n" |
17062 | "/// \\param __a\n" |
17063 | "/// A 128-bit integer vector containing the minuends.\n" |
17064 | "/// \\param __b\n" |
17065 | "/// A 128-bit integer vector containing the subtrahends.\n" |
17066 | "/// \\returns A 128-bit integer vector containing the differences of the values\n" |
17067 | "/// in the operands.\n" |
17068 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17069 | "_mm_subs_epi8(__m128i __a, __m128i __b)\n" |
17070 | "{\n" |
17071 | " return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);\n" |
17072 | "}\n" |
17073 | "\n" |
17074 | "/// Subtracts corresponding 16-bit signed integer values in the input and\n" |
17075 | "/// returns the differences in the corresponding bytes in the destination.\n" |
17076 | "/// Differences greater than 0x7FFF are saturated to 0x7FFF, and values less\n" |
17077 | "/// than 0x8000 are saturated to 0x8000.\n" |
17078 | "///\n" |
17079 | "/// \\headerfile <x86intrin.h>\n" |
17080 | "///\n" |
17081 | "/// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction.\n" |
17082 | "///\n" |
17083 | "/// \\param __a\n" |
17084 | "/// A 128-bit integer vector containing the minuends.\n" |
17085 | "/// \\param __b\n" |
17086 | "/// A 128-bit integer vector containing the subtrahends.\n" |
17087 | "/// \\returns A 128-bit integer vector containing the differences of the values\n" |
17088 | "/// in the operands.\n" |
17089 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17090 | "_mm_subs_epi16(__m128i __a, __m128i __b)\n" |
17091 | "{\n" |
17092 | " return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);\n" |
17093 | "}\n" |
17094 | "\n" |
17095 | "/// Subtracts corresponding 8-bit unsigned integer values in the input\n" |
17096 | "/// and returns the differences in the corresponding bytes in the\n" |
17097 | "/// destination. Differences less than 0x00 are saturated to 0x00.\n" |
17098 | "///\n" |
17099 | "/// \\headerfile <x86intrin.h>\n" |
17100 | "///\n" |
17101 | "/// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction.\n" |
17102 | "///\n" |
17103 | "/// \\param __a\n" |
17104 | "/// A 128-bit integer vector containing the minuends.\n" |
17105 | "/// \\param __b\n" |
17106 | "/// A 128-bit integer vector containing the subtrahends.\n" |
17107 | "/// \\returns A 128-bit integer vector containing the unsigned integer\n" |
17108 | "/// differences of the values in the operands.\n" |
17109 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17110 | "_mm_subs_epu8(__m128i __a, __m128i __b)\n" |
17111 | "{\n" |
17112 | " return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);\n" |
17113 | "}\n" |
17114 | "\n" |
17115 | "/// Subtracts corresponding 16-bit unsigned integer values in the input\n" |
17116 | "/// and returns the differences in the corresponding bytes in the\n" |
17117 | "/// destination. Differences less than 0x0000 are saturated to 0x0000.\n" |
17118 | "///\n" |
17119 | "/// \\headerfile <x86intrin.h>\n" |
17120 | "///\n" |
17121 | "/// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction.\n" |
17122 | "///\n" |
17123 | "/// \\param __a\n" |
17124 | "/// A 128-bit integer vector containing the minuends.\n" |
17125 | "/// \\param __b\n" |
17126 | "/// A 128-bit integer vector containing the subtrahends.\n" |
17127 | "/// \\returns A 128-bit integer vector containing the unsigned integer\n" |
17128 | "/// differences of the values in the operands.\n" |
17129 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17130 | "_mm_subs_epu16(__m128i __a, __m128i __b)\n" |
17131 | "{\n" |
17132 | " return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);\n" |
17133 | "}\n" |
17134 | "\n" |
17135 | "/// Performs a bitwise AND of two 128-bit integer vectors.\n" |
17136 | "///\n" |
17137 | "/// \\headerfile <x86intrin.h>\n" |
17138 | "///\n" |
17139 | "/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.\n" |
17140 | "///\n" |
17141 | "/// \\param __a\n" |
17142 | "/// A 128-bit integer vector containing one of the source operands.\n" |
17143 | "/// \\param __b\n" |
17144 | "/// A 128-bit integer vector containing one of the source operands.\n" |
17145 | "/// \\returns A 128-bit integer vector containing the bitwise AND of the values\n" |
17146 | "/// in both operands.\n" |
17147 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17148 | "_mm_and_si128(__m128i __a, __m128i __b)\n" |
17149 | "{\n" |
17150 | " return (__m128i)((__v2du)__a & (__v2du)__b);\n" |
17151 | "}\n" |
17152 | "\n" |
17153 | "/// Performs a bitwise AND of two 128-bit integer vectors, using the\n" |
17154 | "/// one's complement of the values contained in the first source operand.\n" |
17155 | "///\n" |
17156 | "/// \\headerfile <x86intrin.h>\n" |
17157 | "///\n" |
17158 | "/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.\n" |
17159 | "///\n" |
17160 | "/// \\param __a\n" |
17161 | "/// A 128-bit vector containing the left source operand. The one's complement\n" |
17162 | "/// of this value is used in the bitwise AND.\n" |
17163 | "/// \\param __b\n" |
17164 | "/// A 128-bit vector containing the right source operand.\n" |
17165 | "/// \\returns A 128-bit integer vector containing the bitwise AND of the one's\n" |
17166 | "/// complement of the first operand and the values in the second operand.\n" |
17167 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17168 | "_mm_andnot_si128(__m128i __a, __m128i __b)\n" |
17169 | "{\n" |
17170 | " return (__m128i)(~(__v2du)__a & (__v2du)__b);\n" |
17171 | "}\n" |
17172 | "/// Performs a bitwise OR of two 128-bit integer vectors.\n" |
17173 | "///\n" |
17174 | "/// \\headerfile <x86intrin.h>\n" |
17175 | "///\n" |
17176 | "/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.\n" |
17177 | "///\n" |
17178 | "/// \\param __a\n" |
17179 | "/// A 128-bit integer vector containing one of the source operands.\n" |
17180 | "/// \\param __b\n" |
17181 | "/// A 128-bit integer vector containing one of the source operands.\n" |
17182 | "/// \\returns A 128-bit integer vector containing the bitwise OR of the values\n" |
17183 | "/// in both operands.\n" |
17184 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17185 | "_mm_or_si128(__m128i __a, __m128i __b)\n" |
17186 | "{\n" |
17187 | " return (__m128i)((__v2du)__a | (__v2du)__b);\n" |
17188 | "}\n" |
17189 | "\n" |
17190 | "/// Performs a bitwise exclusive OR of two 128-bit integer vectors.\n" |
17191 | "///\n" |
17192 | "/// \\headerfile <x86intrin.h>\n" |
17193 | "///\n" |
17194 | "/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.\n" |
17195 | "///\n" |
17196 | "/// \\param __a\n" |
17197 | "/// A 128-bit integer vector containing one of the source operands.\n" |
17198 | "/// \\param __b\n" |
17199 | "/// A 128-bit integer vector containing one of the source operands.\n" |
17200 | "/// \\returns A 128-bit integer vector containing the bitwise exclusive OR of the\n" |
17201 | "/// values in both operands.\n" |
17202 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17203 | "_mm_xor_si128(__m128i __a, __m128i __b)\n" |
17204 | "{\n" |
17205 | " return (__m128i)((__v2du)__a ^ (__v2du)__b);\n" |
17206 | "}\n" |
17207 | "\n" |
17208 | "/// Left-shifts the 128-bit integer vector operand by the specified\n" |
17209 | "/// number of bytes. Low-order bits are cleared.\n" |
17210 | "///\n" |
17211 | "/// \\headerfile <x86intrin.h>\n" |
17212 | "///\n" |
17213 | "/// \\code\n" |
17214 | "/// __m128i _mm_slli_si128(__m128i a, const int imm);\n" |
17215 | "/// \\endcode\n" |
17216 | "///\n" |
17217 | "/// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction.\n" |
17218 | "///\n" |
17219 | "/// \\param a\n" |
17220 | "/// A 128-bit integer vector containing the source operand.\n" |
17221 | "/// \\param imm\n" |
17222 | "/// An immediate value specifying the number of bytes to left-shift operand\n" |
17223 | "/// \\a a.\n" |
17224 | "/// \\returns A 128-bit integer vector containing the left-shifted value.\n" |
17225 | "#define _mm_slli_si128(a, imm) \\\n" |
17226 | " (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n" |
17227 | "\n" |
17228 | "#define _mm_bslli_si128(a, imm) \\\n" |
17229 | " (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n" |
17230 | "\n" |
17231 | "/// Left-shifts each 16-bit value in the 128-bit integer vector operand\n" |
17232 | "/// by the specified number of bits. Low-order bits are cleared.\n" |
17233 | "///\n" |
17234 | "/// \\headerfile <x86intrin.h>\n" |
17235 | "///\n" |
17236 | "/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.\n" |
17237 | "///\n" |
17238 | "/// \\param __a\n" |
17239 | "/// A 128-bit integer vector containing the source operand.\n" |
17240 | "/// \\param __count\n" |
17241 | "/// An integer value specifying the number of bits to left-shift each value\n" |
17242 | "/// in operand \\a __a.\n" |
17243 | "/// \\returns A 128-bit integer vector containing the left-shifted values.\n" |
17244 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17245 | "_mm_slli_epi16(__m128i __a, int __count)\n" |
17246 | "{\n" |
17247 | " return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);\n" |
17248 | "}\n" |
17249 | "\n" |
17250 | "/// Left-shifts each 16-bit value in the 128-bit integer vector operand\n" |
17251 | "/// by the specified number of bits. Low-order bits are cleared.\n" |
17252 | "///\n" |
17253 | "/// \\headerfile <x86intrin.h>\n" |
17254 | "///\n" |
17255 | "/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.\n" |
17256 | "///\n" |
17257 | "/// \\param __a\n" |
17258 | "/// A 128-bit integer vector containing the source operand.\n" |
17259 | "/// \\param __count\n" |
17260 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
17261 | "/// to left-shift each value in operand \\a __a.\n" |
17262 | "/// \\returns A 128-bit integer vector containing the left-shifted values.\n" |
17263 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17264 | "_mm_sll_epi16(__m128i __a, __m128i __count)\n" |
17265 | "{\n" |
17266 | " return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);\n" |
17267 | "}\n" |
17268 | "\n" |
17269 | "/// Left-shifts each 32-bit value in the 128-bit integer vector operand\n" |
17270 | "/// by the specified number of bits. Low-order bits are cleared.\n" |
17271 | "///\n" |
17272 | "/// \\headerfile <x86intrin.h>\n" |
17273 | "///\n" |
17274 | "/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.\n" |
17275 | "///\n" |
17276 | "/// \\param __a\n" |
17277 | "/// A 128-bit integer vector containing the source operand.\n" |
17278 | "/// \\param __count\n" |
17279 | "/// An integer value specifying the number of bits to left-shift each value\n" |
17280 | "/// in operand \\a __a.\n" |
17281 | "/// \\returns A 128-bit integer vector containing the left-shifted values.\n" |
17282 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17283 | "_mm_slli_epi32(__m128i __a, int __count)\n" |
17284 | "{\n" |
17285 | " return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);\n" |
17286 | "}\n" |
17287 | "\n" |
17288 | "/// Left-shifts each 32-bit value in the 128-bit integer vector operand\n" |
17289 | "/// by the specified number of bits. Low-order bits are cleared.\n" |
17290 | "///\n" |
17291 | "/// \\headerfile <x86intrin.h>\n" |
17292 | "///\n" |
17293 | "/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.\n" |
17294 | "///\n" |
17295 | "/// \\param __a\n" |
17296 | "/// A 128-bit integer vector containing the source operand.\n" |
17297 | "/// \\param __count\n" |
17298 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
17299 | "/// to left-shift each value in operand \\a __a.\n" |
17300 | "/// \\returns A 128-bit integer vector containing the left-shifted values.\n" |
17301 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17302 | "_mm_sll_epi32(__m128i __a, __m128i __count)\n" |
17303 | "{\n" |
17304 | " return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);\n" |
17305 | "}\n" |
17306 | "\n" |
17307 | "/// Left-shifts each 64-bit value in the 128-bit integer vector operand\n" |
17308 | "/// by the specified number of bits. Low-order bits are cleared.\n" |
17309 | "///\n" |
17310 | "/// \\headerfile <x86intrin.h>\n" |
17311 | "///\n" |
17312 | "/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.\n" |
17313 | "///\n" |
17314 | "/// \\param __a\n" |
17315 | "/// A 128-bit integer vector containing the source operand.\n" |
17316 | "/// \\param __count\n" |
17317 | "/// An integer value specifying the number of bits to left-shift each value\n" |
17318 | "/// in operand \\a __a.\n" |
17319 | "/// \\returns A 128-bit integer vector containing the left-shifted values.\n" |
17320 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17321 | "_mm_slli_epi64(__m128i __a, int __count)\n" |
17322 | "{\n" |
17323 | " return __builtin_ia32_psllqi128((__v2di)__a, __count);\n" |
17324 | "}\n" |
17325 | "\n" |
17326 | "/// Left-shifts each 64-bit value in the 128-bit integer vector operand\n" |
17327 | "/// by the specified number of bits. Low-order bits are cleared.\n" |
17328 | "///\n" |
17329 | "/// \\headerfile <x86intrin.h>\n" |
17330 | "///\n" |
17331 | "/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.\n" |
17332 | "///\n" |
17333 | "/// \\param __a\n" |
17334 | "/// A 128-bit integer vector containing the source operand.\n" |
17335 | "/// \\param __count\n" |
17336 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
17337 | "/// to left-shift each value in operand \\a __a.\n" |
17338 | "/// \\returns A 128-bit integer vector containing the left-shifted values.\n" |
17339 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17340 | "_mm_sll_epi64(__m128i __a, __m128i __count)\n" |
17341 | "{\n" |
17342 | " return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count);\n" |
17343 | "}\n" |
17344 | "\n" |
17345 | "/// Right-shifts each 16-bit value in the 128-bit integer vector operand\n" |
17346 | "/// by the specified number of bits. High-order bits are filled with the sign\n" |
17347 | "/// bit of the initial value.\n" |
17348 | "///\n" |
17349 | "/// \\headerfile <x86intrin.h>\n" |
17350 | "///\n" |
17351 | "/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.\n" |
17352 | "///\n" |
17353 | "/// \\param __a\n" |
17354 | "/// A 128-bit integer vector containing the source operand.\n" |
17355 | "/// \\param __count\n" |
17356 | "/// An integer value specifying the number of bits to right-shift each value\n" |
17357 | "/// in operand \\a __a.\n" |
17358 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
17359 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17360 | "_mm_srai_epi16(__m128i __a, int __count)\n" |
17361 | "{\n" |
17362 | " return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);\n" |
17363 | "}\n" |
17364 | "\n" |
17365 | "/// Right-shifts each 16-bit value in the 128-bit integer vector operand\n" |
17366 | "/// by the specified number of bits. High-order bits are filled with the sign\n" |
17367 | "/// bit of the initial value.\n" |
17368 | "///\n" |
17369 | "/// \\headerfile <x86intrin.h>\n" |
17370 | "///\n" |
17371 | "/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.\n" |
17372 | "///\n" |
17373 | "/// \\param __a\n" |
17374 | "/// A 128-bit integer vector containing the source operand.\n" |
17375 | "/// \\param __count\n" |
17376 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
17377 | "/// to right-shift each value in operand \\a __a.\n" |
17378 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
17379 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17380 | "_mm_sra_epi16(__m128i __a, __m128i __count)\n" |
17381 | "{\n" |
17382 | " return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);\n" |
17383 | "}\n" |
17384 | "\n" |
17385 | "/// Right-shifts each 32-bit value in the 128-bit integer vector operand\n" |
17386 | "/// by the specified number of bits. High-order bits are filled with the sign\n" |
17387 | "/// bit of the initial value.\n" |
17388 | "///\n" |
17389 | "/// \\headerfile <x86intrin.h>\n" |
17390 | "///\n" |
17391 | "/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.\n" |
17392 | "///\n" |
17393 | "/// \\param __a\n" |
17394 | "/// A 128-bit integer vector containing the source operand.\n" |
17395 | "/// \\param __count\n" |
17396 | "/// An integer value specifying the number of bits to right-shift each value\n" |
17397 | "/// in operand \\a __a.\n" |
17398 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
17399 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17400 | "_mm_srai_epi32(__m128i __a, int __count)\n" |
17401 | "{\n" |
17402 | " return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);\n" |
17403 | "}\n" |
17404 | "\n" |
17405 | "/// Right-shifts each 32-bit value in the 128-bit integer vector operand\n" |
17406 | "/// by the specified number of bits. High-order bits are filled with the sign\n" |
17407 | "/// bit of the initial value.\n" |
17408 | "///\n" |
17409 | "/// \\headerfile <x86intrin.h>\n" |
17410 | "///\n" |
17411 | "/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.\n" |
17412 | "///\n" |
17413 | "/// \\param __a\n" |
17414 | "/// A 128-bit integer vector containing the source operand.\n" |
17415 | "/// \\param __count\n" |
17416 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
17417 | "/// to right-shift each value in operand \\a __a.\n" |
17418 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
17419 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17420 | "_mm_sra_epi32(__m128i __a, __m128i __count)\n" |
17421 | "{\n" |
17422 | " return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);\n" |
17423 | "}\n" |
17424 | "\n" |
17425 | "/// Right-shifts the 128-bit integer vector operand by the specified\n" |
17426 | "/// number of bytes. High-order bits are cleared.\n" |
17427 | "///\n" |
17428 | "/// \\headerfile <x86intrin.h>\n" |
17429 | "///\n" |
17430 | "/// \\code\n" |
17431 | "/// __m128i _mm_srli_si128(__m128i a, const int imm);\n" |
17432 | "/// \\endcode\n" |
17433 | "///\n" |
17434 | "/// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction.\n" |
17435 | "///\n" |
17436 | "/// \\param a\n" |
17437 | "/// A 128-bit integer vector containing the source operand.\n" |
17438 | "/// \\param imm\n" |
17439 | "/// An immediate value specifying the number of bytes to right-shift operand\n" |
17440 | "/// \\a a.\n" |
17441 | "/// \\returns A 128-bit integer vector containing the right-shifted value.\n" |
17442 | "#define _mm_srli_si128(a, imm) \\\n" |
17443 | " (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n" |
17444 | "\n" |
17445 | "#define _mm_bsrli_si128(a, imm) \\\n" |
17446 | " (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n" |
17447 | "\n" |
17448 | "/// Right-shifts each of 16-bit values in the 128-bit integer vector\n" |
17449 | "/// operand by the specified number of bits. High-order bits are cleared.\n" |
17450 | "///\n" |
17451 | "/// \\headerfile <x86intrin.h>\n" |
17452 | "///\n" |
17453 | "/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.\n" |
17454 | "///\n" |
17455 | "/// \\param __a\n" |
17456 | "/// A 128-bit integer vector containing the source operand.\n" |
17457 | "/// \\param __count\n" |
17458 | "/// An integer value specifying the number of bits to right-shift each value\n" |
17459 | "/// in operand \\a __a.\n" |
17460 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
17461 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17462 | "_mm_srli_epi16(__m128i __a, int __count)\n" |
17463 | "{\n" |
17464 | " return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);\n" |
17465 | "}\n" |
17466 | "\n" |
17467 | "/// Right-shifts each of 16-bit values in the 128-bit integer vector\n" |
17468 | "/// operand by the specified number of bits. High-order bits are cleared.\n" |
17469 | "///\n" |
17470 | "/// \\headerfile <x86intrin.h>\n" |
17471 | "///\n" |
17472 | "/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.\n" |
17473 | "///\n" |
17474 | "/// \\param __a\n" |
17475 | "/// A 128-bit integer vector containing the source operand.\n" |
17476 | "/// \\param __count\n" |
17477 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
17478 | "/// to right-shift each value in operand \\a __a.\n" |
17479 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
17480 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17481 | "_mm_srl_epi16(__m128i __a, __m128i __count)\n" |
17482 | "{\n" |
17483 | " return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);\n" |
17484 | "}\n" |
17485 | "\n" |
17486 | "/// Right-shifts each of 32-bit values in the 128-bit integer vector\n" |
17487 | "/// operand by the specified number of bits. High-order bits are cleared.\n" |
17488 | "///\n" |
17489 | "/// \\headerfile <x86intrin.h>\n" |
17490 | "///\n" |
17491 | "/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.\n" |
17492 | "///\n" |
17493 | "/// \\param __a\n" |
17494 | "/// A 128-bit integer vector containing the source operand.\n" |
17495 | "/// \\param __count\n" |
17496 | "/// An integer value specifying the number of bits to right-shift each value\n" |
17497 | "/// in operand \\a __a.\n" |
17498 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
17499 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17500 | "_mm_srli_epi32(__m128i __a, int __count)\n" |
17501 | "{\n" |
17502 | " return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);\n" |
17503 | "}\n" |
17504 | "\n" |
17505 | "/// Right-shifts each of 32-bit values in the 128-bit integer vector\n" |
17506 | "/// operand by the specified number of bits. High-order bits are cleared.\n" |
17507 | "///\n" |
17508 | "/// \\headerfile <x86intrin.h>\n" |
17509 | "///\n" |
17510 | "/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.\n" |
17511 | "///\n" |
17512 | "/// \\param __a\n" |
17513 | "/// A 128-bit integer vector containing the source operand.\n" |
17514 | "/// \\param __count\n" |
17515 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
17516 | "/// to right-shift each value in operand \\a __a.\n" |
17517 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
17518 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17519 | "_mm_srl_epi32(__m128i __a, __m128i __count)\n" |
17520 | "{\n" |
17521 | " return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);\n" |
17522 | "}\n" |
17523 | "\n" |
17524 | "/// Right-shifts each of 64-bit values in the 128-bit integer vector\n" |
17525 | "/// operand by the specified number of bits. High-order bits are cleared.\n" |
17526 | "///\n" |
17527 | "/// \\headerfile <x86intrin.h>\n" |
17528 | "///\n" |
17529 | "/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.\n" |
17530 | "///\n" |
17531 | "/// \\param __a\n" |
17532 | "/// A 128-bit integer vector containing the source operand.\n" |
17533 | "/// \\param __count\n" |
17534 | "/// An integer value specifying the number of bits to right-shift each value\n" |
17535 | "/// in operand \\a __a.\n" |
17536 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
17537 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17538 | "_mm_srli_epi64(__m128i __a, int __count)\n" |
17539 | "{\n" |
17540 | " return __builtin_ia32_psrlqi128((__v2di)__a, __count);\n" |
17541 | "}\n" |
17542 | "\n" |
17543 | "/// Right-shifts each of 64-bit values in the 128-bit integer vector\n" |
17544 | "/// operand by the specified number of bits. High-order bits are cleared.\n" |
17545 | "///\n" |
17546 | "/// \\headerfile <x86intrin.h>\n" |
17547 | "///\n" |
17548 | "/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.\n" |
17549 | "///\n" |
17550 | "/// \\param __a\n" |
17551 | "/// A 128-bit integer vector containing the source operand.\n" |
17552 | "/// \\param __count\n" |
17553 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
17554 | "/// to right-shift each value in operand \\a __a.\n" |
17555 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
17556 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17557 | "_mm_srl_epi64(__m128i __a, __m128i __count)\n" |
17558 | "{\n" |
17559 | " return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);\n" |
17560 | "}\n" |
17561 | "\n" |
17562 | "/// Compares each of the corresponding 8-bit values of the 128-bit\n" |
17563 | "/// integer vectors for equality. Each comparison yields 0x0 for false, 0xFF\n" |
17564 | "/// for true.\n" |
17565 | "///\n" |
17566 | "/// \\headerfile <x86intrin.h>\n" |
17567 | "///\n" |
17568 | "/// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction.\n" |
17569 | "///\n" |
17570 | "/// \\param __a\n" |
17571 | "/// A 128-bit integer vector.\n" |
17572 | "/// \\param __b\n" |
17573 | "/// A 128-bit integer vector.\n" |
17574 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
17575 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17576 | "_mm_cmpeq_epi8(__m128i __a, __m128i __b)\n" |
17577 | "{\n" |
17578 | " return (__m128i)((__v16qi)__a == (__v16qi)__b);\n" |
17579 | "}\n" |
17580 | "\n" |
17581 | "/// Compares each of the corresponding 16-bit values of the 128-bit\n" |
17582 | "/// integer vectors for equality. Each comparison yields 0x0 for false,\n" |
17583 | "/// 0xFFFF for true.\n" |
17584 | "///\n" |
17585 | "/// \\headerfile <x86intrin.h>\n" |
17586 | "///\n" |
17587 | "/// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction.\n" |
17588 | "///\n" |
17589 | "/// \\param __a\n" |
17590 | "/// A 128-bit integer vector.\n" |
17591 | "/// \\param __b\n" |
17592 | "/// A 128-bit integer vector.\n" |
17593 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
17594 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17595 | "_mm_cmpeq_epi16(__m128i __a, __m128i __b)\n" |
17596 | "{\n" |
17597 | " return (__m128i)((__v8hi)__a == (__v8hi)__b);\n" |
17598 | "}\n" |
17599 | "\n" |
17600 | "/// Compares each of the corresponding 32-bit values of the 128-bit\n" |
17601 | "/// integer vectors for equality. Each comparison yields 0x0 for false,\n" |
17602 | "/// 0xFFFFFFFF for true.\n" |
17603 | "///\n" |
17604 | "/// \\headerfile <x86intrin.h>\n" |
17605 | "///\n" |
17606 | "/// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction.\n" |
17607 | "///\n" |
17608 | "/// \\param __a\n" |
17609 | "/// A 128-bit integer vector.\n" |
17610 | "/// \\param __b\n" |
17611 | "/// A 128-bit integer vector.\n" |
17612 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
17613 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17614 | "_mm_cmpeq_epi32(__m128i __a, __m128i __b)\n" |
17615 | "{\n" |
17616 | " return (__m128i)((__v4si)__a == (__v4si)__b);\n" |
17617 | "}\n" |
17618 | "\n" |
17619 | "/// Compares each of the corresponding signed 8-bit values of the 128-bit\n" |
17620 | "/// integer vectors to determine if the values in the first operand are\n" |
17621 | "/// greater than those in the second operand. Each comparison yields 0x0 for\n" |
17622 | "/// false, 0xFF for true.\n" |
17623 | "///\n" |
17624 | "/// \\headerfile <x86intrin.h>\n" |
17625 | "///\n" |
17626 | "/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.\n" |
17627 | "///\n" |
17628 | "/// \\param __a\n" |
17629 | "/// A 128-bit integer vector.\n" |
17630 | "/// \\param __b\n" |
17631 | "/// A 128-bit integer vector.\n" |
17632 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
17633 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17634 | "_mm_cmpgt_epi8(__m128i __a, __m128i __b)\n" |
17635 | "{\n" |
17636 | " /* This function always performs a signed comparison, but __v16qi is a char\n" |
17637 | " which may be signed or unsigned, so use __v16qs. */\n" |
17638 | " return (__m128i)((__v16qs)__a > (__v16qs)__b);\n" |
17639 | "}\n" |
17640 | "\n" |
17641 | "/// Compares each of the corresponding signed 16-bit values of the\n" |
17642 | "/// 128-bit integer vectors to determine if the values in the first operand\n" |
17643 | "/// are greater than those in the second operand.\n" |
17644 | "///\n" |
17645 | "/// Each comparison yields 0x0 for false, 0xFFFF for true.\n" |
17646 | "///\n" |
17647 | "/// \\headerfile <x86intrin.h>\n" |
17648 | "///\n" |
17649 | "/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.\n" |
17650 | "///\n" |
17651 | "/// \\param __a\n" |
17652 | "/// A 128-bit integer vector.\n" |
17653 | "/// \\param __b\n" |
17654 | "/// A 128-bit integer vector.\n" |
17655 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
17656 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17657 | "_mm_cmpgt_epi16(__m128i __a, __m128i __b)\n" |
17658 | "{\n" |
17659 | " return (__m128i)((__v8hi)__a > (__v8hi)__b);\n" |
17660 | "}\n" |
17661 | "\n" |
17662 | "/// Compares each of the corresponding signed 32-bit values of the\n" |
17663 | "/// 128-bit integer vectors to determine if the values in the first operand\n" |
17664 | "/// are greater than those in the second operand.\n" |
17665 | "///\n" |
17666 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFF for true.\n" |
17667 | "///\n" |
17668 | "/// \\headerfile <x86intrin.h>\n" |
17669 | "///\n" |
17670 | "/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.\n" |
17671 | "///\n" |
17672 | "/// \\param __a\n" |
17673 | "/// A 128-bit integer vector.\n" |
17674 | "/// \\param __b\n" |
17675 | "/// A 128-bit integer vector.\n" |
17676 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
17677 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17678 | "_mm_cmpgt_epi32(__m128i __a, __m128i __b)\n" |
17679 | "{\n" |
17680 | " return (__m128i)((__v4si)__a > (__v4si)__b);\n" |
17681 | "}\n" |
17682 | "\n" |
17683 | "/// Compares each of the corresponding signed 8-bit values of the 128-bit\n" |
17684 | "/// integer vectors to determine if the values in the first operand are less\n" |
17685 | "/// than those in the second operand.\n" |
17686 | "///\n" |
17687 | "/// Each comparison yields 0x0 for false, 0xFF for true.\n" |
17688 | "///\n" |
17689 | "/// \\headerfile <x86intrin.h>\n" |
17690 | "///\n" |
17691 | "/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.\n" |
17692 | "///\n" |
17693 | "/// \\param __a\n" |
17694 | "/// A 128-bit integer vector.\n" |
17695 | "/// \\param __b\n" |
17696 | "/// A 128-bit integer vector.\n" |
17697 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
17698 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17699 | "_mm_cmplt_epi8(__m128i __a, __m128i __b)\n" |
17700 | "{\n" |
17701 | " return _mm_cmpgt_epi8(__b, __a);\n" |
17702 | "}\n" |
17703 | "\n" |
17704 | "/// Compares each of the corresponding signed 16-bit values of the\n" |
17705 | "/// 128-bit integer vectors to determine if the values in the first operand\n" |
17706 | "/// are less than those in the second operand.\n" |
17707 | "///\n" |
17708 | "/// Each comparison yields 0x0 for false, 0xFFFF for true.\n" |
17709 | "///\n" |
17710 | "/// \\headerfile <x86intrin.h>\n" |
17711 | "///\n" |
17712 | "/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.\n" |
17713 | "///\n" |
17714 | "/// \\param __a\n" |
17715 | "/// A 128-bit integer vector.\n" |
17716 | "/// \\param __b\n" |
17717 | "/// A 128-bit integer vector.\n" |
17718 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
17719 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17720 | "_mm_cmplt_epi16(__m128i __a, __m128i __b)\n" |
17721 | "{\n" |
17722 | " return _mm_cmpgt_epi16(__b, __a);\n" |
17723 | "}\n" |
17724 | "\n" |
17725 | "/// Compares each of the corresponding signed 32-bit values of the\n" |
17726 | "/// 128-bit integer vectors to determine if the values in the first operand\n" |
17727 | "/// are less than those in the second operand.\n" |
17728 | "///\n" |
17729 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFF for true.\n" |
17730 | "///\n" |
17731 | "/// \\headerfile <x86intrin.h>\n" |
17732 | "///\n" |
17733 | "/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.\n" |
17734 | "///\n" |
17735 | "/// \\param __a\n" |
17736 | "/// A 128-bit integer vector.\n" |
17737 | "/// \\param __b\n" |
17738 | "/// A 128-bit integer vector.\n" |
17739 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
17740 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17741 | "_mm_cmplt_epi32(__m128i __a, __m128i __b)\n" |
17742 | "{\n" |
17743 | " return _mm_cmpgt_epi32(__b, __a);\n" |
17744 | "}\n" |
17745 | "\n" |
17746 | "#ifdef __x86_64__\n" |
17747 | "/// Converts a 64-bit signed integer value from the second operand into a\n" |
17748 | "/// double-precision value and returns it in the lower element of a [2 x\n" |
17749 | "/// double] vector; the upper element of the returned vector is copied from\n" |
17750 | "/// the upper element of the first operand.\n" |
17751 | "///\n" |
17752 | "/// \\headerfile <x86intrin.h>\n" |
17753 | "///\n" |
17754 | "/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.\n" |
17755 | "///\n" |
17756 | "/// \\param __a\n" |
17757 | "/// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are\n" |
17758 | "/// copied to the upper 64 bits of the destination.\n" |
17759 | "/// \\param __b\n" |
17760 | "/// A 64-bit signed integer operand containing the value to be converted.\n" |
17761 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
17762 | "/// converted value of the second operand. The upper 64 bits are copied from\n" |
17763 | "/// the upper 64 bits of the first operand.\n" |
17764 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
17765 | "_mm_cvtsi64_sd(__m128d __a, long long __b)\n" |
17766 | "{\n" |
17767 | " __a[0] = __b;\n" |
17768 | " return __a;\n" |
17769 | "}\n" |
17770 | "\n" |
17771 | "/// Converts the first (lower) element of a vector of [2 x double] into a\n" |
17772 | "/// 64-bit signed integer value, according to the current rounding mode.\n" |
17773 | "///\n" |
17774 | "/// \\headerfile <x86intrin.h>\n" |
17775 | "///\n" |
17776 | "/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.\n" |
17777 | "///\n" |
17778 | "/// \\param __a\n" |
17779 | "/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n" |
17780 | "/// conversion.\n" |
17781 | "/// \\returns A 64-bit signed integer containing the converted value.\n" |
17782 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
17783 | "_mm_cvtsd_si64(__m128d __a)\n" |
17784 | "{\n" |
17785 | " return __builtin_ia32_cvtsd2si64((__v2df)__a);\n" |
17786 | "}\n" |
17787 | "\n" |
17788 | "/// Converts the first (lower) element of a vector of [2 x double] into a\n" |
17789 | "/// 64-bit signed integer value, truncating the result when it is inexact.\n" |
17790 | "///\n" |
17791 | "/// \\headerfile <x86intrin.h>\n" |
17792 | "///\n" |
17793 | "/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>\n" |
17794 | "/// instruction.\n" |
17795 | "///\n" |
17796 | "/// \\param __a\n" |
17797 | "/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n" |
17798 | "/// conversion.\n" |
17799 | "/// \\returns A 64-bit signed integer containing the converted value.\n" |
17800 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
17801 | "_mm_cvttsd_si64(__m128d __a)\n" |
17802 | "{\n" |
17803 | " return __builtin_ia32_cvttsd2si64((__v2df)__a);\n" |
17804 | "}\n" |
17805 | "#endif\n" |
17806 | "\n" |
17807 | "/// Converts a vector of [4 x i32] into a vector of [4 x float].\n" |
17808 | "///\n" |
17809 | "/// \\headerfile <x86intrin.h>\n" |
17810 | "///\n" |
17811 | "/// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction.\n" |
17812 | "///\n" |
17813 | "/// \\param __a\n" |
17814 | "/// A 128-bit integer vector.\n" |
17815 | "/// \\returns A 128-bit vector of [4 x float] containing the converted values.\n" |
17816 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
17817 | "_mm_cvtepi32_ps(__m128i __a)\n" |
17818 | "{\n" |
17819 | " return (__m128)__builtin_convertvector((__v4si)__a, __v4sf);\n" |
17820 | "}\n" |
17821 | "\n" |
17822 | "/// Converts a vector of [4 x float] into a vector of [4 x i32].\n" |
17823 | "///\n" |
17824 | "/// \\headerfile <x86intrin.h>\n" |
17825 | "///\n" |
17826 | "/// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction.\n" |
17827 | "///\n" |
17828 | "/// \\param __a\n" |
17829 | "/// A 128-bit vector of [4 x float].\n" |
17830 | "/// \\returns A 128-bit integer vector of [4 x i32] containing the converted\n" |
17831 | "/// values.\n" |
17832 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17833 | "_mm_cvtps_epi32(__m128 __a)\n" |
17834 | "{\n" |
17835 | " return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a);\n" |
17836 | "}\n" |
17837 | "\n" |
17838 | "/// Converts a vector of [4 x float] into a vector of [4 x i32],\n" |
17839 | "/// truncating the result when it is inexact.\n" |
17840 | "///\n" |
17841 | "/// \\headerfile <x86intrin.h>\n" |
17842 | "///\n" |
17843 | "/// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c>\n" |
17844 | "/// instruction.\n" |
17845 | "///\n" |
17846 | "/// \\param __a\n" |
17847 | "/// A 128-bit vector of [4 x float].\n" |
17848 | "/// \\returns A 128-bit vector of [4 x i32] containing the converted values.\n" |
17849 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17850 | "_mm_cvttps_epi32(__m128 __a)\n" |
17851 | "{\n" |
17852 | " return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);\n" |
17853 | "}\n" |
17854 | "\n" |
17855 | "/// Returns a vector of [4 x i32] where the lowest element is the input\n" |
17856 | "/// operand and the remaining elements are zero.\n" |
17857 | "///\n" |
17858 | "/// \\headerfile <x86intrin.h>\n" |
17859 | "///\n" |
17860 | "/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n" |
17861 | "///\n" |
17862 | "/// \\param __a\n" |
17863 | "/// A 32-bit signed integer operand.\n" |
17864 | "/// \\returns A 128-bit vector of [4 x i32].\n" |
17865 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17866 | "_mm_cvtsi32_si128(int __a)\n" |
17867 | "{\n" |
17868 | " return __extension__ (__m128i)(__v4si){ __a, 0, 0, 0 };\n" |
17869 | "}\n" |
17870 | "\n" |
17871 | "#ifdef __x86_64__\n" |
17872 | "/// Returns a vector of [2 x i64] where the lower element is the input\n" |
17873 | "/// operand and the upper element is zero.\n" |
17874 | "///\n" |
17875 | "/// \\headerfile <x86intrin.h>\n" |
17876 | "///\n" |
17877 | "/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n" |
17878 | "///\n" |
17879 | "/// \\param __a\n" |
17880 | "/// A 64-bit signed integer operand containing the value to be converted.\n" |
17881 | "/// \\returns A 128-bit vector of [2 x i64] containing the converted value.\n" |
17882 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17883 | "_mm_cvtsi64_si128(long long __a)\n" |
17884 | "{\n" |
17885 | " return __extension__ (__m128i)(__v2di){ __a, 0 };\n" |
17886 | "}\n" |
17887 | "#endif\n" |
17888 | "\n" |
17889 | "/// Moves the least significant 32 bits of a vector of [4 x i32] to a\n" |
17890 | "/// 32-bit signed integer value.\n" |
17891 | "///\n" |
17892 | "/// \\headerfile <x86intrin.h>\n" |
17893 | "///\n" |
17894 | "/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n" |
17895 | "///\n" |
17896 | "/// \\param __a\n" |
17897 | "/// A vector of [4 x i32]. The least significant 32 bits are moved to the\n" |
17898 | "/// destination.\n" |
17899 | "/// \\returns A 32-bit signed integer containing the moved value.\n" |
17900 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
17901 | "_mm_cvtsi128_si32(__m128i __a)\n" |
17902 | "{\n" |
17903 | " __v4si __b = (__v4si)__a;\n" |
17904 | " return __b[0];\n" |
17905 | "}\n" |
17906 | "\n" |
17907 | "#ifdef __x86_64__\n" |
17908 | "/// Moves the least significant 64 bits of a vector of [2 x i64] to a\n" |
17909 | "/// 64-bit signed integer value.\n" |
17910 | "///\n" |
17911 | "/// \\headerfile <x86intrin.h>\n" |
17912 | "///\n" |
17913 | "/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n" |
17914 | "///\n" |
17915 | "/// \\param __a\n" |
17916 | "/// A vector of [2 x i64]. The least significant 64 bits are moved to the\n" |
17917 | "/// destination.\n" |
17918 | "/// \\returns A 64-bit signed integer containing the moved value.\n" |
17919 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
17920 | "_mm_cvtsi128_si64(__m128i __a)\n" |
17921 | "{\n" |
17922 | " return __a[0];\n" |
17923 | "}\n" |
17924 | "#endif\n" |
17925 | "\n" |
17926 | "/// Moves packed integer values from an aligned 128-bit memory location\n" |
17927 | "/// to elements in a 128-bit integer vector.\n" |
17928 | "///\n" |
17929 | "/// \\headerfile <x86intrin.h>\n" |
17930 | "///\n" |
17931 | "/// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction.\n" |
17932 | "///\n" |
17933 | "/// \\param __p\n" |
17934 | "/// An aligned pointer to a memory location containing integer values.\n" |
17935 | "/// \\returns A 128-bit integer vector containing the moved values.\n" |
17936 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17937 | "_mm_load_si128(__m128i const *__p)\n" |
17938 | "{\n" |
17939 | " return *__p;\n" |
17940 | "}\n" |
17941 | "\n" |
17942 | "/// Moves packed integer values from an unaligned 128-bit memory location\n" |
17943 | "/// to elements in a 128-bit integer vector.\n" |
17944 | "///\n" |
17945 | "/// \\headerfile <x86intrin.h>\n" |
17946 | "///\n" |
17947 | "/// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction.\n" |
17948 | "///\n" |
17949 | "/// \\param __p\n" |
17950 | "/// A pointer to a memory location containing integer values.\n" |
17951 | "/// \\returns A 128-bit integer vector containing the moved values.\n" |
17952 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17953 | "_mm_loadu_si128(__m128i const *__p)\n" |
17954 | "{\n" |
17955 | " struct __loadu_si128 {\n" |
17956 | " __m128i __v;\n" |
17957 | " } __attribute__((__packed__, __may_alias__));\n" |
17958 | " return ((struct __loadu_si128*)__p)->__v;\n" |
17959 | "}\n" |
17960 | "\n" |
17961 | "/// Returns a vector of [2 x i64] where the lower element is taken from\n" |
17962 | "/// the lower element of the operand, and the upper element is zero.\n" |
17963 | "///\n" |
17964 | "/// \\headerfile <x86intrin.h>\n" |
17965 | "///\n" |
17966 | "/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n" |
17967 | "///\n" |
17968 | "/// \\param __p\n" |
17969 | "/// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of\n" |
17970 | "/// the destination.\n" |
17971 | "/// \\returns A 128-bit vector of [2 x i64]. The lower order bits contain the\n" |
17972 | "/// moved value. The higher order bits are cleared.\n" |
17973 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17974 | "_mm_loadl_epi64(__m128i const *__p)\n" |
17975 | "{\n" |
17976 | " struct __mm_loadl_epi64_struct {\n" |
17977 | " long long __u;\n" |
17978 | " } __attribute__((__packed__, __may_alias__));\n" |
17979 | " return __extension__ (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};\n" |
17980 | "}\n" |
17981 | "\n" |
17982 | "/// Generates a 128-bit vector of [4 x i32] with unspecified content.\n" |
17983 | "/// This could be used as an argument to another intrinsic function where the\n" |
17984 | "/// argument is required but the value is not actually used.\n" |
17985 | "///\n" |
17986 | "/// \\headerfile <x86intrin.h>\n" |
17987 | "///\n" |
17988 | "/// This intrinsic has no corresponding instruction.\n" |
17989 | "///\n" |
17990 | "/// \\returns A 128-bit vector of [4 x i32] with unspecified content.\n" |
17991 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
17992 | "_mm_undefined_si128(void)\n" |
17993 | "{\n" |
17994 | " return (__m128i)__builtin_ia32_undef128();\n" |
17995 | "}\n" |
17996 | "\n" |
17997 | "/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with\n" |
17998 | "/// the specified 64-bit integer values.\n" |
17999 | "///\n" |
18000 | "/// \\headerfile <x86intrin.h>\n" |
18001 | "///\n" |
18002 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
18003 | "/// instruction.\n" |
18004 | "///\n" |
18005 | "/// \\param __q1\n" |
18006 | "/// A 64-bit integer value used to initialize the upper 64 bits of the\n" |
18007 | "/// destination vector of [2 x i64].\n" |
18008 | "/// \\param __q0\n" |
18009 | "/// A 64-bit integer value used to initialize the lower 64 bits of the\n" |
18010 | "/// destination vector of [2 x i64].\n" |
18011 | "/// \\returns An initialized 128-bit vector of [2 x i64] containing the values\n" |
18012 | "/// provided in the operands.\n" |
18013 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18014 | "_mm_set_epi64x(long long __q1, long long __q0)\n" |
18015 | "{\n" |
18016 | " return __extension__ (__m128i)(__v2di){ __q0, __q1 };\n" |
18017 | "}\n" |
18018 | "\n" |
18019 | "/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with\n" |
18020 | "/// the specified 64-bit integer values.\n" |
18021 | "///\n" |
18022 | "/// \\headerfile <x86intrin.h>\n" |
18023 | "///\n" |
18024 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
18025 | "/// instruction.\n" |
18026 | "///\n" |
18027 | "/// \\param __q1\n" |
18028 | "/// A 64-bit integer value used to initialize the upper 64 bits of the\n" |
18029 | "/// destination vector of [2 x i64].\n" |
18030 | "/// \\param __q0\n" |
18031 | "/// A 64-bit integer value used to initialize the lower 64 bits of the\n" |
18032 | "/// destination vector of [2 x i64].\n" |
18033 | "/// \\returns An initialized 128-bit vector of [2 x i64] containing the values\n" |
18034 | "/// provided in the operands.\n" |
18035 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18036 | "_mm_set_epi64(__m64 __q1, __m64 __q0)\n" |
18037 | "{\n" |
18038 | " return _mm_set_epi64x((long long)__q1, (long long)__q0);\n" |
18039 | "}\n" |
18040 | "\n" |
18041 | "/// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with\n" |
18042 | "/// the specified 32-bit integer values.\n" |
18043 | "///\n" |
18044 | "/// \\headerfile <x86intrin.h>\n" |
18045 | "///\n" |
18046 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
18047 | "/// instruction.\n" |
18048 | "///\n" |
18049 | "/// \\param __i3\n" |
18050 | "/// A 32-bit integer value used to initialize bits [127:96] of the\n" |
18051 | "/// destination vector.\n" |
18052 | "/// \\param __i2\n" |
18053 | "/// A 32-bit integer value used to initialize bits [95:64] of the destination\n" |
18054 | "/// vector.\n" |
18055 | "/// \\param __i1\n" |
18056 | "/// A 32-bit integer value used to initialize bits [63:32] of the destination\n" |
18057 | "/// vector.\n" |
18058 | "/// \\param __i0\n" |
18059 | "/// A 32-bit integer value used to initialize bits [31:0] of the destination\n" |
18060 | "/// vector.\n" |
18061 | "/// \\returns An initialized 128-bit vector of [4 x i32] containing the values\n" |
18062 | "/// provided in the operands.\n" |
18063 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18064 | "_mm_set_epi32(int __i3, int __i2, int __i1, int __i0)\n" |
18065 | "{\n" |
18066 | " return __extension__ (__m128i)(__v4si){ __i0, __i1, __i2, __i3};\n" |
18067 | "}\n" |
18068 | "\n" |
18069 | "/// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with\n" |
18070 | "/// the specified 16-bit integer values.\n" |
18071 | "///\n" |
18072 | "/// \\headerfile <x86intrin.h>\n" |
18073 | "///\n" |
18074 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
18075 | "/// instruction.\n" |
18076 | "///\n" |
18077 | "/// \\param __w7\n" |
18078 | "/// A 16-bit integer value used to initialize bits [127:112] of the\n" |
18079 | "/// destination vector.\n" |
18080 | "/// \\param __w6\n" |
18081 | "/// A 16-bit integer value used to initialize bits [111:96] of the\n" |
18082 | "/// destination vector.\n" |
18083 | "/// \\param __w5\n" |
18084 | "/// A 16-bit integer value used to initialize bits [95:80] of the destination\n" |
18085 | "/// vector.\n" |
18086 | "/// \\param __w4\n" |
18087 | "/// A 16-bit integer value used to initialize bits [79:64] of the destination\n" |
18088 | "/// vector.\n" |
18089 | "/// \\param __w3\n" |
18090 | "/// A 16-bit integer value used to initialize bits [63:48] of the destination\n" |
18091 | "/// vector.\n" |
18092 | "/// \\param __w2\n" |
18093 | "/// A 16-bit integer value used to initialize bits [47:32] of the destination\n" |
18094 | "/// vector.\n" |
18095 | "/// \\param __w1\n" |
18096 | "/// A 16-bit integer value used to initialize bits [31:16] of the destination\n" |
18097 | "/// vector.\n" |
18098 | "/// \\param __w0\n" |
18099 | "/// A 16-bit integer value used to initialize bits [15:0] of the destination\n" |
18100 | "/// vector.\n" |
18101 | "/// \\returns An initialized 128-bit vector of [8 x i16] containing the values\n" |
18102 | "/// provided in the operands.\n" |
18103 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18104 | "_mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)\n" |
18105 | "{\n" |
18106 | " return __extension__ (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };\n" |
18107 | "}\n" |
18108 | "\n" |
18109 | "/// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with\n" |
18110 | "/// the specified 8-bit integer values.\n" |
18111 | "///\n" |
18112 | "/// \\headerfile <x86intrin.h>\n" |
18113 | "///\n" |
18114 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
18115 | "/// instruction.\n" |
18116 | "///\n" |
18117 | "/// \\param __b15\n" |
18118 | "/// Initializes bits [127:120] of the destination vector.\n" |
18119 | "/// \\param __b14\n" |
18120 | "/// Initializes bits [119:112] of the destination vector.\n" |
18121 | "/// \\param __b13\n" |
18122 | "/// Initializes bits [111:104] of the destination vector.\n" |
18123 | "/// \\param __b12\n" |
18124 | "/// Initializes bits [103:96] of the destination vector.\n" |
18125 | "/// \\param __b11\n" |
18126 | "/// Initializes bits [95:88] of the destination vector.\n" |
18127 | "/// \\param __b10\n" |
18128 | "/// Initializes bits [87:80] of the destination vector.\n" |
18129 | "/// \\param __b9\n" |
18130 | "/// Initializes bits [79:72] of the destination vector.\n" |
18131 | "/// \\param __b8\n" |
18132 | "/// Initializes bits [71:64] of the destination vector.\n" |
18133 | "/// \\param __b7\n" |
18134 | "/// Initializes bits [63:56] of the destination vector.\n" |
18135 | "/// \\param __b6\n" |
18136 | "/// Initializes bits [55:48] of the destination vector.\n" |
18137 | "/// \\param __b5\n" |
18138 | "/// Initializes bits [47:40] of the destination vector.\n" |
18139 | "/// \\param __b4\n" |
18140 | "/// Initializes bits [39:32] of the destination vector.\n" |
18141 | "/// \\param __b3\n" |
18142 | "/// Initializes bits [31:24] of the destination vector.\n" |
18143 | "/// \\param __b2\n" |
18144 | "/// Initializes bits [23:16] of the destination vector.\n" |
18145 | "/// \\param __b1\n" |
18146 | "/// Initializes bits [15:8] of the destination vector.\n" |
18147 | "/// \\param __b0\n" |
18148 | "/// Initializes bits [7:0] of the destination vector.\n" |
18149 | "/// \\returns An initialized 128-bit vector of [16 x i8] containing the values\n" |
18150 | "/// provided in the operands.\n" |
18151 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18152 | "_mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)\n" |
18153 | "{\n" |
18154 | " return __extension__ (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };\n" |
18155 | "}\n" |
18156 | "\n" |
18157 | "/// Initializes both values in a 128-bit integer vector with the\n" |
18158 | "/// specified 64-bit integer value.\n" |
18159 | "///\n" |
18160 | "/// \\headerfile <x86intrin.h>\n" |
18161 | "///\n" |
18162 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
18163 | "/// instruction.\n" |
18164 | "///\n" |
18165 | "/// \\param __q\n" |
18166 | "/// Integer value used to initialize the elements of the destination integer\n" |
18167 | "/// vector.\n" |
18168 | "/// \\returns An initialized 128-bit integer vector of [2 x i64] with both\n" |
18169 | "/// elements containing the value provided in the operand.\n" |
18170 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18171 | "_mm_set1_epi64x(long long __q)\n" |
18172 | "{\n" |
18173 | " return _mm_set_epi64x(__q, __q);\n" |
18174 | "}\n" |
18175 | "\n" |
18176 | "/// Initializes both values in a 128-bit vector of [2 x i64] with the\n" |
18177 | "/// specified 64-bit value.\n" |
18178 | "///\n" |
18179 | "/// \\headerfile <x86intrin.h>\n" |
18180 | "///\n" |
18181 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
18182 | "/// instruction.\n" |
18183 | "///\n" |
18184 | "/// \\param __q\n" |
18185 | "/// A 64-bit value used to initialize the elements of the destination integer\n" |
18186 | "/// vector.\n" |
18187 | "/// \\returns An initialized 128-bit vector of [2 x i64] with all elements\n" |
18188 | "/// containing the value provided in the operand.\n" |
18189 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18190 | "_mm_set1_epi64(__m64 __q)\n" |
18191 | "{\n" |
18192 | " return _mm_set_epi64(__q, __q);\n" |
18193 | "}\n" |
18194 | "\n" |
18195 | "/// Initializes all values in a 128-bit vector of [4 x i32] with the\n" |
18196 | "/// specified 32-bit value.\n" |
18197 | "///\n" |
18198 | "/// \\headerfile <x86intrin.h>\n" |
18199 | "///\n" |
18200 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
18201 | "/// instruction.\n" |
18202 | "///\n" |
18203 | "/// \\param __i\n" |
18204 | "/// A 32-bit value used to initialize the elements of the destination integer\n" |
18205 | "/// vector.\n" |
18206 | "/// \\returns An initialized 128-bit vector of [4 x i32] with all elements\n" |
18207 | "/// containing the value provided in the operand.\n" |
18208 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18209 | "_mm_set1_epi32(int __i)\n" |
18210 | "{\n" |
18211 | " return _mm_set_epi32(__i, __i, __i, __i);\n" |
18212 | "}\n" |
18213 | "\n" |
18214 | "/// Initializes all values in a 128-bit vector of [8 x i16] with the\n" |
18215 | "/// specified 16-bit value.\n" |
18216 | "///\n" |
18217 | "/// \\headerfile <x86intrin.h>\n" |
18218 | "///\n" |
18219 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
18220 | "/// instruction.\n" |
18221 | "///\n" |
18222 | "/// \\param __w\n" |
18223 | "/// A 16-bit value used to initialize the elements of the destination integer\n" |
18224 | "/// vector.\n" |
18225 | "/// \\returns An initialized 128-bit vector of [8 x i16] with all elements\n" |
18226 | "/// containing the value provided in the operand.\n" |
18227 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18228 | "_mm_set1_epi16(short __w)\n" |
18229 | "{\n" |
18230 | " return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w);\n" |
18231 | "}\n" |
18232 | "\n" |
18233 | "/// Initializes all values in a 128-bit vector of [16 x i8] with the\n" |
18234 | "/// specified 8-bit value.\n" |
18235 | "///\n" |
18236 | "/// \\headerfile <x86intrin.h>\n" |
18237 | "///\n" |
18238 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
18239 | "/// instruction.\n" |
18240 | "///\n" |
18241 | "/// \\param __b\n" |
18242 | "/// An 8-bit value used to initialize the elements of the destination integer\n" |
18243 | "/// vector.\n" |
18244 | "/// \\returns An initialized 128-bit vector of [16 x i8] with all elements\n" |
18245 | "/// containing the value provided in the operand.\n" |
18246 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18247 | "_mm_set1_epi8(char __b)\n" |
18248 | "{\n" |
18249 | " return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b);\n" |
18250 | "}\n" |
18251 | "\n" |
18252 | "/// Constructs a 128-bit integer vector, initialized in reverse order\n" |
18253 | "/// with the specified 64-bit integral values.\n" |
18254 | "///\n" |
18255 | "/// \\headerfile <x86intrin.h>\n" |
18256 | "///\n" |
18257 | "/// This intrinsic does not correspond to a specific instruction.\n" |
18258 | "///\n" |
18259 | "/// \\param __q0\n" |
18260 | "/// A 64-bit integral value used to initialize the lower 64 bits of the\n" |
18261 | "/// result.\n" |
18262 | "/// \\param __q1\n" |
18263 | "/// A 64-bit integral value used to initialize the upper 64 bits of the\n" |
18264 | "/// result.\n" |
18265 | "/// \\returns An initialized 128-bit integer vector.\n" |
18266 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18267 | "_mm_setr_epi64(__m64 __q0, __m64 __q1)\n" |
18268 | "{\n" |
18269 | " return _mm_set_epi64(__q1, __q0);\n" |
18270 | "}\n" |
18271 | "\n" |
18272 | "/// Constructs a 128-bit integer vector, initialized in reverse order\n" |
18273 | "/// with the specified 32-bit integral values.\n" |
18274 | "///\n" |
18275 | "/// \\headerfile <x86intrin.h>\n" |
18276 | "///\n" |
18277 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
18278 | "/// instruction.\n" |
18279 | "///\n" |
18280 | "/// \\param __i0\n" |
18281 | "/// A 32-bit integral value used to initialize bits [31:0] of the result.\n" |
18282 | "/// \\param __i1\n" |
18283 | "/// A 32-bit integral value used to initialize bits [63:32] of the result.\n" |
18284 | "/// \\param __i2\n" |
18285 | "/// A 32-bit integral value used to initialize bits [95:64] of the result.\n" |
18286 | "/// \\param __i3\n" |
18287 | "/// A 32-bit integral value used to initialize bits [127:96] of the result.\n" |
18288 | "/// \\returns An initialized 128-bit integer vector.\n" |
18289 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18290 | "_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)\n" |
18291 | "{\n" |
18292 | " return _mm_set_epi32(__i3, __i2, __i1, __i0);\n" |
18293 | "}\n" |
18294 | "\n" |
18295 | "/// Constructs a 128-bit integer vector, initialized in reverse order\n" |
18296 | "/// with the specified 16-bit integral values.\n" |
18297 | "///\n" |
18298 | "/// \\headerfile <x86intrin.h>\n" |
18299 | "///\n" |
18300 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
18301 | "/// instruction.\n" |
18302 | "///\n" |
18303 | "/// \\param __w0\n" |
18304 | "/// A 16-bit integral value used to initialize bits [15:0] of the result.\n" |
18305 | "/// \\param __w1\n" |
18306 | "/// A 16-bit integral value used to initialize bits [31:16] of the result.\n" |
18307 | "/// \\param __w2\n" |
18308 | "/// A 16-bit integral value used to initialize bits [47:32] of the result.\n" |
18309 | "/// \\param __w3\n" |
18310 | "/// A 16-bit integral value used to initialize bits [63:48] of the result.\n" |
18311 | "/// \\param __w4\n" |
18312 | "/// A 16-bit integral value used to initialize bits [79:64] of the result.\n" |
18313 | "/// \\param __w5\n" |
18314 | "/// A 16-bit integral value used to initialize bits [95:80] of the result.\n" |
18315 | "/// \\param __w6\n" |
18316 | "/// A 16-bit integral value used to initialize bits [111:96] of the result.\n" |
18317 | "/// \\param __w7\n" |
18318 | "/// A 16-bit integral value used to initialize bits [127:112] of the result.\n" |
18319 | "/// \\returns An initialized 128-bit integer vector.\n" |
18320 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18321 | "_mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)\n" |
18322 | "{\n" |
18323 | " return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0);\n" |
18324 | "}\n" |
18325 | "\n" |
18326 | "/// Constructs a 128-bit integer vector, initialized in reverse order\n" |
18327 | "/// with the specified 8-bit integral values.\n" |
18328 | "///\n" |
18329 | "/// \\headerfile <x86intrin.h>\n" |
18330 | "///\n" |
18331 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
18332 | "/// instruction.\n" |
18333 | "///\n" |
18334 | "/// \\param __b0\n" |
18335 | "/// An 8-bit integral value used to initialize bits [7:0] of the result.\n" |
18336 | "/// \\param __b1\n" |
18337 | "/// An 8-bit integral value used to initialize bits [15:8] of the result.\n" |
18338 | "/// \\param __b2\n" |
18339 | "/// An 8-bit integral value used to initialize bits [23:16] of the result.\n" |
18340 | "/// \\param __b3\n" |
18341 | "/// An 8-bit integral value used to initialize bits [31:24] of the result.\n" |
18342 | "/// \\param __b4\n" |
18343 | "/// An 8-bit integral value used to initialize bits [39:32] of the result.\n" |
18344 | "/// \\param __b5\n" |
18345 | "/// An 8-bit integral value used to initialize bits [47:40] of the result.\n" |
18346 | "/// \\param __b6\n" |
18347 | "/// An 8-bit integral value used to initialize bits [55:48] of the result.\n" |
18348 | "/// \\param __b7\n" |
18349 | "/// An 8-bit integral value used to initialize bits [63:56] of the result.\n" |
18350 | "/// \\param __b8\n" |
18351 | "/// An 8-bit integral value used to initialize bits [71:64] of the result.\n" |
18352 | "/// \\param __b9\n" |
18353 | "/// An 8-bit integral value used to initialize bits [79:72] of the result.\n" |
18354 | "/// \\param __b10\n" |
18355 | "/// An 8-bit integral value used to initialize bits [87:80] of the result.\n" |
18356 | "/// \\param __b11\n" |
18357 | "/// An 8-bit integral value used to initialize bits [95:88] of the result.\n" |
18358 | "/// \\param __b12\n" |
18359 | "/// An 8-bit integral value used to initialize bits [103:96] of the result.\n" |
18360 | "/// \\param __b13\n" |
18361 | "/// An 8-bit integral value used to initialize bits [111:104] of the result.\n" |
18362 | "/// \\param __b14\n" |
18363 | "/// An 8-bit integral value used to initialize bits [119:112] of the result.\n" |
18364 | "/// \\param __b15\n" |
18365 | "/// An 8-bit integral value used to initialize bits [127:120] of the result.\n" |
18366 | "/// \\returns An initialized 128-bit integer vector.\n" |
18367 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18368 | "_mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)\n" |
18369 | "{\n" |
18370 | " return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);\n" |
18371 | "}\n" |
18372 | "\n" |
18373 | "/// Creates a 128-bit integer vector initialized to zero.\n" |
18374 | "///\n" |
18375 | "/// \\headerfile <x86intrin.h>\n" |
18376 | "///\n" |
18377 | "/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.\n" |
18378 | "///\n" |
18379 | "/// \\returns An initialized 128-bit integer vector with all elements set to\n" |
18380 | "/// zero.\n" |
18381 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18382 | "_mm_setzero_si128(void)\n" |
18383 | "{\n" |
18384 | " return __extension__ (__m128i)(__v2di){ 0LL, 0LL };\n" |
18385 | "}\n" |
18386 | "\n" |
18387 | "/// Stores a 128-bit integer vector to a memory location aligned on a\n" |
18388 | "/// 128-bit boundary.\n" |
18389 | "///\n" |
18390 | "/// \\headerfile <x86intrin.h>\n" |
18391 | "///\n" |
18392 | "/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.\n" |
18393 | "///\n" |
18394 | "/// \\param __p\n" |
18395 | "/// A pointer to an aligned memory location that will receive the integer\n" |
18396 | "/// values.\n" |
18397 | "/// \\param __b\n" |
18398 | "/// A 128-bit integer vector containing the values to be moved.\n" |
18399 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
18400 | "_mm_store_si128(__m128i *__p, __m128i __b)\n" |
18401 | "{\n" |
18402 | " *__p = __b;\n" |
18403 | "}\n" |
18404 | "\n" |
18405 | "/// Stores a 128-bit integer vector to an unaligned memory location.\n" |
18406 | "///\n" |
18407 | "/// \\headerfile <x86intrin.h>\n" |
18408 | "///\n" |
18409 | "/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.\n" |
18410 | "///\n" |
18411 | "/// \\param __p\n" |
18412 | "/// A pointer to a memory location that will receive the integer values.\n" |
18413 | "/// \\param __b\n" |
18414 | "/// A 128-bit integer vector containing the values to be moved.\n" |
18415 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
18416 | "_mm_storeu_si128(__m128i *__p, __m128i __b)\n" |
18417 | "{\n" |
18418 | " struct __storeu_si128 {\n" |
18419 | " __m128i __v;\n" |
18420 | " } __attribute__((__packed__, __may_alias__));\n" |
18421 | " ((struct __storeu_si128*)__p)->__v = __b;\n" |
18422 | "}\n" |
18423 | "\n" |
18424 | "/// Stores a 64-bit integer value from the low element of a 128-bit integer\n" |
18425 | "/// vector.\n" |
18426 | "///\n" |
18427 | "/// \\headerfile <x86intrin.h>\n" |
18428 | "///\n" |
18429 | "/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n" |
18430 | "///\n" |
18431 | "/// \\param __p\n" |
18432 | "/// A pointer to a 64-bit memory location. The address of the memory\n" |
18433 | "/// location does not have to be algned.\n" |
18434 | "/// \\param __b\n" |
18435 | "/// A 128-bit integer vector containing the value to be stored.\n" |
18436 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
18437 | "_mm_storeu_si64(void const *__p, __m128i __b)\n" |
18438 | "{\n" |
18439 | " struct __storeu_si64 {\n" |
18440 | " long long __v;\n" |
18441 | " } __attribute__((__packed__, __may_alias__));\n" |
18442 | " ((struct __storeu_si64*)__p)->__v = ((__v2di)__b)[0];\n" |
18443 | "}\n" |
18444 | "\n" |
18445 | "/// Stores a 32-bit integer value from the low element of a 128-bit integer\n" |
18446 | "/// vector.\n" |
18447 | "///\n" |
18448 | "/// \\headerfile <x86intrin.h>\n" |
18449 | "///\n" |
18450 | "/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n" |
18451 | "///\n" |
18452 | "/// \\param __p\n" |
18453 | "/// A pointer to a 32-bit memory location. The address of the memory\n" |
18454 | "/// location does not have to be aligned.\n" |
18455 | "/// \\param __b\n" |
18456 | "/// A 128-bit integer vector containing the value to be stored.\n" |
18457 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
18458 | "_mm_storeu_si32(void const *__p, __m128i __b)\n" |
18459 | "{\n" |
18460 | " struct __storeu_si32 {\n" |
18461 | " int __v;\n" |
18462 | " } __attribute__((__packed__, __may_alias__));\n" |
18463 | " ((struct __storeu_si32*)__p)->__v = ((__v4si)__b)[0];\n" |
18464 | "}\n" |
18465 | "\n" |
18466 | "/// Stores a 16-bit integer value from the low element of a 128-bit integer\n" |
18467 | "/// vector.\n" |
18468 | "///\n" |
18469 | "/// \\headerfile <x86intrin.h>\n" |
18470 | "///\n" |
18471 | "/// This intrinsic does not correspond to a specific instruction.\n" |
18472 | "///\n" |
18473 | "/// \\param __p\n" |
18474 | "/// A pointer to a 16-bit memory location. The address of the memory\n" |
18475 | "/// location does not have to be aligned.\n" |
18476 | "/// \\param __b\n" |
18477 | "/// A 128-bit integer vector containing the value to be stored.\n" |
18478 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
18479 | "_mm_storeu_si16(void const *__p, __m128i __b)\n" |
18480 | "{\n" |
18481 | " struct __storeu_si16 {\n" |
18482 | " short __v;\n" |
18483 | " } __attribute__((__packed__, __may_alias__));\n" |
18484 | " ((struct __storeu_si16*)__p)->__v = ((__v8hi)__b)[0];\n" |
18485 | "}\n" |
18486 | "\n" |
18487 | "/// Moves bytes selected by the mask from the first operand to the\n" |
18488 | "/// specified unaligned memory location. When a mask bit is 1, the\n" |
18489 | "/// corresponding byte is written, otherwise it is not written.\n" |
18490 | "///\n" |
18491 | "/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n" |
18492 | "/// used again soon). Exception and trap behavior for elements not selected\n" |
18493 | "/// for storage to memory are implementation dependent.\n" |
18494 | "///\n" |
18495 | "/// \\headerfile <x86intrin.h>\n" |
18496 | "///\n" |
18497 | "/// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c>\n" |
18498 | "/// instruction.\n" |
18499 | "///\n" |
18500 | "/// \\param __d\n" |
18501 | "/// A 128-bit integer vector containing the values to be moved.\n" |
18502 | "/// \\param __n\n" |
18503 | "/// A 128-bit integer vector containing the mask. The most significant bit of\n" |
18504 | "/// each byte represents the mask bits.\n" |
18505 | "/// \\param __p\n" |
18506 | "/// A pointer to an unaligned 128-bit memory location where the specified\n" |
18507 | "/// values are moved.\n" |
18508 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
18509 | "_mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)\n" |
18510 | "{\n" |
18511 | " __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);\n" |
18512 | "}\n" |
18513 | "\n" |
18514 | "/// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to\n" |
18515 | "/// a memory location.\n" |
18516 | "///\n" |
18517 | "/// \\headerfile <x86intrin.h>\n" |
18518 | "///\n" |
18519 | "/// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction.\n" |
18520 | "///\n" |
18521 | "/// \\param __p\n" |
18522 | "/// A pointer to a 64-bit memory location that will receive the lower 64 bits\n" |
18523 | "/// of the integer vector parameter.\n" |
18524 | "/// \\param __a\n" |
18525 | "/// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the\n" |
18526 | "/// value to be stored.\n" |
18527 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
18528 | "_mm_storel_epi64(__m128i *__p, __m128i __a)\n" |
18529 | "{\n" |
18530 | " struct __mm_storel_epi64_struct {\n" |
18531 | " long long __u;\n" |
18532 | " } __attribute__((__packed__, __may_alias__));\n" |
18533 | " ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];\n" |
18534 | "}\n" |
18535 | "\n" |
18536 | "/// Stores a 128-bit floating point vector of [2 x double] to a 128-bit\n" |
18537 | "/// aligned memory location.\n" |
18538 | "///\n" |
18539 | "/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n" |
18540 | "/// used again soon).\n" |
18541 | "///\n" |
18542 | "/// \\headerfile <x86intrin.h>\n" |
18543 | "///\n" |
18544 | "/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.\n" |
18545 | "///\n" |
18546 | "/// \\param __p\n" |
18547 | "/// A pointer to the 128-bit aligned memory location used to store the value.\n" |
18548 | "/// \\param __a\n" |
18549 | "/// A vector of [2 x double] containing the 64-bit values to be stored.\n" |
18550 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
18551 | "_mm_stream_pd(double *__p, __m128d __a)\n" |
18552 | "{\n" |
18553 | " __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p);\n" |
18554 | "}\n" |
18555 | "\n" |
18556 | "/// Stores a 128-bit integer vector to a 128-bit aligned memory location.\n" |
18557 | "///\n" |
18558 | "/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n" |
18559 | "/// used again soon).\n" |
18560 | "///\n" |
18561 | "/// \\headerfile <x86intrin.h>\n" |
18562 | "///\n" |
18563 | "/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.\n" |
18564 | "///\n" |
18565 | "/// \\param __p\n" |
18566 | "/// A pointer to the 128-bit aligned memory location used to store the value.\n" |
18567 | "/// \\param __a\n" |
18568 | "/// A 128-bit integer vector containing the values to be stored.\n" |
18569 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
18570 | "_mm_stream_si128(__m128i *__p, __m128i __a)\n" |
18571 | "{\n" |
18572 | " __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);\n" |
18573 | "}\n" |
18574 | "\n" |
18575 | "/// Stores a 32-bit integer value in the specified memory location.\n" |
18576 | "///\n" |
18577 | "/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n" |
18578 | "/// used again soon).\n" |
18579 | "///\n" |
18580 | "/// \\headerfile <x86intrin.h>\n" |
18581 | "///\n" |
18582 | "/// This intrinsic corresponds to the <c> MOVNTI </c> instruction.\n" |
18583 | "///\n" |
18584 | "/// \\param __p\n" |
18585 | "/// A pointer to the 32-bit memory location used to store the value.\n" |
18586 | "/// \\param __a\n" |
18587 | "/// A 32-bit integer containing the value to be stored.\n" |
18588 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"sse2\")))\n" |
18589 | "_mm_stream_si32(int *__p, int __a)\n" |
18590 | "{\n" |
18591 | " __builtin_ia32_movnti(__p, __a);\n" |
18592 | "}\n" |
18593 | "\n" |
18594 | "#ifdef __x86_64__\n" |
18595 | "/// Stores a 64-bit integer value in the specified memory location.\n" |
18596 | "///\n" |
18597 | "/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n" |
18598 | "/// used again soon).\n" |
18599 | "///\n" |
18600 | "/// \\headerfile <x86intrin.h>\n" |
18601 | "///\n" |
18602 | "/// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction.\n" |
18603 | "///\n" |
18604 | "/// \\param __p\n" |
18605 | "/// A pointer to the 64-bit memory location used to store the value.\n" |
18606 | "/// \\param __a\n" |
18607 | "/// A 64-bit integer containing the value to be stored.\n" |
18608 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"sse2\")))\n" |
18609 | "_mm_stream_si64(long long *__p, long long __a)\n" |
18610 | "{\n" |
18611 | " __builtin_ia32_movnti64(__p, __a);\n" |
18612 | "}\n" |
18613 | "#endif\n" |
18614 | "\n" |
18615 | "#if defined(__cplusplus)\n" |
18616 | "extern \"C\" {\n" |
18617 | "#endif\n" |
18618 | "\n" |
18619 | "/// The cache line containing \\a __p is flushed and invalidated from all\n" |
18620 | "/// caches in the coherency domain.\n" |
18621 | "///\n" |
18622 | "/// \\headerfile <x86intrin.h>\n" |
18623 | "///\n" |
18624 | "/// This intrinsic corresponds to the <c> CLFLUSH </c> instruction.\n" |
18625 | "///\n" |
18626 | "/// \\param __p\n" |
18627 | "/// A pointer to the memory location used to identify the cache line to be\n" |
18628 | "/// flushed.\n" |
18629 | "void _mm_clflush(void const * __p);\n" |
18630 | "\n" |
18631 | "/// Forces strong memory ordering (serialization) between load\n" |
18632 | "/// instructions preceding this instruction and load instructions following\n" |
18633 | "/// this instruction, ensuring the system completes all previous loads before\n" |
18634 | "/// executing subsequent loads.\n" |
18635 | "///\n" |
18636 | "/// \\headerfile <x86intrin.h>\n" |
18637 | "///\n" |
18638 | "/// This intrinsic corresponds to the <c> LFENCE </c> instruction.\n" |
18639 | "///\n" |
18640 | "void _mm_lfence(void);\n" |
18641 | "\n" |
18642 | "/// Forces strong memory ordering (serialization) between load and store\n" |
18643 | "/// instructions preceding this instruction and load and store instructions\n" |
18644 | "/// following this instruction, ensuring that the system completes all\n" |
18645 | "/// previous memory accesses before executing subsequent memory accesses.\n" |
18646 | "///\n" |
18647 | "/// \\headerfile <x86intrin.h>\n" |
18648 | "///\n" |
18649 | "/// This intrinsic corresponds to the <c> MFENCE </c> instruction.\n" |
18650 | "///\n" |
18651 | "void _mm_mfence(void);\n" |
18652 | "\n" |
18653 | "#if defined(__cplusplus)\n" |
18654 | "} // extern \"C\"\n" |
18655 | "#endif\n" |
18656 | "\n" |
18657 | "/// Converts 16-bit signed integers from both 128-bit integer vector\n" |
18658 | "/// operands into 8-bit signed integers, and packs the results into the\n" |
18659 | "/// destination. Positive values greater than 0x7F are saturated to 0x7F.\n" |
18660 | "/// Negative values less than 0x80 are saturated to 0x80.\n" |
18661 | "///\n" |
18662 | "/// \\headerfile <x86intrin.h>\n" |
18663 | "///\n" |
18664 | "/// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction.\n" |
18665 | "///\n" |
18666 | "/// \\param __a\n" |
18667 | "/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n" |
18668 | "/// a signed integer and is converted to a 8-bit signed integer with\n" |
18669 | "/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less\n" |
18670 | "/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are\n" |
18671 | "/// written to the lower 64 bits of the result.\n" |
18672 | "/// \\param __b\n" |
18673 | "/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n" |
18674 | "/// a signed integer and is converted to a 8-bit signed integer with\n" |
18675 | "/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less\n" |
18676 | "/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are\n" |
18677 | "/// written to the higher 64 bits of the result.\n" |
18678 | "/// \\returns A 128-bit vector of [16 x i8] containing the converted values.\n" |
18679 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18680 | "_mm_packs_epi16(__m128i __a, __m128i __b)\n" |
18681 | "{\n" |
18682 | " return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);\n" |
18683 | "}\n" |
18684 | "\n" |
18685 | "/// Converts 32-bit signed integers from both 128-bit integer vector\n" |
18686 | "/// operands into 16-bit signed integers, and packs the results into the\n" |
18687 | "/// destination. Positive values greater than 0x7FFF are saturated to 0x7FFF.\n" |
18688 | "/// Negative values less than 0x8000 are saturated to 0x8000.\n" |
18689 | "///\n" |
18690 | "/// \\headerfile <x86intrin.h>\n" |
18691 | "///\n" |
18692 | "/// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction.\n" |
18693 | "///\n" |
18694 | "/// \\param __a\n" |
18695 | "/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as\n" |
18696 | "/// a signed integer and is converted to a 16-bit signed integer with\n" |
18697 | "/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values\n" |
18698 | "/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values\n" |
18699 | "/// are written to the lower 64 bits of the result.\n" |
18700 | "/// \\param __b\n" |
18701 | "/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as\n" |
18702 | "/// a signed integer and is converted to a 16-bit signed integer with\n" |
18703 | "/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values\n" |
18704 | "/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values\n" |
18705 | "/// are written to the higher 64 bits of the result.\n" |
18706 | "/// \\returns A 128-bit vector of [8 x i16] containing the converted values.\n" |
18707 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18708 | "_mm_packs_epi32(__m128i __a, __m128i __b)\n" |
18709 | "{\n" |
18710 | " return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);\n" |
18711 | "}\n" |
18712 | "\n" |
18713 | "/// Converts 16-bit signed integers from both 128-bit integer vector\n" |
18714 | "/// operands into 8-bit unsigned integers, and packs the results into the\n" |
18715 | "/// destination. Values greater than 0xFF are saturated to 0xFF. Values less\n" |
18716 | "/// than 0x00 are saturated to 0x00.\n" |
18717 | "///\n" |
18718 | "/// \\headerfile <x86intrin.h>\n" |
18719 | "///\n" |
18720 | "/// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction.\n" |
18721 | "///\n" |
18722 | "/// \\param __a\n" |
18723 | "/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n" |
18724 | "/// a signed integer and is converted to an 8-bit unsigned integer with\n" |
18725 | "/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n" |
18726 | "/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are\n" |
18727 | "/// written to the lower 64 bits of the result.\n" |
18728 | "/// \\param __b\n" |
18729 | "/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n" |
18730 | "/// a signed integer and is converted to an 8-bit unsigned integer with\n" |
18731 | "/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n" |
18732 | "/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are\n" |
18733 | "/// written to the higher 64 bits of the result.\n" |
18734 | "/// \\returns A 128-bit vector of [16 x i8] containing the converted values.\n" |
18735 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18736 | "_mm_packus_epi16(__m128i __a, __m128i __b)\n" |
18737 | "{\n" |
18738 | " return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);\n" |
18739 | "}\n" |
18740 | "\n" |
18741 | "/// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using\n" |
18742 | "/// the immediate-value parameter as a selector.\n" |
18743 | "///\n" |
18744 | "/// \\headerfile <x86intrin.h>\n" |
18745 | "///\n" |
18746 | "/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.\n" |
18747 | "///\n" |
18748 | "/// \\param __a\n" |
18749 | "/// A 128-bit integer vector.\n" |
18750 | "/// \\param __imm\n" |
18751 | "/// An immediate value. Bits [2:0] selects values from \\a __a to be assigned\n" |
18752 | "/// to bits[15:0] of the result. \\n\n" |
18753 | "/// 000: assign values from bits [15:0] of \\a __a. \\n\n" |
18754 | "/// 001: assign values from bits [31:16] of \\a __a. \\n\n" |
18755 | "/// 010: assign values from bits [47:32] of \\a __a. \\n\n" |
18756 | "/// 011: assign values from bits [63:48] of \\a __a. \\n\n" |
18757 | "/// 100: assign values from bits [79:64] of \\a __a. \\n\n" |
18758 | "/// 101: assign values from bits [95:80] of \\a __a. \\n\n" |
18759 | "/// 110: assign values from bits [111:96] of \\a __a. \\n\n" |
18760 | "/// 111: assign values from bits [127:112] of \\a __a.\n" |
18761 | "/// \\returns An integer, whose lower 16 bits are selected from the 128-bit\n" |
18762 | "/// integer vector parameter and the remaining bits are assigned zeros.\n" |
18763 | "#define _mm_extract_epi16(a, imm) \\\n" |
18764 | " (int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \\\n" |
18765 | " (int)(imm))\n" |
18766 | "\n" |
18767 | "/// Constructs a 128-bit integer vector by first making a copy of the\n" |
18768 | "/// 128-bit integer vector parameter, and then inserting the lower 16 bits\n" |
18769 | "/// of an integer parameter into an offset specified by the immediate-value\n" |
18770 | "/// parameter.\n" |
18771 | "///\n" |
18772 | "/// \\headerfile <x86intrin.h>\n" |
18773 | "///\n" |
18774 | "/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.\n" |
18775 | "///\n" |
18776 | "/// \\param __a\n" |
18777 | "/// A 128-bit integer vector of [8 x i16]. This vector is copied to the\n" |
18778 | "/// result and then one of the eight elements in the result is replaced by\n" |
18779 | "/// the lower 16 bits of \\a __b.\n" |
18780 | "/// \\param __b\n" |
18781 | "/// An integer. The lower 16 bits of this parameter are written to the\n" |
18782 | "/// result beginning at an offset specified by \\a __imm.\n" |
18783 | "/// \\param __imm\n" |
18784 | "/// An immediate value specifying the bit offset in the result at which the\n" |
18785 | "/// lower 16 bits of \\a __b are written.\n" |
18786 | "/// \\returns A 128-bit integer vector containing the constructed values.\n" |
18787 | "#define _mm_insert_epi16(a, b, imm) \\\n" |
18788 | " (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \\\n" |
18789 | " (int)(imm))\n" |
18790 | "\n" |
18791 | "/// Copies the values of the most significant bits from each 8-bit\n" |
18792 | "/// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask\n" |
18793 | "/// value, zero-extends the value, and writes it to the destination.\n" |
18794 | "///\n" |
18795 | "/// \\headerfile <x86intrin.h>\n" |
18796 | "///\n" |
18797 | "/// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction.\n" |
18798 | "///\n" |
18799 | "/// \\param __a\n" |
18800 | "/// A 128-bit integer vector containing the values with bits to be extracted.\n" |
18801 | "/// \\returns The most significant bits from each 8-bit element in \\a __a,\n" |
18802 | "/// written to bits [15:0]. The other bits are assigned zeros.\n" |
18803 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
18804 | "_mm_movemask_epi8(__m128i __a)\n" |
18805 | "{\n" |
18806 | " return __builtin_ia32_pmovmskb128((__v16qi)__a);\n" |
18807 | "}\n" |
18808 | "\n" |
18809 | "/// Constructs a 128-bit integer vector by shuffling four 32-bit\n" |
18810 | "/// elements of a 128-bit integer vector parameter, using the immediate-value\n" |
18811 | "/// parameter as a specifier.\n" |
18812 | "///\n" |
18813 | "/// \\headerfile <x86intrin.h>\n" |
18814 | "///\n" |
18815 | "/// \\code\n" |
18816 | "/// __m128i _mm_shuffle_epi32(__m128i a, const int imm);\n" |
18817 | "/// \\endcode\n" |
18818 | "///\n" |
18819 | "/// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.\n" |
18820 | "///\n" |
18821 | "/// \\param a\n" |
18822 | "/// A 128-bit integer vector containing the values to be copied.\n" |
18823 | "/// \\param imm\n" |
18824 | "/// An immediate value containing an 8-bit value specifying which elements to\n" |
18825 | "/// copy from a. The destinations within the 128-bit destination are assigned\n" |
18826 | "/// values as follows: \\n\n" |
18827 | "/// Bits [1:0] are used to assign values to bits [31:0] of the result. \\n\n" |
18828 | "/// Bits [3:2] are used to assign values to bits [63:32] of the result. \\n\n" |
18829 | "/// Bits [5:4] are used to assign values to bits [95:64] of the result. \\n\n" |
18830 | "/// Bits [7:6] are used to assign values to bits [127:96] of the result. \\n\n" |
18831 | "/// Bit value assignments: \\n\n" |
18832 | "/// 00: assign values from bits [31:0] of \\a a. \\n\n" |
18833 | "/// 01: assign values from bits [63:32] of \\a a. \\n\n" |
18834 | "/// 10: assign values from bits [95:64] of \\a a. \\n\n" |
18835 | "/// 11: assign values from bits [127:96] of \\a a.\n" |
18836 | "/// \\returns A 128-bit integer vector containing the shuffled values.\n" |
18837 | "#define _mm_shuffle_epi32(a, imm) \\\n" |
18838 | " (__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))\n" |
18839 | "\n" |
18840 | "/// Constructs a 128-bit integer vector by shuffling four lower 16-bit\n" |
18841 | "/// elements of a 128-bit integer vector of [8 x i16], using the immediate\n" |
18842 | "/// value parameter as a specifier.\n" |
18843 | "///\n" |
18844 | "/// \\headerfile <x86intrin.h>\n" |
18845 | "///\n" |
18846 | "/// \\code\n" |
18847 | "/// __m128i _mm_shufflelo_epi16(__m128i a, const int imm);\n" |
18848 | "/// \\endcode\n" |
18849 | "///\n" |
18850 | "/// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.\n" |
18851 | "///\n" |
18852 | "/// \\param a\n" |
18853 | "/// A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits\n" |
18854 | "/// [127:64] of the result.\n" |
18855 | "/// \\param imm\n" |
18856 | "/// An 8-bit immediate value specifying which elements to copy from \\a a. \\n\n" |
18857 | "/// Bits[1:0] are used to assign values to bits [15:0] of the result. \\n\n" |
18858 | "/// Bits[3:2] are used to assign values to bits [31:16] of the result. \\n\n" |
18859 | "/// Bits[5:4] are used to assign values to bits [47:32] of the result. \\n\n" |
18860 | "/// Bits[7:6] are used to assign values to bits [63:48] of the result. \\n\n" |
18861 | "/// Bit value assignments: \\n\n" |
18862 | "/// 00: assign values from bits [15:0] of \\a a. \\n\n" |
18863 | "/// 01: assign values from bits [31:16] of \\a a. \\n\n" |
18864 | "/// 10: assign values from bits [47:32] of \\a a. \\n\n" |
18865 | "/// 11: assign values from bits [63:48] of \\a a. \\n\n" |
18866 | "/// \\returns A 128-bit integer vector containing the shuffled values.\n" |
18867 | "#define _mm_shufflelo_epi16(a, imm) \\\n" |
18868 | " (__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))\n" |
18869 | "\n" |
18870 | "/// Constructs a 128-bit integer vector by shuffling four upper 16-bit\n" |
18871 | "/// elements of a 128-bit integer vector of [8 x i16], using the immediate\n" |
18872 | "/// value parameter as a specifier.\n" |
18873 | "///\n" |
18874 | "/// \\headerfile <x86intrin.h>\n" |
18875 | "///\n" |
18876 | "/// \\code\n" |
18877 | "/// __m128i _mm_shufflehi_epi16(__m128i a, const int imm);\n" |
18878 | "/// \\endcode\n" |
18879 | "///\n" |
18880 | "/// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction.\n" |
18881 | "///\n" |
18882 | "/// \\param a\n" |
18883 | "/// A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits\n" |
18884 | "/// [63:0] of the result.\n" |
18885 | "/// \\param imm\n" |
18886 | "/// An 8-bit immediate value specifying which elements to copy from \\a a. \\n\n" |
18887 | "/// Bits[1:0] are used to assign values to bits [79:64] of the result. \\n\n" |
18888 | "/// Bits[3:2] are used to assign values to bits [95:80] of the result. \\n\n" |
18889 | "/// Bits[5:4] are used to assign values to bits [111:96] of the result. \\n\n" |
18890 | "/// Bits[7:6] are used to assign values to bits [127:112] of the result. \\n\n" |
18891 | "/// Bit value assignments: \\n\n" |
18892 | "/// 00: assign values from bits [79:64] of \\a a. \\n\n" |
18893 | "/// 01: assign values from bits [95:80] of \\a a. \\n\n" |
18894 | "/// 10: assign values from bits [111:96] of \\a a. \\n\n" |
18895 | "/// 11: assign values from bits [127:112] of \\a a. \\n\n" |
18896 | "/// \\returns A 128-bit integer vector containing the shuffled values.\n" |
18897 | "#define _mm_shufflehi_epi16(a, imm) \\\n" |
18898 | " (__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))\n" |
18899 | "\n" |
18900 | "/// Unpacks the high-order (index 8-15) values from two 128-bit vectors\n" |
18901 | "/// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].\n" |
18902 | "///\n" |
18903 | "/// \\headerfile <x86intrin.h>\n" |
18904 | "///\n" |
18905 | "/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c>\n" |
18906 | "/// instruction.\n" |
18907 | "///\n" |
18908 | "/// \\param __a\n" |
18909 | "/// A 128-bit vector of [16 x i8].\n" |
18910 | "/// Bits [71:64] are written to bits [7:0] of the result. \\n\n" |
18911 | "/// Bits [79:72] are written to bits [23:16] of the result. \\n\n" |
18912 | "/// Bits [87:80] are written to bits [39:32] of the result. \\n\n" |
18913 | "/// Bits [95:88] are written to bits [55:48] of the result. \\n\n" |
18914 | "/// Bits [103:96] are written to bits [71:64] of the result. \\n\n" |
18915 | "/// Bits [111:104] are written to bits [87:80] of the result. \\n\n" |
18916 | "/// Bits [119:112] are written to bits [103:96] of the result. \\n\n" |
18917 | "/// Bits [127:120] are written to bits [119:112] of the result.\n" |
18918 | "/// \\param __b\n" |
18919 | "/// A 128-bit vector of [16 x i8]. \\n\n" |
18920 | "/// Bits [71:64] are written to bits [15:8] of the result. \\n\n" |
18921 | "/// Bits [79:72] are written to bits [31:24] of the result. \\n\n" |
18922 | "/// Bits [87:80] are written to bits [47:40] of the result. \\n\n" |
18923 | "/// Bits [95:88] are written to bits [63:56] of the result. \\n\n" |
18924 | "/// Bits [103:96] are written to bits [79:72] of the result. \\n\n" |
18925 | "/// Bits [111:104] are written to bits [95:88] of the result. \\n\n" |
18926 | "/// Bits [119:112] are written to bits [111:104] of the result. \\n\n" |
18927 | "/// Bits [127:120] are written to bits [127:120] of the result.\n" |
18928 | "/// \\returns A 128-bit vector of [16 x i8] containing the interleaved values.\n" |
18929 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18930 | "_mm_unpackhi_epi8(__m128i __a, __m128i __b)\n" |
18931 | "{\n" |
18932 | " return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);\n" |
18933 | "}\n" |
18934 | "\n" |
18935 | "/// Unpacks the high-order (index 4-7) values from two 128-bit vectors of\n" |
18936 | "/// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16].\n" |
18937 | "///\n" |
18938 | "/// \\headerfile <x86intrin.h>\n" |
18939 | "///\n" |
18940 | "/// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c>\n" |
18941 | "/// instruction.\n" |
18942 | "///\n" |
18943 | "/// \\param __a\n" |
18944 | "/// A 128-bit vector of [8 x i16].\n" |
18945 | "/// Bits [79:64] are written to bits [15:0] of the result. \\n\n" |
18946 | "/// Bits [95:80] are written to bits [47:32] of the result. \\n\n" |
18947 | "/// Bits [111:96] are written to bits [79:64] of the result. \\n\n" |
18948 | "/// Bits [127:112] are written to bits [111:96] of the result.\n" |
18949 | "/// \\param __b\n" |
18950 | "/// A 128-bit vector of [8 x i16].\n" |
18951 | "/// Bits [79:64] are written to bits [31:16] of the result. \\n\n" |
18952 | "/// Bits [95:80] are written to bits [63:48] of the result. \\n\n" |
18953 | "/// Bits [111:96] are written to bits [95:80] of the result. \\n\n" |
18954 | "/// Bits [127:112] are written to bits [127:112] of the result.\n" |
18955 | "/// \\returns A 128-bit vector of [8 x i16] containing the interleaved values.\n" |
18956 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18957 | "_mm_unpackhi_epi16(__m128i __a, __m128i __b)\n" |
18958 | "{\n" |
18959 | " return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);\n" |
18960 | "}\n" |
18961 | "\n" |
18962 | "/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of\n" |
18963 | "/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].\n" |
18964 | "///\n" |
18965 | "/// \\headerfile <x86intrin.h>\n" |
18966 | "///\n" |
18967 | "/// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c>\n" |
18968 | "/// instruction.\n" |
18969 | "///\n" |
18970 | "/// \\param __a\n" |
18971 | "/// A 128-bit vector of [4 x i32]. \\n\n" |
18972 | "/// Bits [95:64] are written to bits [31:0] of the destination. \\n\n" |
18973 | "/// Bits [127:96] are written to bits [95:64] of the destination.\n" |
18974 | "/// \\param __b\n" |
18975 | "/// A 128-bit vector of [4 x i32]. \\n\n" |
18976 | "/// Bits [95:64] are written to bits [64:32] of the destination. \\n\n" |
18977 | "/// Bits [127:96] are written to bits [127:96] of the destination.\n" |
18978 | "/// \\returns A 128-bit vector of [4 x i32] containing the interleaved values.\n" |
18979 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
18980 | "_mm_unpackhi_epi32(__m128i __a, __m128i __b)\n" |
18981 | "{\n" |
18982 | " return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);\n" |
18983 | "}\n" |
18984 | "\n" |
18985 | "/// Unpacks the high-order 64-bit elements from two 128-bit vectors of\n" |
18986 | "/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].\n" |
18987 | "///\n" |
18988 | "/// \\headerfile <x86intrin.h>\n" |
18989 | "///\n" |
18990 | "/// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c>\n" |
18991 | "/// instruction.\n" |
18992 | "///\n" |
18993 | "/// \\param __a\n" |
18994 | "/// A 128-bit vector of [2 x i64]. \\n\n" |
18995 | "/// Bits [127:64] are written to bits [63:0] of the destination.\n" |
18996 | "/// \\param __b\n" |
18997 | "/// A 128-bit vector of [2 x i64]. \\n\n" |
18998 | "/// Bits [127:64] are written to bits [127:64] of the destination.\n" |
18999 | "/// \\returns A 128-bit vector of [2 x i64] containing the interleaved values.\n" |
19000 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
19001 | "_mm_unpackhi_epi64(__m128i __a, __m128i __b)\n" |
19002 | "{\n" |
19003 | " return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1);\n" |
19004 | "}\n" |
19005 | "\n" |
19006 | "/// Unpacks the low-order (index 0-7) values from two 128-bit vectors of\n" |
19007 | "/// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].\n" |
19008 | "///\n" |
19009 | "/// \\headerfile <x86intrin.h>\n" |
19010 | "///\n" |
19011 | "/// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c>\n" |
19012 | "/// instruction.\n" |
19013 | "///\n" |
19014 | "/// \\param __a\n" |
19015 | "/// A 128-bit vector of [16 x i8]. \\n\n" |
19016 | "/// Bits [7:0] are written to bits [7:0] of the result. \\n\n" |
19017 | "/// Bits [15:8] are written to bits [23:16] of the result. \\n\n" |
19018 | "/// Bits [23:16] are written to bits [39:32] of the result. \\n\n" |
19019 | "/// Bits [31:24] are written to bits [55:48] of the result. \\n\n" |
19020 | "/// Bits [39:32] are written to bits [71:64] of the result. \\n\n" |
19021 | "/// Bits [47:40] are written to bits [87:80] of the result. \\n\n" |
19022 | "/// Bits [55:48] are written to bits [103:96] of the result. \\n\n" |
19023 | "/// Bits [63:56] are written to bits [119:112] of the result.\n" |
19024 | "/// \\param __b\n" |
19025 | "/// A 128-bit vector of [16 x i8].\n" |
19026 | "/// Bits [7:0] are written to bits [15:8] of the result. \\n\n" |
19027 | "/// Bits [15:8] are written to bits [31:24] of the result. \\n\n" |
19028 | "/// Bits [23:16] are written to bits [47:40] of the result. \\n\n" |
19029 | "/// Bits [31:24] are written to bits [63:56] of the result. \\n\n" |
19030 | "/// Bits [39:32] are written to bits [79:72] of the result. \\n\n" |
19031 | "/// Bits [47:40] are written to bits [95:88] of the result. \\n\n" |
19032 | "/// Bits [55:48] are written to bits [111:104] of the result. \\n\n" |
19033 | "/// Bits [63:56] are written to bits [127:120] of the result.\n" |
19034 | "/// \\returns A 128-bit vector of [16 x i8] containing the interleaved values.\n" |
19035 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
19036 | "_mm_unpacklo_epi8(__m128i __a, __m128i __b)\n" |
19037 | "{\n" |
19038 | " return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);\n" |
19039 | "}\n" |
19040 | "\n" |
19041 | "/// Unpacks the low-order (index 0-3) values from each of the two 128-bit\n" |
19042 | "/// vectors of [8 x i16] and interleaves them into a 128-bit vector of\n" |
19043 | "/// [8 x i16].\n" |
19044 | "///\n" |
19045 | "/// \\headerfile <x86intrin.h>\n" |
19046 | "///\n" |
19047 | "/// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c>\n" |
19048 | "/// instruction.\n" |
19049 | "///\n" |
19050 | "/// \\param __a\n" |
19051 | "/// A 128-bit vector of [8 x i16].\n" |
19052 | "/// Bits [15:0] are written to bits [15:0] of the result. \\n\n" |
19053 | "/// Bits [31:16] are written to bits [47:32] of the result. \\n\n" |
19054 | "/// Bits [47:32] are written to bits [79:64] of the result. \\n\n" |
19055 | "/// Bits [63:48] are written to bits [111:96] of the result.\n" |
19056 | "/// \\param __b\n" |
19057 | "/// A 128-bit vector of [8 x i16].\n" |
19058 | "/// Bits [15:0] are written to bits [31:16] of the result. \\n\n" |
19059 | "/// Bits [31:16] are written to bits [63:48] of the result. \\n\n" |
19060 | "/// Bits [47:32] are written to bits [95:80] of the result. \\n\n" |
19061 | "/// Bits [63:48] are written to bits [127:112] of the result.\n" |
19062 | "/// \\returns A 128-bit vector of [8 x i16] containing the interleaved values.\n" |
19063 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
19064 | "_mm_unpacklo_epi16(__m128i __a, __m128i __b)\n" |
19065 | "{\n" |
19066 | " return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);\n" |
19067 | "}\n" |
19068 | "\n" |
19069 | "/// Unpacks the low-order (index 0,1) values from two 128-bit vectors of\n" |
19070 | "/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].\n" |
19071 | "///\n" |
19072 | "/// \\headerfile <x86intrin.h>\n" |
19073 | "///\n" |
19074 | "/// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c>\n" |
19075 | "/// instruction.\n" |
19076 | "///\n" |
19077 | "/// \\param __a\n" |
19078 | "/// A 128-bit vector of [4 x i32]. \\n\n" |
19079 | "/// Bits [31:0] are written to bits [31:0] of the destination. \\n\n" |
19080 | "/// Bits [63:32] are written to bits [95:64] of the destination.\n" |
19081 | "/// \\param __b\n" |
19082 | "/// A 128-bit vector of [4 x i32]. \\n\n" |
19083 | "/// Bits [31:0] are written to bits [64:32] of the destination. \\n\n" |
19084 | "/// Bits [63:32] are written to bits [127:96] of the destination.\n" |
19085 | "/// \\returns A 128-bit vector of [4 x i32] containing the interleaved values.\n" |
19086 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
19087 | "_mm_unpacklo_epi32(__m128i __a, __m128i __b)\n" |
19088 | "{\n" |
19089 | " return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);\n" |
19090 | "}\n" |
19091 | "\n" |
19092 | "/// Unpacks the low-order 64-bit elements from two 128-bit vectors of\n" |
19093 | "/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].\n" |
19094 | "///\n" |
19095 | "/// \\headerfile <x86intrin.h>\n" |
19096 | "///\n" |
19097 | "/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>\n" |
19098 | "/// instruction.\n" |
19099 | "///\n" |
19100 | "/// \\param __a\n" |
19101 | "/// A 128-bit vector of [2 x i64]. \\n\n" |
19102 | "/// Bits [63:0] are written to bits [63:0] of the destination. \\n\n" |
19103 | "/// \\param __b\n" |
19104 | "/// A 128-bit vector of [2 x i64]. \\n\n" |
19105 | "/// Bits [63:0] are written to bits [127:64] of the destination. \\n\n" |
19106 | "/// \\returns A 128-bit vector of [2 x i64] containing the interleaved values.\n" |
19107 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
19108 | "_mm_unpacklo_epi64(__m128i __a, __m128i __b)\n" |
19109 | "{\n" |
19110 | " return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0);\n" |
19111 | "}\n" |
19112 | "\n" |
19113 | "/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit\n" |
19114 | "/// integer.\n" |
19115 | "///\n" |
19116 | "/// \\headerfile <x86intrin.h>\n" |
19117 | "///\n" |
19118 | "/// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction.\n" |
19119 | "///\n" |
19120 | "/// \\param __a\n" |
19121 | "/// A 128-bit integer vector operand. The lower 64 bits are moved to the\n" |
19122 | "/// destination.\n" |
19123 | "/// \\returns A 64-bit integer containing the lower 64 bits of the parameter.\n" |
19124 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
19125 | "_mm_movepi64_pi64(__m128i __a)\n" |
19126 | "{\n" |
19127 | " return (__m64)__a[0];\n" |
19128 | "}\n" |
19129 | "\n" |
19130 | "/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the\n" |
19131 | "/// upper bits.\n" |
19132 | "///\n" |
19133 | "/// \\headerfile <x86intrin.h>\n" |
19134 | "///\n" |
19135 | "/// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction.\n" |
19136 | "///\n" |
19137 | "/// \\param __a\n" |
19138 | "/// A 64-bit value.\n" |
19139 | "/// \\returns A 128-bit integer vector. The lower 64 bits contain the value from\n" |
19140 | "/// the operand. The upper 64 bits are assigned zeros.\n" |
19141 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
19142 | "_mm_movpi64_epi64(__m64 __a)\n" |
19143 | "{\n" |
19144 | " return __extension__ (__m128i)(__v2di){ (long long)__a, 0 };\n" |
19145 | "}\n" |
19146 | "\n" |
19147 | "/// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit\n" |
19148 | "/// integer vector, zeroing the upper bits.\n" |
19149 | "///\n" |
19150 | "/// \\headerfile <x86intrin.h>\n" |
19151 | "///\n" |
19152 | "/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n" |
19153 | "///\n" |
19154 | "/// \\param __a\n" |
19155 | "/// A 128-bit integer vector operand. The lower 64 bits are moved to the\n" |
19156 | "/// destination.\n" |
19157 | "/// \\returns A 128-bit integer vector. The lower 64 bits contain the value from\n" |
19158 | "/// the operand. The upper 64 bits are assigned zeros.\n" |
19159 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
19160 | "_mm_move_epi64(__m128i __a)\n" |
19161 | "{\n" |
19162 | " return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2);\n" |
19163 | "}\n" |
19164 | "\n" |
19165 | "/// Unpacks the high-order 64-bit elements from two 128-bit vectors of\n" |
19166 | "/// [2 x double] and interleaves them into a 128-bit vector of [2 x\n" |
19167 | "/// double].\n" |
19168 | "///\n" |
19169 | "/// \\headerfile <x86intrin.h>\n" |
19170 | "///\n" |
19171 | "/// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.\n" |
19172 | "///\n" |
19173 | "/// \\param __a\n" |
19174 | "/// A 128-bit vector of [2 x double]. \\n\n" |
19175 | "/// Bits [127:64] are written to bits [63:0] of the destination.\n" |
19176 | "/// \\param __b\n" |
19177 | "/// A 128-bit vector of [2 x double]. \\n\n" |
19178 | "/// Bits [127:64] are written to bits [127:64] of the destination.\n" |
19179 | "/// \\returns A 128-bit vector of [2 x double] containing the interleaved values.\n" |
19180 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
19181 | "_mm_unpackhi_pd(__m128d __a, __m128d __b)\n" |
19182 | "{\n" |
19183 | " return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);\n" |
19184 | "}\n" |
19185 | "\n" |
19186 | "/// Unpacks the low-order 64-bit elements from two 128-bit vectors\n" |
19187 | "/// of [2 x double] and interleaves them into a 128-bit vector of [2 x\n" |
19188 | "/// double].\n" |
19189 | "///\n" |
19190 | "/// \\headerfile <x86intrin.h>\n" |
19191 | "///\n" |
19192 | "/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n" |
19193 | "///\n" |
19194 | "/// \\param __a\n" |
19195 | "/// A 128-bit vector of [2 x double]. \\n\n" |
19196 | "/// Bits [63:0] are written to bits [63:0] of the destination.\n" |
19197 | "/// \\param __b\n" |
19198 | "/// A 128-bit vector of [2 x double]. \\n\n" |
19199 | "/// Bits [63:0] are written to bits [127:64] of the destination.\n" |
19200 | "/// \\returns A 128-bit vector of [2 x double] containing the interleaved values.\n" |
19201 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
19202 | "_mm_unpacklo_pd(__m128d __a, __m128d __b)\n" |
19203 | "{\n" |
19204 | " return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0);\n" |
19205 | "}\n" |
19206 | "\n" |
19207 | "/// Extracts the sign bits of the double-precision values in the 128-bit\n" |
19208 | "/// vector of [2 x double], zero-extends the value, and writes it to the\n" |
19209 | "/// low-order bits of the destination.\n" |
19210 | "///\n" |
19211 | "/// \\headerfile <x86intrin.h>\n" |
19212 | "///\n" |
19213 | "/// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction.\n" |
19214 | "///\n" |
19215 | "/// \\param __a\n" |
19216 | "/// A 128-bit vector of [2 x double] containing the values with sign bits to\n" |
19217 | "/// be extracted.\n" |
19218 | "/// \\returns The sign bits from each of the double-precision elements in \\a __a,\n" |
19219 | "/// written to bits [1:0]. The remaining bits are assigned values of zero.\n" |
19220 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
19221 | "_mm_movemask_pd(__m128d __a)\n" |
19222 | "{\n" |
19223 | " return __builtin_ia32_movmskpd((__v2df)__a);\n" |
19224 | "}\n" |
19225 | "\n" |
19226 | "\n" |
19227 | "/// Constructs a 128-bit floating-point vector of [2 x double] from two\n" |
19228 | "/// 128-bit vector parameters of [2 x double], using the immediate-value\n" |
19229 | "/// parameter as a specifier.\n" |
19230 | "///\n" |
19231 | "/// \\headerfile <x86intrin.h>\n" |
19232 | "///\n" |
19233 | "/// \\code\n" |
19234 | "/// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i);\n" |
19235 | "/// \\endcode\n" |
19236 | "///\n" |
19237 | "/// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction.\n" |
19238 | "///\n" |
19239 | "/// \\param a\n" |
19240 | "/// A 128-bit vector of [2 x double].\n" |
19241 | "/// \\param b\n" |
19242 | "/// A 128-bit vector of [2 x double].\n" |
19243 | "/// \\param i\n" |
19244 | "/// An 8-bit immediate value. The least significant two bits specify which\n" |
19245 | "/// elements to copy from \\a a and \\a b: \\n\n" |
19246 | "/// Bit[0] = 0: lower element of \\a a copied to lower element of result. \\n\n" |
19247 | "/// Bit[0] = 1: upper element of \\a a copied to lower element of result. \\n\n" |
19248 | "/// Bit[1] = 0: lower element of \\a b copied to upper element of result. \\n\n" |
19249 | "/// Bit[1] = 1: upper element of \\a b copied to upper element of result. \\n\n" |
19250 | "/// \\returns A 128-bit vector of [2 x double] containing the shuffled values.\n" |
19251 | "#define _mm_shuffle_pd(a, b, i) \\\n" |
19252 | " (__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \\\n" |
19253 | " (int)(i))\n" |
19254 | "\n" |
19255 | "/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit\n" |
19256 | "/// floating-point vector of [4 x float].\n" |
19257 | "///\n" |
19258 | "/// \\headerfile <x86intrin.h>\n" |
19259 | "///\n" |
19260 | "/// This intrinsic has no corresponding instruction.\n" |
19261 | "///\n" |
19262 | "/// \\param __a\n" |
19263 | "/// A 128-bit floating-point vector of [2 x double].\n" |
19264 | "/// \\returns A 128-bit floating-point vector of [4 x float] containing the same\n" |
19265 | "/// bitwise pattern as the parameter.\n" |
19266 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
19267 | "_mm_castpd_ps(__m128d __a)\n" |
19268 | "{\n" |
19269 | " return (__m128)__a;\n" |
19270 | "}\n" |
19271 | "\n" |
19272 | "/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit\n" |
19273 | "/// integer vector.\n" |
19274 | "///\n" |
19275 | "/// \\headerfile <x86intrin.h>\n" |
19276 | "///\n" |
19277 | "/// This intrinsic has no corresponding instruction.\n" |
19278 | "///\n" |
19279 | "/// \\param __a\n" |
19280 | "/// A 128-bit floating-point vector of [2 x double].\n" |
19281 | "/// \\returns A 128-bit integer vector containing the same bitwise pattern as the\n" |
19282 | "/// parameter.\n" |
19283 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
19284 | "_mm_castpd_si128(__m128d __a)\n" |
19285 | "{\n" |
19286 | " return (__m128i)__a;\n" |
19287 | "}\n" |
19288 | "\n" |
19289 | "/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit\n" |
19290 | "/// floating-point vector of [2 x double].\n" |
19291 | "///\n" |
19292 | "/// \\headerfile <x86intrin.h>\n" |
19293 | "///\n" |
19294 | "/// This intrinsic has no corresponding instruction.\n" |
19295 | "///\n" |
19296 | "/// \\param __a\n" |
19297 | "/// A 128-bit floating-point vector of [4 x float].\n" |
19298 | "/// \\returns A 128-bit floating-point vector of [2 x double] containing the same\n" |
19299 | "/// bitwise pattern as the parameter.\n" |
19300 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
19301 | "_mm_castps_pd(__m128 __a)\n" |
19302 | "{\n" |
19303 | " return (__m128d)__a;\n" |
19304 | "}\n" |
19305 | "\n" |
19306 | "/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit\n" |
19307 | "/// integer vector.\n" |
19308 | "///\n" |
19309 | "/// \\headerfile <x86intrin.h>\n" |
19310 | "///\n" |
19311 | "/// This intrinsic has no corresponding instruction.\n" |
19312 | "///\n" |
19313 | "/// \\param __a\n" |
19314 | "/// A 128-bit floating-point vector of [4 x float].\n" |
19315 | "/// \\returns A 128-bit integer vector containing the same bitwise pattern as the\n" |
19316 | "/// parameter.\n" |
19317 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
19318 | "_mm_castps_si128(__m128 __a)\n" |
19319 | "{\n" |
19320 | " return (__m128i)__a;\n" |
19321 | "}\n" |
19322 | "\n" |
19323 | "/// Casts a 128-bit integer vector into a 128-bit floating-point vector\n" |
19324 | "/// of [4 x float].\n" |
19325 | "///\n" |
19326 | "/// \\headerfile <x86intrin.h>\n" |
19327 | "///\n" |
19328 | "/// This intrinsic has no corresponding instruction.\n" |
19329 | "///\n" |
19330 | "/// \\param __a\n" |
19331 | "/// A 128-bit integer vector.\n" |
19332 | "/// \\returns A 128-bit floating-point vector of [4 x float] containing the same\n" |
19333 | "/// bitwise pattern as the parameter.\n" |
19334 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
19335 | "_mm_castsi128_ps(__m128i __a)\n" |
19336 | "{\n" |
19337 | " return (__m128)__a;\n" |
19338 | "}\n" |
19339 | "\n" |
19340 | "/// Casts a 128-bit integer vector into a 128-bit floating-point vector\n" |
19341 | "/// of [2 x double].\n" |
19342 | "///\n" |
19343 | "/// \\headerfile <x86intrin.h>\n" |
19344 | "///\n" |
19345 | "/// This intrinsic has no corresponding instruction.\n" |
19346 | "///\n" |
19347 | "/// \\param __a\n" |
19348 | "/// A 128-bit integer vector.\n" |
19349 | "/// \\returns A 128-bit floating-point vector of [2 x double] containing the same\n" |
19350 | "/// bitwise pattern as the parameter.\n" |
19351 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
19352 | "_mm_castsi128_pd(__m128i __a)\n" |
19353 | "{\n" |
19354 | " return (__m128d)__a;\n" |
19355 | "}\n" |
19356 | "\n" |
19357 | "#if defined(__cplusplus)\n" |
19358 | "extern \"C\" {\n" |
19359 | "#endif\n" |
19360 | "\n" |
19361 | "/// Indicates that a spin loop is being executed for the purposes of\n" |
19362 | "/// optimizing power consumption during the loop.\n" |
19363 | "///\n" |
19364 | "/// \\headerfile <x86intrin.h>\n" |
19365 | "///\n" |
19366 | "/// This intrinsic corresponds to the <c> PAUSE </c> instruction.\n" |
19367 | "///\n" |
19368 | "void _mm_pause(void);\n" |
19369 | "\n" |
19370 | "#if defined(__cplusplus)\n" |
19371 | "} // extern \"C\"\n" |
19372 | "#endif\n" |
19373 | "#undef __DEFAULT_FN_ATTRS\n" |
19374 | "#undef __DEFAULT_FN_ATTRS_MMX\n" |
19375 | "\n" |
19376 | "#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))\n" |
19377 | "\n" |
19378 | "#define _MM_DENORMALS_ZERO_ON (0x0040)\n" |
19379 | "#define _MM_DENORMALS_ZERO_OFF (0x0000)\n" |
19380 | "\n" |
19381 | "#define _MM_DENORMALS_ZERO_MASK (0x0040)\n" |
19382 | "\n" |
19383 | "#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)\n" |
19384 | "#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))\n" |
19385 | "\n" |
19386 | "#endif /* __EMMINTRIN_H */\n" |
19387 | "" } , |
19388 | { "/builtins/f16cintrin.h" , "/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===\n" |
19389 | " *\n" |
19390 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
19391 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
19392 | " * in the Software without restriction, including without limitation the rights\n" |
19393 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
19394 | " * copies of the Software, and to permit persons to whom the Software is\n" |
19395 | " * furnished to do so, subject to the following conditions:\n" |
19396 | " *\n" |
19397 | " * The above copyright notice and this permission notice shall be included in\n" |
19398 | " * all copies or substantial portions of the Software.\n" |
19399 | " *\n" |
19400 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
19401 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
19402 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
19403 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
19404 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
19405 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
19406 | " * THE SOFTWARE.\n" |
19407 | " *\n" |
19408 | " *===-----------------------------------------------------------------------===\n" |
19409 | " */\n" |
19410 | "\n" |
19411 | "#if !defined __IMMINTRIN_H\n" |
19412 | "#error \"Never use <f16cintrin.h> directly; include <immintrin.h> instead.\"\n" |
19413 | "#endif\n" |
19414 | "\n" |
19415 | "#ifndef __F16CINTRIN_H\n" |
19416 | "#define __F16CINTRIN_H\n" |
19417 | "\n" |
19418 | "/* Define the default attributes for the functions in this file. */\n" |
19419 | "#define __DEFAULT_FN_ATTRS128 \\\n" |
19420 | " __attribute__((__always_inline__, __nodebug__, __target__(\"f16c\"), __min_vector_width__(128)))\n" |
19421 | "#define __DEFAULT_FN_ATTRS256 \\\n" |
19422 | " __attribute__((__always_inline__, __nodebug__, __target__(\"f16c\"), __min_vector_width__(256)))\n" |
19423 | "\n" |
19424 | "/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,\n" |
19425 | " * but that's because icc can emulate these without f16c using a library call.\n" |
19426 | " * Since we don't do that let's leave these in f16cintrin.h.\n" |
19427 | " */\n" |
19428 | "\n" |
19429 | "/// Converts a 16-bit half-precision float value into a 32-bit float\n" |
19430 | "/// value.\n" |
19431 | "///\n" |
19432 | "/// \\headerfile <x86intrin.h>\n" |
19433 | "///\n" |
19434 | "/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.\n" |
19435 | "///\n" |
19436 | "/// \\param __a\n" |
19437 | "/// A 16-bit half-precision float value.\n" |
19438 | "/// \\returns The converted 32-bit float value.\n" |
19439 | "static __inline float __DEFAULT_FN_ATTRS128\n" |
19440 | "_cvtsh_ss(unsigned short __a)\n" |
19441 | "{\n" |
19442 | " __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};\n" |
19443 | " __v4sf r = __builtin_ia32_vcvtph2ps(v);\n" |
19444 | " return r[0];\n" |
19445 | "}\n" |
19446 | "\n" |
19447 | "/// Converts a 32-bit single-precision float value to a 16-bit\n" |
19448 | "/// half-precision float value.\n" |
19449 | "///\n" |
19450 | "/// \\headerfile <x86intrin.h>\n" |
19451 | "///\n" |
19452 | "/// \\code\n" |
19453 | "/// unsigned short _cvtss_sh(float a, const int imm);\n" |
19454 | "/// \\endcode\n" |
19455 | "///\n" |
19456 | "/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.\n" |
19457 | "///\n" |
19458 | "/// \\param a\n" |
19459 | "/// A 32-bit single-precision float value to be converted to a 16-bit\n" |
19460 | "/// half-precision float value.\n" |
19461 | "/// \\param imm\n" |
19462 | "/// An immediate value controlling rounding using bits [2:0]: \\n\n" |
19463 | "/// 000: Nearest \\n\n" |
19464 | "/// 001: Down \\n\n" |
19465 | "/// 010: Up \\n\n" |
19466 | "/// 011: Truncate \\n\n" |
19467 | "/// 1XX: Use MXCSR.RC for rounding\n" |
19468 | "/// \\returns The converted 16-bit half-precision float value.\n" |
19469 | "#define _cvtss_sh(a, imm) \\\n" |
19470 | " (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \\\n" |
19471 | " (imm)))[0])\n" |
19472 | "\n" |
19473 | "/// Converts a 128-bit vector containing 32-bit float values into a\n" |
19474 | "/// 128-bit vector containing 16-bit half-precision float values.\n" |
19475 | "///\n" |
19476 | "/// \\headerfile <x86intrin.h>\n" |
19477 | "///\n" |
19478 | "/// \\code\n" |
19479 | "/// __m128i _mm_cvtps_ph(__m128 a, const int imm);\n" |
19480 | "/// \\endcode\n" |
19481 | "///\n" |
19482 | "/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.\n" |
19483 | "///\n" |
19484 | "/// \\param a\n" |
19485 | "/// A 128-bit vector containing 32-bit float values.\n" |
19486 | "/// \\param imm\n" |
19487 | "/// An immediate value controlling rounding using bits [2:0]: \\n\n" |
19488 | "/// 000: Nearest \\n\n" |
19489 | "/// 001: Down \\n\n" |
19490 | "/// 010: Up \\n\n" |
19491 | "/// 011: Truncate \\n\n" |
19492 | "/// 1XX: Use MXCSR.RC for rounding\n" |
19493 | "/// \\returns A 128-bit vector containing converted 16-bit half-precision float\n" |
19494 | "/// values. The lower 64 bits are used to store the converted 16-bit\n" |
19495 | "/// half-precision floating-point values.\n" |
19496 | "#define _mm_cvtps_ph(a, imm) \\\n" |
19497 | " (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm))\n" |
19498 | "\n" |
19499 | "/// Converts a 128-bit vector containing 16-bit half-precision float\n" |
19500 | "/// values into a 128-bit vector containing 32-bit float values.\n" |
19501 | "///\n" |
19502 | "/// \\headerfile <x86intrin.h>\n" |
19503 | "///\n" |
19504 | "/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.\n" |
19505 | "///\n" |
19506 | "/// \\param __a\n" |
19507 | "/// A 128-bit vector containing 16-bit half-precision float values. The lower\n" |
19508 | "/// 64 bits are used in the conversion.\n" |
19509 | "/// \\returns A 128-bit vector of [4 x float] containing converted float values.\n" |
19510 | "static __inline __m128 __DEFAULT_FN_ATTRS128\n" |
19511 | "_mm_cvtph_ps(__m128i __a)\n" |
19512 | "{\n" |
19513 | " return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);\n" |
19514 | "}\n" |
19515 | "\n" |
19516 | "/// Converts a 256-bit vector of [8 x float] into a 128-bit vector\n" |
19517 | "/// containing 16-bit half-precision float values.\n" |
19518 | "///\n" |
19519 | "/// \\headerfile <x86intrin.h>\n" |
19520 | "///\n" |
19521 | "/// \\code\n" |
19522 | "/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);\n" |
19523 | "/// \\endcode\n" |
19524 | "///\n" |
19525 | "/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.\n" |
19526 | "///\n" |
19527 | "/// \\param a\n" |
19528 | "/// A 256-bit vector containing 32-bit single-precision float values to be\n" |
19529 | "/// converted to 16-bit half-precision float values.\n" |
19530 | "/// \\param imm\n" |
19531 | "/// An immediate value controlling rounding using bits [2:0]: \\n\n" |
19532 | "/// 000: Nearest \\n\n" |
19533 | "/// 001: Down \\n\n" |
19534 | "/// 010: Up \\n\n" |
19535 | "/// 011: Truncate \\n\n" |
19536 | "/// 1XX: Use MXCSR.RC for rounding\n" |
19537 | "/// \\returns A 128-bit vector containing the converted 16-bit half-precision\n" |
19538 | "/// float values.\n" |
19539 | "#define _mm256_cvtps_ph(a, imm) \\\n" |
19540 | " (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm))\n" |
19541 | "\n" |
19542 | "/// Converts a 128-bit vector containing 16-bit half-precision float\n" |
19543 | "/// values into a 256-bit vector of [8 x float].\n" |
19544 | "///\n" |
19545 | "/// \\headerfile <x86intrin.h>\n" |
19546 | "///\n" |
19547 | "/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.\n" |
19548 | "///\n" |
19549 | "/// \\param __a\n" |
19550 | "/// A 128-bit vector containing 16-bit half-precision float values to be\n" |
19551 | "/// converted to 32-bit single-precision float values.\n" |
19552 | "/// \\returns A vector of [8 x float] containing the converted 32-bit\n" |
19553 | "/// single-precision float values.\n" |
19554 | "static __inline __m256 __DEFAULT_FN_ATTRS256\n" |
19555 | "_mm256_cvtph_ps(__m128i __a)\n" |
19556 | "{\n" |
19557 | " return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);\n" |
19558 | "}\n" |
19559 | "\n" |
19560 | "#undef __DEFAULT_FN_ATTRS128\n" |
19561 | "#undef __DEFAULT_FN_ATTRS256\n" |
19562 | "\n" |
19563 | "#endif /* __F16CINTRIN_H */\n" |
19564 | "" } , |
19565 | { "/builtins/float.h" , "/*===---- float.h - Characteristics of floating point types ----------------===\n" |
19566 | " *\n" |
19567 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
19568 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
19569 | " * in the Software without restriction, including without limitation the rights\n" |
19570 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
19571 | " * copies of the Software, and to permit persons to whom the Software is\n" |
19572 | " * furnished to do so, subject to the following conditions:\n" |
19573 | " *\n" |
19574 | " * The above copyright notice and this permission notice shall be included in\n" |
19575 | " * all copies or substantial portions of the Software.\n" |
19576 | " *\n" |
19577 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
19578 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
19579 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
19580 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
19581 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
19582 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
19583 | " * THE SOFTWARE.\n" |
19584 | " *\n" |
19585 | " *===-----------------------------------------------------------------------===\n" |
19586 | " */\n" |
19587 | "\n" |
19588 | "#ifndef __CLANG_FLOAT_H\n" |
19589 | "#define __CLANG_FLOAT_H\n" |
19590 | "\n" |
19591 | "/* If we're on MinGW, fall back to the system's float.h, which might have\n" |
19592 | " * additional definitions provided for Windows.\n" |
19593 | " * For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx\n" |
19594 | " *\n" |
19595 | " * Also fall back on Darwin to allow additional definitions and\n" |
19596 | " * implementation-defined values.\n" |
19597 | " */\n" |
19598 | "#if (defined(__APPLE__) || (defined(__MINGW32__) || defined(_MSC_VER))) && \\\n" |
19599 | " __STDC_HOSTED__ && __has_include_next(<float.h>)\n" |
19600 | "\n" |
19601 | "/* Prior to Apple's 10.7 SDK, float.h SDK header used to apply an extra level\n" |
19602 | " * of #include_next<float.h> to keep Metrowerks compilers happy. Avoid this\n" |
19603 | " * extra indirection.\n" |
19604 | " */\n" |
19605 | "#ifdef __APPLE__\n" |
19606 | "#define _FLOAT_H_\n" |
19607 | "#endif\n" |
19608 | "\n" |
19609 | "# include_next <float.h>\n" |
19610 | "\n" |
19611 | "/* Undefine anything that we'll be redefining below. */\n" |
19612 | "# undef FLT_EVAL_METHOD\n" |
19613 | "# undef FLT_ROUNDS\n" |
19614 | "# undef FLT_RADIX\n" |
19615 | "# undef FLT_MANT_DIG\n" |
19616 | "# undef DBL_MANT_DIG\n" |
19617 | "# undef LDBL_MANT_DIG\n" |
19618 | "# if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__)\n" |
19619 | "# undef DECIMAL_DIG\n" |
19620 | "# endif\n" |
19621 | "# undef FLT_DIG\n" |
19622 | "# undef DBL_DIG\n" |
19623 | "# undef LDBL_DIG\n" |
19624 | "# undef FLT_MIN_EXP\n" |
19625 | "# undef DBL_MIN_EXP\n" |
19626 | "# undef LDBL_MIN_EXP\n" |
19627 | "# undef FLT_MIN_10_EXP\n" |
19628 | "# undef DBL_MIN_10_EXP\n" |
19629 | "# undef LDBL_MIN_10_EXP\n" |
19630 | "# undef FLT_MAX_EXP\n" |
19631 | "# undef DBL_MAX_EXP\n" |
19632 | "# undef LDBL_MAX_EXP\n" |
19633 | "# undef FLT_MAX_10_EXP\n" |
19634 | "# undef DBL_MAX_10_EXP\n" |
19635 | "# undef LDBL_MAX_10_EXP\n" |
19636 | "# undef FLT_MAX\n" |
19637 | "# undef DBL_MAX\n" |
19638 | "# undef LDBL_MAX\n" |
19639 | "# undef FLT_EPSILON\n" |
19640 | "# undef DBL_EPSILON\n" |
19641 | "# undef LDBL_EPSILON\n" |
19642 | "# undef FLT_MIN\n" |
19643 | "# undef DBL_MIN\n" |
19644 | "# undef LDBL_MIN\n" |
19645 | "# if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__)\n" |
19646 | "# undef FLT_TRUE_MIN\n" |
19647 | "# undef DBL_TRUE_MIN\n" |
19648 | "# undef LDBL_TRUE_MIN\n" |
19649 | "# undef FLT_DECIMAL_DIG\n" |
19650 | "# undef DBL_DECIMAL_DIG\n" |
19651 | "# undef LDBL_DECIMAL_DIG\n" |
19652 | "# undef FLT_HAS_SUBNORM\n" |
19653 | "# undef DBL_HAS_SUBNORM\n" |
19654 | "# undef LDBL_HAS_SUBNORM\n" |
19655 | "# endif\n" |
19656 | "#endif\n" |
19657 | "\n" |
19658 | "/* Characteristics of floating point types, C99 5.2.4.2.2 */\n" |
19659 | "\n" |
19660 | "#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__\n" |
19661 | "#define FLT_ROUNDS (__builtin_flt_rounds())\n" |
19662 | "#define FLT_RADIX __FLT_RADIX__\n" |
19663 | "\n" |
19664 | "#define FLT_MANT_DIG __FLT_MANT_DIG__\n" |
19665 | "#define DBL_MANT_DIG __DBL_MANT_DIG__\n" |
19666 | "#define LDBL_MANT_DIG __LDBL_MANT_DIG__\n" |
19667 | "\n" |
19668 | "#if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__)\n" |
19669 | "# define DECIMAL_DIG __DECIMAL_DIG__\n" |
19670 | "#endif\n" |
19671 | "\n" |
19672 | "#define FLT_DIG __FLT_DIG__\n" |
19673 | "#define DBL_DIG __DBL_DIG__\n" |
19674 | "#define LDBL_DIG __LDBL_DIG__\n" |
19675 | "\n" |
19676 | "#define FLT_MIN_EXP __FLT_MIN_EXP__\n" |
19677 | "#define DBL_MIN_EXP __DBL_MIN_EXP__\n" |
19678 | "#define LDBL_MIN_EXP __LDBL_MIN_EXP__\n" |
19679 | "\n" |
19680 | "#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__\n" |
19681 | "#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__\n" |
19682 | "#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__\n" |
19683 | "\n" |
19684 | "#define FLT_MAX_EXP __FLT_MAX_EXP__\n" |
19685 | "#define DBL_MAX_EXP __DBL_MAX_EXP__\n" |
19686 | "#define LDBL_MAX_EXP __LDBL_MAX_EXP__\n" |
19687 | "\n" |
19688 | "#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__\n" |
19689 | "#define DBL_MAX_10_EXP __DBL_MAX_10_EXP__\n" |
19690 | "#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__\n" |
19691 | "\n" |
19692 | "#define FLT_MAX __FLT_MAX__\n" |
19693 | "#define DBL_MAX __DBL_MAX__\n" |
19694 | "#define LDBL_MAX __LDBL_MAX__\n" |
19695 | "\n" |
19696 | "#define FLT_EPSILON __FLT_EPSILON__\n" |
19697 | "#define DBL_EPSILON __DBL_EPSILON__\n" |
19698 | "#define LDBL_EPSILON __LDBL_EPSILON__\n" |
19699 | "\n" |
19700 | "#define FLT_MIN __FLT_MIN__\n" |
19701 | "#define DBL_MIN __DBL_MIN__\n" |
19702 | "#define LDBL_MIN __LDBL_MIN__\n" |
19703 | "\n" |
19704 | "#if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__)\n" |
19705 | "# define FLT_TRUE_MIN __FLT_DENORM_MIN__\n" |
19706 | "# define DBL_TRUE_MIN __DBL_DENORM_MIN__\n" |
19707 | "# define LDBL_TRUE_MIN __LDBL_DENORM_MIN__\n" |
19708 | "# define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__\n" |
19709 | "# define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__\n" |
19710 | "# define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__\n" |
19711 | "# define FLT_HAS_SUBNORM __FLT_HAS_DENORM__\n" |
19712 | "# define DBL_HAS_SUBNORM __DBL_HAS_DENORM__\n" |
19713 | "# define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__\n" |
19714 | "#endif\n" |
19715 | "\n" |
19716 | "#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__\n" |
19717 | "# define FLT16_MANT_DIG __FLT16_MANT_DIG__\n" |
19718 | "# define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__\n" |
19719 | "# define FLT16_DIG __FLT16_DIG__\n" |
19720 | "# define FLT16_MIN_EXP __FLT16_MIN_EXP__\n" |
19721 | "# define FLT16_MIN_10_EXP __FLT16_MIN_10_EXP__\n" |
19722 | "# define FLT16_MAX_EXP __FLT16_MAX_EXP__\n" |
19723 | "# define FLT16_MAX_10_EXP __FLT16_MAX_10_EXP__\n" |
19724 | "# define FLT16_MAX __FLT16_MAX__\n" |
19725 | "# define FLT16_EPSILON __FLT16_EPSILON__\n" |
19726 | "# define FLT16_MIN __FLT16_MIN__\n" |
19727 | "# define FLT16_TRUE_MIN __FLT16_TRUE_MIN__\n" |
19728 | "#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */\n" |
19729 | "\n" |
19730 | "#endif /* __CLANG_FLOAT_H */\n" |
19731 | "" } , |
19732 | { "/builtins/fma4intrin.h" , "/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===\n" |
19733 | " *\n" |
19734 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
19735 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
19736 | " * in the Software without restriction, including without limitation the rights\n" |
19737 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
19738 | " * copies of the Software, and to permit persons to whom the Software is\n" |
19739 | " * furnished to do so, subject to the following conditions:\n" |
19740 | " *\n" |
19741 | " * The above copyright notice and this permission notice shall be included in\n" |
19742 | " * all copies or substantial portions of the Software.\n" |
19743 | " *\n" |
19744 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
19745 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
19746 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
19747 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
19748 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
19749 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
19750 | " * THE SOFTWARE.\n" |
19751 | " *\n" |
19752 | " *===-----------------------------------------------------------------------===\n" |
19753 | " */\n" |
19754 | "\n" |
19755 | "#ifndef __X86INTRIN_H\n" |
19756 | "#error \"Never use <fma4intrin.h> directly; include <x86intrin.h> instead.\"\n" |
19757 | "#endif\n" |
19758 | "\n" |
19759 | "#ifndef __FMA4INTRIN_H\n" |
19760 | "#define __FMA4INTRIN_H\n" |
19761 | "\n" |
19762 | "#include <pmmintrin.h>\n" |
19763 | "\n" |
19764 | "/* Define the default attributes for the functions in this file. */\n" |
19765 | "#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"fma4\"), __min_vector_width__(128)))\n" |
19766 | "#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"fma4\"), __min_vector_width__(256)))\n" |
19767 | "\n" |
19768 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
19769 | "_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
19770 | "{\n" |
19771 | " return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
19772 | "}\n" |
19773 | "\n" |
19774 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
19775 | "_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
19776 | "{\n" |
19777 | " return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
19778 | "}\n" |
19779 | "\n" |
19780 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
19781 | "_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
19782 | "{\n" |
19783 | " return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
19784 | "}\n" |
19785 | "\n" |
19786 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
19787 | "_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
19788 | "{\n" |
19789 | " return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
19790 | "}\n" |
19791 | "\n" |
19792 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
19793 | "_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
19794 | "{\n" |
19795 | " return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
19796 | "}\n" |
19797 | "\n" |
19798 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
19799 | "_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
19800 | "{\n" |
19801 | " return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
19802 | "}\n" |
19803 | "\n" |
19804 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
19805 | "_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
19806 | "{\n" |
19807 | " return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
19808 | "}\n" |
19809 | "\n" |
19810 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
19811 | "_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
19812 | "{\n" |
19813 | " return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
19814 | "}\n" |
19815 | "\n" |
19816 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
19817 | "_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
19818 | "{\n" |
19819 | " return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
19820 | "}\n" |
19821 | "\n" |
19822 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
19823 | "_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
19824 | "{\n" |
19825 | " return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
19826 | "}\n" |
19827 | "\n" |
19828 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
19829 | "_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
19830 | "{\n" |
19831 | " return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
19832 | "}\n" |
19833 | "\n" |
19834 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
19835 | "_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
19836 | "{\n" |
19837 | " return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
19838 | "}\n" |
19839 | "\n" |
19840 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
19841 | "_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
19842 | "{\n" |
19843 | " return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
19844 | "}\n" |
19845 | "\n" |
19846 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
19847 | "_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
19848 | "{\n" |
19849 | " return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
19850 | "}\n" |
19851 | "\n" |
19852 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
19853 | "_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
19854 | "{\n" |
19855 | " return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
19856 | "}\n" |
19857 | "\n" |
19858 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
19859 | "_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
19860 | "{\n" |
19861 | " return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
19862 | "}\n" |
19863 | "\n" |
19864 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
19865 | "_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
19866 | "{\n" |
19867 | " return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
19868 | "}\n" |
19869 | "\n" |
19870 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
19871 | "_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
19872 | "{\n" |
19873 | " return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
19874 | "}\n" |
19875 | "\n" |
19876 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
19877 | "_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
19878 | "{\n" |
19879 | " return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
19880 | "}\n" |
19881 | "\n" |
19882 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
19883 | "_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
19884 | "{\n" |
19885 | " return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
19886 | "}\n" |
19887 | "\n" |
19888 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
19889 | "_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
19890 | "{\n" |
19891 | " return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n" |
19892 | "}\n" |
19893 | "\n" |
19894 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
19895 | "_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
19896 | "{\n" |
19897 | " return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n" |
19898 | "}\n" |
19899 | "\n" |
19900 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
19901 | "_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
19902 | "{\n" |
19903 | " return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n" |
19904 | "}\n" |
19905 | "\n" |
19906 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
19907 | "_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
19908 | "{\n" |
19909 | " return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n" |
19910 | "}\n" |
19911 | "\n" |
19912 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
19913 | "_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
19914 | "{\n" |
19915 | " return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n" |
19916 | "}\n" |
19917 | "\n" |
19918 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
19919 | "_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
19920 | "{\n" |
19921 | " return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);\n" |
19922 | "}\n" |
19923 | "\n" |
19924 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
19925 | "_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
19926 | "{\n" |
19927 | " return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n" |
19928 | "}\n" |
19929 | "\n" |
19930 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
19931 | "_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
19932 | "{\n" |
19933 | " return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);\n" |
19934 | "}\n" |
19935 | "\n" |
19936 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
19937 | "_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
19938 | "{\n" |
19939 | " return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n" |
19940 | "}\n" |
19941 | "\n" |
19942 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
19943 | "_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
19944 | "{\n" |
19945 | " return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n" |
19946 | "}\n" |
19947 | "\n" |
19948 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
19949 | "_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
19950 | "{\n" |
19951 | " return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n" |
19952 | "}\n" |
19953 | "\n" |
19954 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
19955 | "_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
19956 | "{\n" |
19957 | " return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n" |
19958 | "}\n" |
19959 | "\n" |
19960 | "#undef __DEFAULT_FN_ATTRS128\n" |
19961 | "#undef __DEFAULT_FN_ATTRS256\n" |
19962 | "\n" |
19963 | "#endif /* __FMA4INTRIN_H */\n" |
19964 | "" } , |
19965 | { "/builtins/fmaintrin.h" , "/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===\n" |
19966 | " *\n" |
19967 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
19968 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
19969 | " * in the Software without restriction, including without limitation the rights\n" |
19970 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
19971 | " * copies of the Software, and to permit persons to whom the Software is\n" |
19972 | " * furnished to do so, subject to the following conditions:\n" |
19973 | " *\n" |
19974 | " * The above copyright notice and this permission notice shall be included in\n" |
19975 | " * all copies or substantial portions of the Software.\n" |
19976 | " *\n" |
19977 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
19978 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
19979 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
19980 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
19981 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
19982 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
19983 | " * THE SOFTWARE.\n" |
19984 | " *\n" |
19985 | " *===-----------------------------------------------------------------------===\n" |
19986 | " */\n" |
19987 | "\n" |
19988 | "#ifndef __IMMINTRIN_H\n" |
19989 | "#error \"Never use <fmaintrin.h> directly; include <immintrin.h> instead.\"\n" |
19990 | "#endif\n" |
19991 | "\n" |
19992 | "#ifndef __FMAINTRIN_H\n" |
19993 | "#define __FMAINTRIN_H\n" |
19994 | "\n" |
19995 | "/* Define the default attributes for the functions in this file. */\n" |
19996 | "#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"fma\"), __min_vector_width__(128)))\n" |
19997 | "#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"fma\"), __min_vector_width__(256)))\n" |
19998 | "\n" |
19999 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
20000 | "_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
20001 | "{\n" |
20002 | " return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
20003 | "}\n" |
20004 | "\n" |
20005 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
20006 | "_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
20007 | "{\n" |
20008 | " return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
20009 | "}\n" |
20010 | "\n" |
20011 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
20012 | "_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
20013 | "{\n" |
20014 | " return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
20015 | "}\n" |
20016 | "\n" |
20017 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
20018 | "_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
20019 | "{\n" |
20020 | " return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
20021 | "}\n" |
20022 | "\n" |
20023 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
20024 | "_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
20025 | "{\n" |
20026 | " return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
20027 | "}\n" |
20028 | "\n" |
20029 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
20030 | "_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
20031 | "{\n" |
20032 | " return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
20033 | "}\n" |
20034 | "\n" |
20035 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
20036 | "_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
20037 | "{\n" |
20038 | " return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
20039 | "}\n" |
20040 | "\n" |
20041 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
20042 | "_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
20043 | "{\n" |
20044 | " return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
20045 | "}\n" |
20046 | "\n" |
20047 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
20048 | "_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
20049 | "{\n" |
20050 | " return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
20051 | "}\n" |
20052 | "\n" |
20053 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
20054 | "_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
20055 | "{\n" |
20056 | " return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
20057 | "}\n" |
20058 | "\n" |
20059 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
20060 | "_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
20061 | "{\n" |
20062 | " return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);\n" |
20063 | "}\n" |
20064 | "\n" |
20065 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
20066 | "_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
20067 | "{\n" |
20068 | " return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);\n" |
20069 | "}\n" |
20070 | "\n" |
20071 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
20072 | "_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
20073 | "{\n" |
20074 | " return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
20075 | "}\n" |
20076 | "\n" |
20077 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
20078 | "_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
20079 | "{\n" |
20080 | " return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
20081 | "}\n" |
20082 | "\n" |
20083 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
20084 | "_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
20085 | "{\n" |
20086 | " return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);\n" |
20087 | "}\n" |
20088 | "\n" |
20089 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
20090 | "_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
20091 | "{\n" |
20092 | " return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);\n" |
20093 | "}\n" |
20094 | "\n" |
20095 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
20096 | "_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
20097 | "{\n" |
20098 | " return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
20099 | "}\n" |
20100 | "\n" |
20101 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
20102 | "_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
20103 | "{\n" |
20104 | " return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
20105 | "}\n" |
20106 | "\n" |
20107 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
20108 | "_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
20109 | "{\n" |
20110 | " return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
20111 | "}\n" |
20112 | "\n" |
20113 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
20114 | "_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
20115 | "{\n" |
20116 | " return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
20117 | "}\n" |
20118 | "\n" |
20119 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
20120 | "_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
20121 | "{\n" |
20122 | " return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n" |
20123 | "}\n" |
20124 | "\n" |
20125 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
20126 | "_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
20127 | "{\n" |
20128 | " return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n" |
20129 | "}\n" |
20130 | "\n" |
20131 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
20132 | "_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
20133 | "{\n" |
20134 | " return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n" |
20135 | "}\n" |
20136 | "\n" |
20137 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
20138 | "_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
20139 | "{\n" |
20140 | " return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n" |
20141 | "}\n" |
20142 | "\n" |
20143 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
20144 | "_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
20145 | "{\n" |
20146 | " return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n" |
20147 | "}\n" |
20148 | "\n" |
20149 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
20150 | "_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
20151 | "{\n" |
20152 | " return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);\n" |
20153 | "}\n" |
20154 | "\n" |
20155 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
20156 | "_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
20157 | "{\n" |
20158 | " return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n" |
20159 | "}\n" |
20160 | "\n" |
20161 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
20162 | "_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
20163 | "{\n" |
20164 | " return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);\n" |
20165 | "}\n" |
20166 | "\n" |
20167 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
20168 | "_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
20169 | "{\n" |
20170 | " return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n" |
20171 | "}\n" |
20172 | "\n" |
20173 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
20174 | "_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
20175 | "{\n" |
20176 | " return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n" |
20177 | "}\n" |
20178 | "\n" |
20179 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
20180 | "_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
20181 | "{\n" |
20182 | " return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n" |
20183 | "}\n" |
20184 | "\n" |
20185 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
20186 | "_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
20187 | "{\n" |
20188 | " return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n" |
20189 | "}\n" |
20190 | "\n" |
20191 | "#undef __DEFAULT_FN_ATTRS128\n" |
20192 | "#undef __DEFAULT_FN_ATTRS256\n" |
20193 | "\n" |
20194 | "#endif /* __FMAINTRIN_H */\n" |
20195 | "" } , |
20196 | { "/builtins/fxsrintrin.h" , "/*===---- fxsrintrin.h - FXSR intrinsic ------------------------------------===\n" |
20197 | " *\n" |
20198 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
20199 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
20200 | " * in the Software without restriction, including without limitation the rights\n" |
20201 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
20202 | " * copies of the Software, and to permit persons to whom the Software is\n" |
20203 | " * furnished to do so, subject to the following conditions:\n" |
20204 | " *\n" |
20205 | " * The above copyright notice and this permission notice shall be included in\n" |
20206 | " * all copies or substantial portions of the Software.\n" |
20207 | " *\n" |
20208 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
20209 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
20210 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
20211 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
20212 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
20213 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
20214 | " * THE SOFTWARE.\n" |
20215 | " *\n" |
20216 | " *===-----------------------------------------------------------------------===\n" |
20217 | " */\n" |
20218 | "\n" |
20219 | "#ifndef __IMMINTRIN_H\n" |
20220 | "#error \"Never use <fxsrintrin.h> directly; include <immintrin.h> instead.\"\n" |
20221 | "#endif\n" |
20222 | "\n" |
20223 | "#ifndef __FXSRINTRIN_H\n" |
20224 | "#define __FXSRINTRIN_H\n" |
20225 | "\n" |
20226 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"fxsr\")))\n" |
20227 | "\n" |
20228 | "/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte\n" |
20229 | "/// memory region pointed to by the input parameter \\a __p.\n" |
20230 | "///\n" |
20231 | "/// \\headerfile <x86intrin.h>\n" |
20232 | "///\n" |
20233 | "/// This intrinsic corresponds to the <c> FXSAVE </c> instruction.\n" |
20234 | "///\n" |
20235 | "/// \\param __p\n" |
20236 | "/// A pointer to a 512-byte memory region. The beginning of this memory\n" |
20237 | "/// region should be aligned on a 16-byte boundary.\n" |
20238 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
20239 | "_fxsave(void *__p)\n" |
20240 | "{\n" |
20241 | " __builtin_ia32_fxsave(__p);\n" |
20242 | "}\n" |
20243 | "\n" |
20244 | "/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte\n" |
20245 | "/// memory region pointed to by the input parameter \\a __p. The contents of\n" |
20246 | "/// this memory region should have been written to by a previous \\c _fxsave\n" |
20247 | "/// or \\c _fxsave64 intrinsic.\n" |
20248 | "///\n" |
20249 | "/// \\headerfile <x86intrin.h>\n" |
20250 | "///\n" |
20251 | "/// This intrinsic corresponds to the <c> FXRSTOR </c> instruction.\n" |
20252 | "///\n" |
20253 | "/// \\param __p\n" |
20254 | "/// A pointer to a 512-byte memory region. The beginning of this memory\n" |
20255 | "/// region should be aligned on a 16-byte boundary.\n" |
20256 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
20257 | "_fxrstor(void *__p)\n" |
20258 | "{\n" |
20259 | " __builtin_ia32_fxrstor(__p);\n" |
20260 | "}\n" |
20261 | "\n" |
20262 | "#ifdef __x86_64__\n" |
20263 | "/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte\n" |
20264 | "/// memory region pointed to by the input parameter \\a __p.\n" |
20265 | "///\n" |
20266 | "/// \\headerfile <x86intrin.h>\n" |
20267 | "///\n" |
20268 | "/// This intrinsic corresponds to the <c> FXSAVE64 </c> instruction.\n" |
20269 | "///\n" |
20270 | "/// \\param __p\n" |
20271 | "/// A pointer to a 512-byte memory region. The beginning of this memory\n" |
20272 | "/// region should be aligned on a 16-byte boundary.\n" |
20273 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
20274 | "_fxsave64(void *__p)\n" |
20275 | "{\n" |
20276 | " __builtin_ia32_fxsave64(__p);\n" |
20277 | "}\n" |
20278 | "\n" |
20279 | "/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte\n" |
20280 | "/// memory region pointed to by the input parameter \\a __p. The contents of\n" |
20281 | "/// this memory region should have been written to by a previous \\c _fxsave\n" |
20282 | "/// or \\c _fxsave64 intrinsic.\n" |
20283 | "///\n" |
20284 | "/// \\headerfile <x86intrin.h>\n" |
20285 | "///\n" |
20286 | "/// This intrinsic corresponds to the <c> FXRSTOR64 </c> instruction.\n" |
20287 | "///\n" |
20288 | "/// \\param __p\n" |
20289 | "/// A pointer to a 512-byte memory region. The beginning of this memory\n" |
20290 | "/// region should be aligned on a 16-byte boundary.\n" |
20291 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
20292 | "_fxrstor64(void *__p)\n" |
20293 | "{\n" |
20294 | " __builtin_ia32_fxrstor64(__p);\n" |
20295 | "}\n" |
20296 | "#endif\n" |
20297 | "\n" |
20298 | "#undef __DEFAULT_FN_ATTRS\n" |
20299 | "\n" |
20300 | "#endif\n" |
20301 | "" } , |
20302 | { "/builtins/gfniintrin.h" , "/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------===\n" |
20303 | " *\n" |
20304 | " *\n" |
20305 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
20306 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
20307 | " * in the Software without restriction, including without limitation the rights\n" |
20308 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
20309 | " * copies of the Software, and to permit persons to whom the Software is\n" |
20310 | " * furnished to do so, subject to the following conditions:\n" |
20311 | " *\n" |
20312 | " * The above copyright notice and this permission notice shall be included in\n" |
20313 | " * all copies or substantial portions of the Software.\n" |
20314 | " *\n" |
20315 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
20316 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
20317 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
20318 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
20319 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
20320 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
20321 | " * THE SOFTWARE.\n" |
20322 | " *\n" |
20323 | " *===-----------------------------------------------------------------------===\n" |
20324 | " */\n" |
20325 | "#ifndef __IMMINTRIN_H\n" |
20326 | "#error \"Never use <gfniintrin.h> directly; include <immintrin.h> instead.\"\n" |
20327 | "#endif\n" |
20328 | "\n" |
20329 | "#ifndef __GFNIINTRIN_H\n" |
20330 | "#define __GFNIINTRIN_H\n" |
20331 | "\n" |
20332 | "\n" |
20333 | "#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \\\n" |
20334 | " (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \\\n" |
20335 | " (__v16qi)(__m128i)(B), \\\n" |
20336 | " (char)(I))\n" |
20337 | "\n" |
20338 | "#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \\\n" |
20339 | " (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \\\n" |
20340 | " (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \\\n" |
20341 | " (__v16qi)(__m128i)(S))\n" |
20342 | "\n" |
20343 | "\n" |
20344 | "#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \\\n" |
20345 | " (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \\\n" |
20346 | " U, A, B, I)\n" |
20347 | "\n" |
20348 | "\n" |
20349 | "#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \\\n" |
20350 | " (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \\\n" |
20351 | " (__v32qi)(__m256i)(B), \\\n" |
20352 | " (char)(I))\n" |
20353 | "\n" |
20354 | "#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \\\n" |
20355 | " (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \\\n" |
20356 | " (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \\\n" |
20357 | " (__v32qi)(__m256i)(S))\n" |
20358 | "\n" |
20359 | "#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \\\n" |
20360 | " (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \\\n" |
20361 | " U, A, B, I)\n" |
20362 | "\n" |
20363 | "\n" |
20364 | "#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \\\n" |
20365 | " (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \\\n" |
20366 | " (__v64qi)(__m512i)(B), \\\n" |
20367 | " (char)(I))\n" |
20368 | "\n" |
20369 | "#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \\\n" |
20370 | " (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \\\n" |
20371 | " (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \\\n" |
20372 | " (__v64qi)(__m512i)(S))\n" |
20373 | "\n" |
20374 | "#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \\\n" |
20375 | " (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \\\n" |
20376 | " U, A, B, I)\n" |
20377 | "\n" |
20378 | "#define _mm_gf2p8affine_epi64_epi8(A, B, I) \\\n" |
20379 | " (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \\\n" |
20380 | " (__v16qi)(__m128i)(B), \\\n" |
20381 | " (char)(I))\n" |
20382 | "\n" |
20383 | "#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \\\n" |
20384 | " (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \\\n" |
20385 | " (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \\\n" |
20386 | " (__v16qi)(__m128i)(S))\n" |
20387 | "\n" |
20388 | "\n" |
20389 | "#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \\\n" |
20390 | " (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \\\n" |
20391 | " U, A, B, I)\n" |
20392 | "\n" |
20393 | "\n" |
20394 | "#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \\\n" |
20395 | " (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \\\n" |
20396 | " (__v32qi)(__m256i)(B), \\\n" |
20397 | " (char)(I))\n" |
20398 | "\n" |
20399 | "#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \\\n" |
20400 | " (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \\\n" |
20401 | " (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \\\n" |
20402 | " (__v32qi)(__m256i)(S))\n" |
20403 | "\n" |
20404 | "#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \\\n" |
20405 | " (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \\\n" |
20406 | " U, A, B, I)\n" |
20407 | "\n" |
20408 | "\n" |
20409 | "#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \\\n" |
20410 | " (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \\\n" |
20411 | " (__v64qi)(__m512i)(B), \\\n" |
20412 | " (char)(I))\n" |
20413 | "\n" |
20414 | "#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \\\n" |
20415 | " (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \\\n" |
20416 | " (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \\\n" |
20417 | " (__v64qi)(__m512i)(S))\n" |
20418 | "\n" |
20419 | "#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \\\n" |
20420 | " (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \\\n" |
20421 | " U, A, B, I)\n" |
20422 | "\n" |
20423 | "/* Default attributes for simple form (no masking). */\n" |
20424 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"gfni\"), __min_vector_width__(128)))\n" |
20425 | "\n" |
20426 | "/* Default attributes for YMM unmasked form. */\n" |
20427 | "#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__(\"avx,gfni\"), __min_vector_width__(256)))\n" |
20428 | "\n" |
20429 | "/* Default attributes for ZMM forms. */\n" |
20430 | "#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__(\"avx512bw,gfni\"), __min_vector_width__(512)))\n" |
20431 | "\n" |
20432 | "/* Default attributes for VLX forms. */\n" |
20433 | "#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__(\"avx512bw,avx512vl,gfni\"), __min_vector_width__(128)))\n" |
20434 | "#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__(\"avx512bw,avx512vl,gfni\"), __min_vector_width__(256)))\n" |
20435 | "\n" |
20436 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
20437 | "_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)\n" |
20438 | "{\n" |
20439 | " return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,\n" |
20440 | " (__v16qi) __B);\n" |
20441 | "}\n" |
20442 | "\n" |
20443 | "static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128\n" |
20444 | "_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)\n" |
20445 | "{\n" |
20446 | " return (__m128i) __builtin_ia32_selectb_128(__U,\n" |
20447 | " (__v16qi) _mm_gf2p8mul_epi8(__A, __B),\n" |
20448 | " (__v16qi) __S);\n" |
20449 | "}\n" |
20450 | "\n" |
20451 | "static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128\n" |
20452 | "_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B)\n" |
20453 | "{\n" |
20454 | " return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(),\n" |
20455 | " __U, __A, __B);\n" |
20456 | "}\n" |
20457 | "\n" |
20458 | "static __inline__ __m256i __DEFAULT_FN_ATTRS_Y\n" |
20459 | "_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)\n" |
20460 | "{\n" |
20461 | " return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,\n" |
20462 | " (__v32qi) __B);\n" |
20463 | "}\n" |
20464 | "\n" |
20465 | "static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256\n" |
20466 | "_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B)\n" |
20467 | "{\n" |
20468 | " return (__m256i) __builtin_ia32_selectb_256(__U,\n" |
20469 | " (__v32qi) _mm256_gf2p8mul_epi8(__A, __B),\n" |
20470 | " (__v32qi) __S);\n" |
20471 | "}\n" |
20472 | "\n" |
20473 | "static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256\n" |
20474 | "_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B)\n" |
20475 | "{\n" |
20476 | " return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(),\n" |
20477 | " __U, __A, __B);\n" |
20478 | "}\n" |
20479 | "\n" |
20480 | "static __inline__ __m512i __DEFAULT_FN_ATTRS_Z\n" |
20481 | "_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)\n" |
20482 | "{\n" |
20483 | " return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A,\n" |
20484 | " (__v64qi) __B);\n" |
20485 | "}\n" |
20486 | "\n" |
20487 | "static __inline__ __m512i __DEFAULT_FN_ATTRS_Z\n" |
20488 | "_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)\n" |
20489 | "{\n" |
20490 | " return (__m512i) __builtin_ia32_selectb_512(__U,\n" |
20491 | " (__v64qi) _mm512_gf2p8mul_epi8(__A, __B),\n" |
20492 | " (__v64qi) __S);\n" |
20493 | "}\n" |
20494 | "\n" |
20495 | "static __inline__ __m512i __DEFAULT_FN_ATTRS_Z\n" |
20496 | "_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B)\n" |
20497 | "{\n" |
20498 | " return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_si512(),\n" |
20499 | " __U, __A, __B);\n" |
20500 | "}\n" |
20501 | "\n" |
20502 | "#undef __DEFAULT_FN_ATTRS\n" |
20503 | "#undef __DEFAULT_FN_ATTRS_Y\n" |
20504 | "#undef __DEFAULT_FN_ATTRS_Z\n" |
20505 | "#undef __DEFAULT_FN_ATTRS_VL128\n" |
20506 | "#undef __DEFAULT_FN_ATTRS_VL256\n" |
20507 | "\n" |
20508 | "#endif /* __GFNIINTRIN_H */\n" |
20509 | "\n" |
20510 | "" } , |
20511 | { "/builtins/htmintrin.h" , "/*===---- htmintrin.h - Standard header for PowerPC HTM ---------------===*\\\n" |
20512 | " *\n" |
20513 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
20514 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
20515 | " * in the Software without restriction, including without limitation the rights\n" |
20516 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
20517 | " * copies of the Software, and to permit persons to whom the Software is\n" |
20518 | " * furnished to do so, subject to the following conditions:\n" |
20519 | " *\n" |
20520 | " * The above copyright notice and this permission notice shall be included in\n" |
20521 | " * all copies or substantial portions of the Software.\n" |
20522 | " *\n" |
20523 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
20524 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
20525 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
20526 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
20527 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
20528 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
20529 | " * THE SOFTWARE.\n" |
20530 | " *\n" |
20531 | "\\*===----------------------------------------------------------------------===*/\n" |
20532 | "\n" |
20533 | "#ifndef __HTMINTRIN_H\n" |
20534 | "#define __HTMINTRIN_H\n" |
20535 | "\n" |
20536 | "#ifndef __HTM__\n" |
20537 | "#error \"HTM instruction set not enabled\"\n" |
20538 | "#endif\n" |
20539 | "\n" |
20540 | "#ifdef __powerpc__\n" |
20541 | "\n" |
20542 | "#include <stdint.h>\n" |
20543 | "\n" |
20544 | "typedef uint64_t texasr_t;\n" |
20545 | "typedef uint32_t texasru_t;\n" |
20546 | "typedef uint32_t texasrl_t;\n" |
20547 | "typedef uintptr_t tfiar_t;\n" |
20548 | "typedef uintptr_t tfhar_t;\n" |
20549 | "\n" |
20550 | "#define _HTM_STATE(CR0) ((CR0 >> 1) & 0x3)\n" |
20551 | "#define _HTM_NONTRANSACTIONAL 0x0\n" |
20552 | "#define _HTM_SUSPENDED 0x1\n" |
20553 | "#define _HTM_TRANSACTIONAL 0x2\n" |
20554 | "\n" |
20555 | "#define _TEXASR_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \\\n" |
20556 | " (((TEXASR) >> (63-(BITNUM))) & ((1<<(SIZE))-1))\n" |
20557 | "#define _TEXASRU_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \\\n" |
20558 | " (((TEXASR) >> (31-(BITNUM))) & ((1<<(SIZE))-1))\n" |
20559 | "\n" |
20560 | "#define _TEXASR_FAILURE_CODE(TEXASR) \\\n" |
20561 | " _TEXASR_EXTRACT_BITS(TEXASR, 7, 8)\n" |
20562 | "#define _TEXASRU_FAILURE_CODE(TEXASRU) \\\n" |
20563 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 8)\n" |
20564 | "\n" |
20565 | "#define _TEXASR_FAILURE_PERSISTENT(TEXASR) \\\n" |
20566 | " _TEXASR_EXTRACT_BITS(TEXASR, 7, 1)\n" |
20567 | "#define _TEXASRU_FAILURE_PERSISTENT(TEXASRU) \\\n" |
20568 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 1)\n" |
20569 | "\n" |
20570 | "#define _TEXASR_DISALLOWED(TEXASR) \\\n" |
20571 | " _TEXASR_EXTRACT_BITS(TEXASR, 8, 1)\n" |
20572 | "#define _TEXASRU_DISALLOWED(TEXASRU) \\\n" |
20573 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 8, 1)\n" |
20574 | "\n" |
20575 | "#define _TEXASR_NESTING_OVERFLOW(TEXASR) \\\n" |
20576 | " _TEXASR_EXTRACT_BITS(TEXASR, 9, 1)\n" |
20577 | "#define _TEXASRU_NESTING_OVERFLOW(TEXASRU) \\\n" |
20578 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 9, 1)\n" |
20579 | "\n" |
20580 | "#define _TEXASR_FOOTPRINT_OVERFLOW(TEXASR) \\\n" |
20581 | " _TEXASR_EXTRACT_BITS(TEXASR, 10, 1)\n" |
20582 | "#define _TEXASRU_FOOTPRINT_OVERFLOW(TEXASRU) \\\n" |
20583 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 10, 1)\n" |
20584 | "\n" |
20585 | "#define _TEXASR_SELF_INDUCED_CONFLICT(TEXASR) \\\n" |
20586 | " _TEXASR_EXTRACT_BITS(TEXASR, 11, 1)\n" |
20587 | "#define _TEXASRU_SELF_INDUCED_CONFLICT(TEXASRU) \\\n" |
20588 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 11, 1)\n" |
20589 | "\n" |
20590 | "#define _TEXASR_NON_TRANSACTIONAL_CONFLICT(TEXASR) \\\n" |
20591 | " _TEXASR_EXTRACT_BITS(TEXASR, 12, 1)\n" |
20592 | "#define _TEXASRU_NON_TRANSACTIONAL_CONFLICT(TEXASRU) \\\n" |
20593 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 12, 1)\n" |
20594 | "\n" |
20595 | "#define _TEXASR_TRANSACTION_CONFLICT(TEXASR) \\\n" |
20596 | " _TEXASR_EXTRACT_BITS(TEXASR, 13, 1)\n" |
20597 | "#define _TEXASRU_TRANSACTION_CONFLICT(TEXASRU) \\\n" |
20598 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 13, 1)\n" |
20599 | "\n" |
20600 | "#define _TEXASR_TRANSLATION_INVALIDATION_CONFLICT(TEXASR) \\\n" |
20601 | " _TEXASR_EXTRACT_BITS(TEXASR, 14, 1)\n" |
20602 | "#define _TEXASRU_TRANSLATION_INVALIDATION_CONFLICT(TEXASRU) \\\n" |
20603 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 14, 1)\n" |
20604 | "\n" |
20605 | "#define _TEXASR_IMPLEMENTAION_SPECIFIC(TEXASR) \\\n" |
20606 | " _TEXASR_EXTRACT_BITS(TEXASR, 15, 1)\n" |
20607 | "#define _TEXASRU_IMPLEMENTAION_SPECIFIC(TEXASRU) \\\n" |
20608 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 15, 1)\n" |
20609 | "\n" |
20610 | "#define _TEXASR_INSTRUCTION_FETCH_CONFLICT(TEXASR) \\\n" |
20611 | " _TEXASR_EXTRACT_BITS(TEXASR, 16, 1)\n" |
20612 | "#define _TEXASRU_INSTRUCTION_FETCH_CONFLICT(TEXASRU) \\\n" |
20613 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 16, 1)\n" |
20614 | "\n" |
20615 | "#define _TEXASR_ABORT(TEXASR) \\\n" |
20616 | " _TEXASR_EXTRACT_BITS(TEXASR, 31, 1)\n" |
20617 | "#define _TEXASRU_ABORT(TEXASRU) \\\n" |
20618 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 31, 1)\n" |
20619 | "\n" |
20620 | "\n" |
20621 | "#define _TEXASR_SUSPENDED(TEXASR) \\\n" |
20622 | " _TEXASR_EXTRACT_BITS(TEXASR, 32, 1)\n" |
20623 | "\n" |
20624 | "#define _TEXASR_PRIVILEGE(TEXASR) \\\n" |
20625 | " _TEXASR_EXTRACT_BITS(TEXASR, 35, 2)\n" |
20626 | "\n" |
20627 | "#define _TEXASR_FAILURE_SUMMARY(TEXASR) \\\n" |
20628 | " _TEXASR_EXTRACT_BITS(TEXASR, 36, 1)\n" |
20629 | "\n" |
20630 | "#define _TEXASR_TFIAR_EXACT(TEXASR) \\\n" |
20631 | " _TEXASR_EXTRACT_BITS(TEXASR, 37, 1)\n" |
20632 | "\n" |
20633 | "#define _TEXASR_ROT(TEXASR) \\\n" |
20634 | " _TEXASR_EXTRACT_BITS(TEXASR, 38, 1)\n" |
20635 | "\n" |
20636 | "#define _TEXASR_TRANSACTION_LEVEL(TEXASR) \\\n" |
20637 | " _TEXASR_EXTRACT_BITS(TEXASR, 63, 12)\n" |
20638 | "\n" |
20639 | "#endif /* __powerpc */\n" |
20640 | "\n" |
20641 | "#ifdef __s390__\n" |
20642 | "\n" |
20643 | "/* Condition codes generated by tbegin */\n" |
20644 | "#define _HTM_TBEGIN_STARTED 0\n" |
20645 | "#define _HTM_TBEGIN_INDETERMINATE 1\n" |
20646 | "#define _HTM_TBEGIN_TRANSIENT 2\n" |
20647 | "#define _HTM_TBEGIN_PERSISTENT 3\n" |
20648 | "\n" |
20649 | "/* The abort codes below this threshold are reserved for machine use. */\n" |
20650 | "#define _HTM_FIRST_USER_ABORT_CODE 256\n" |
20651 | "\n" |
20652 | "/* The transaction diagnostic block is it is defined in the Principles\n" |
20653 | " of Operation chapter 5-91. */\n" |
20654 | "\n" |
20655 | "struct __htm_tdb {\n" |
20656 | " unsigned char format; /* 0 */\n" |
20657 | " unsigned char flags;\n" |
20658 | " unsigned char reserved1[4];\n" |
20659 | " unsigned short nesting_depth;\n" |
20660 | " unsigned long long abort_code; /* 8 */\n" |
20661 | " unsigned long long conflict_token; /* 16 */\n" |
20662 | " unsigned long long atia; /* 24 */\n" |
20663 | " unsigned char eaid; /* 32 */\n" |
20664 | " unsigned char dxc;\n" |
20665 | " unsigned char reserved2[2];\n" |
20666 | " unsigned int program_int_id;\n" |
20667 | " unsigned long long exception_id; /* 40 */\n" |
20668 | " unsigned long long bea; /* 48 */\n" |
20669 | " unsigned char reserved3[72]; /* 56 */\n" |
20670 | " unsigned long long gprs[16]; /* 128 */\n" |
20671 | "} __attribute__((__packed__, __aligned__ (8)));\n" |
20672 | "\n" |
20673 | "\n" |
20674 | "/* Helper intrinsics to retry tbegin in case of transient failure. */\n" |
20675 | "\n" |
20676 | "static __inline int __attribute__((__always_inline__, __nodebug__))\n" |
20677 | "__builtin_tbegin_retry_null (int __retry)\n" |
20678 | "{\n" |
20679 | " int cc, i = 0;\n" |
20680 | "\n" |
20681 | " while ((cc = __builtin_tbegin(0)) == _HTM_TBEGIN_TRANSIENT\n" |
20682 | " && i++ < __retry)\n" |
20683 | " __builtin_tx_assist(i);\n" |
20684 | "\n" |
20685 | " return cc;\n" |
20686 | "}\n" |
20687 | "\n" |
20688 | "static __inline int __attribute__((__always_inline__, __nodebug__))\n" |
20689 | "__builtin_tbegin_retry_tdb (void *__tdb, int __retry)\n" |
20690 | "{\n" |
20691 | " int cc, i = 0;\n" |
20692 | "\n" |
20693 | " while ((cc = __builtin_tbegin(__tdb)) == _HTM_TBEGIN_TRANSIENT\n" |
20694 | " && i++ < __retry)\n" |
20695 | " __builtin_tx_assist(i);\n" |
20696 | "\n" |
20697 | " return cc;\n" |
20698 | "}\n" |
20699 | "\n" |
20700 | "#define __builtin_tbegin_retry(tdb, retry) \\\n" |
20701 | " (__builtin_constant_p(tdb == 0) && tdb == 0 ? \\\n" |
20702 | " __builtin_tbegin_retry_null(retry) : \\\n" |
20703 | " __builtin_tbegin_retry_tdb(tdb, retry))\n" |
20704 | "\n" |
20705 | "static __inline int __attribute__((__always_inline__, __nodebug__))\n" |
20706 | "__builtin_tbegin_retry_nofloat_null (int __retry)\n" |
20707 | "{\n" |
20708 | " int cc, i = 0;\n" |
20709 | "\n" |
20710 | " while ((cc = __builtin_tbegin_nofloat(0)) == _HTM_TBEGIN_TRANSIENT\n" |
20711 | " && i++ < __retry)\n" |
20712 | " __builtin_tx_assist(i);\n" |
20713 | "\n" |
20714 | " return cc;\n" |
20715 | "}\n" |
20716 | "\n" |
20717 | "static __inline int __attribute__((__always_inline__, __nodebug__))\n" |
20718 | "__builtin_tbegin_retry_nofloat_tdb (void *__tdb, int __retry)\n" |
20719 | "{\n" |
20720 | " int cc, i = 0;\n" |
20721 | "\n" |
20722 | " while ((cc = __builtin_tbegin_nofloat(__tdb)) == _HTM_TBEGIN_TRANSIENT\n" |
20723 | " && i++ < __retry)\n" |
20724 | " __builtin_tx_assist(i);\n" |
20725 | "\n" |
20726 | " return cc;\n" |
20727 | "}\n" |
20728 | "\n" |
20729 | "#define __builtin_tbegin_retry_nofloat(tdb, retry) \\\n" |
20730 | " (__builtin_constant_p(tdb == 0) && tdb == 0 ? \\\n" |
20731 | " __builtin_tbegin_retry_nofloat_null(retry) : \\\n" |
20732 | " __builtin_tbegin_retry_nofloat_tdb(tdb, retry))\n" |
20733 | "\n" |
20734 | "#endif /* __s390__ */\n" |
20735 | "\n" |
20736 | "#endif /* __HTMINTRIN_H */\n" |
20737 | "" } , |
20738 | { "/builtins/htmxlintrin.h" , "/*===---- htmxlintrin.h - XL compiler HTM execution intrinsics-------------===*\\\n" |
20739 | " *\n" |
20740 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
20741 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
20742 | " * in the Software without restriction, including without limitation the rights\n" |
20743 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
20744 | " * copies of the Software, and to permit persons to whom the Software is\n" |
20745 | " * furnished to do so, subject to the following conditions:\n" |
20746 | " *\n" |
20747 | " * The above copyright notice and this permission notice shall be included in\n" |
20748 | " * all copies or substantial portions of the Software.\n" |
20749 | " *\n" |
20750 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
20751 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
20752 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
20753 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
20754 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
20755 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
20756 | " * THE SOFTWARE.\n" |
20757 | " *\n" |
20758 | "\\*===----------------------------------------------------------------------===*/\n" |
20759 | "\n" |
20760 | "#ifndef __HTMXLINTRIN_H\n" |
20761 | "#define __HTMXLINTRIN_H\n" |
20762 | "\n" |
20763 | "#ifndef __HTM__\n" |
20764 | "#error \"HTM instruction set not enabled\"\n" |
20765 | "#endif\n" |
20766 | "\n" |
20767 | "#include <htmintrin.h>\n" |
20768 | "\n" |
20769 | "#ifdef __powerpc__\n" |
20770 | "\n" |
20771 | "#ifdef __cplusplus\n" |
20772 | "extern \"C\" {\n" |
20773 | "#endif\n" |
20774 | "\n" |
20775 | "#define _TEXASR_PTR(TM_BUF) ((texasr_t *)((char *)(TM_BUF) + 0))\n" |
20776 | "#define _TEXASRU_PTR(TM_BUF) ((texasru_t *)((char *)(TM_BUF) + 0))\n" |
20777 | "#define _TEXASRL_PTR(TM_BUF) ((texasrl_t *)((char *)(TM_BUF) + 4))\n" |
20778 | "#define _TFIAR_PTR(TM_BUF) ((tfiar_t *)((char *)(TM_BUF) + 8))\n" |
20779 | "\n" |
20780 | "typedef char TM_buff_type[16];\n" |
20781 | "\n" |
20782 | "/* This macro can be used to determine whether a transaction was successfully\n" |
20783 | " started from the __TM_begin() and __TM_simple_begin() intrinsic functions\n" |
20784 | " below. */\n" |
20785 | "#define _HTM_TBEGIN_STARTED 1\n" |
20786 | "\n" |
20787 | "extern __inline long\n" |
20788 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20789 | "__TM_simple_begin (void)\n" |
20790 | "{\n" |
20791 | " if (__builtin_expect (__builtin_tbegin (0), 1))\n" |
20792 | " return _HTM_TBEGIN_STARTED;\n" |
20793 | " return 0;\n" |
20794 | "}\n" |
20795 | "\n" |
20796 | "extern __inline long\n" |
20797 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20798 | "__TM_begin (void* const __TM_buff)\n" |
20799 | "{\n" |
20800 | " *_TEXASRL_PTR (__TM_buff) = 0;\n" |
20801 | " if (__builtin_expect (__builtin_tbegin (0), 1))\n" |
20802 | " return _HTM_TBEGIN_STARTED;\n" |
20803 | "#ifdef __powerpc64__\n" |
20804 | " *_TEXASR_PTR (__TM_buff) = __builtin_get_texasr ();\n" |
20805 | "#else\n" |
20806 | " *_TEXASRU_PTR (__TM_buff) = __builtin_get_texasru ();\n" |
20807 | " *_TEXASRL_PTR (__TM_buff) = __builtin_get_texasr ();\n" |
20808 | "#endif\n" |
20809 | " *_TFIAR_PTR (__TM_buff) = __builtin_get_tfiar ();\n" |
20810 | " return 0;\n" |
20811 | "}\n" |
20812 | "\n" |
20813 | "extern __inline long\n" |
20814 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20815 | "__TM_end (void)\n" |
20816 | "{\n" |
20817 | " if (__builtin_expect (__builtin_tend (0), 1))\n" |
20818 | " return 1;\n" |
20819 | " return 0;\n" |
20820 | "}\n" |
20821 | "\n" |
20822 | "extern __inline void\n" |
20823 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20824 | "__TM_abort (void)\n" |
20825 | "{\n" |
20826 | " __builtin_tabort (0);\n" |
20827 | "}\n" |
20828 | "\n" |
20829 | "extern __inline void\n" |
20830 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20831 | "__TM_named_abort (unsigned char const __code)\n" |
20832 | "{\n" |
20833 | " __builtin_tabort (__code);\n" |
20834 | "}\n" |
20835 | "\n" |
20836 | "extern __inline void\n" |
20837 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20838 | "__TM_resume (void)\n" |
20839 | "{\n" |
20840 | " __builtin_tresume ();\n" |
20841 | "}\n" |
20842 | "\n" |
20843 | "extern __inline void\n" |
20844 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20845 | "__TM_suspend (void)\n" |
20846 | "{\n" |
20847 | " __builtin_tsuspend ();\n" |
20848 | "}\n" |
20849 | "\n" |
20850 | "extern __inline long\n" |
20851 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20852 | "__TM_is_user_abort (void* const __TM_buff)\n" |
20853 | "{\n" |
20854 | " texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n" |
20855 | " return _TEXASRU_ABORT (texasru);\n" |
20856 | "}\n" |
20857 | "\n" |
20858 | "extern __inline long\n" |
20859 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20860 | "__TM_is_named_user_abort (void* const __TM_buff, unsigned char *__code)\n" |
20861 | "{\n" |
20862 | " texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n" |
20863 | "\n" |
20864 | " *__code = _TEXASRU_FAILURE_CODE (texasru);\n" |
20865 | " return _TEXASRU_ABORT (texasru);\n" |
20866 | "}\n" |
20867 | "\n" |
20868 | "extern __inline long\n" |
20869 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20870 | "__TM_is_illegal (void* const __TM_buff)\n" |
20871 | "{\n" |
20872 | " texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n" |
20873 | " return _TEXASRU_DISALLOWED (texasru);\n" |
20874 | "}\n" |
20875 | "\n" |
20876 | "extern __inline long\n" |
20877 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20878 | "__TM_is_footprint_exceeded (void* const __TM_buff)\n" |
20879 | "{\n" |
20880 | " texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n" |
20881 | " return _TEXASRU_FOOTPRINT_OVERFLOW (texasru);\n" |
20882 | "}\n" |
20883 | "\n" |
20884 | "extern __inline long\n" |
20885 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20886 | "__TM_nesting_depth (void* const __TM_buff)\n" |
20887 | "{\n" |
20888 | " texasrl_t texasrl;\n" |
20889 | "\n" |
20890 | " if (_HTM_STATE (__builtin_ttest ()) == _HTM_NONTRANSACTIONAL)\n" |
20891 | " {\n" |
20892 | " texasrl = *_TEXASRL_PTR (__TM_buff);\n" |
20893 | " if (!_TEXASR_FAILURE_SUMMARY (texasrl))\n" |
20894 | " texasrl = 0;\n" |
20895 | " }\n" |
20896 | " else\n" |
20897 | " texasrl = (texasrl_t) __builtin_get_texasr ();\n" |
20898 | "\n" |
20899 | " return _TEXASR_TRANSACTION_LEVEL (texasrl);\n" |
20900 | "}\n" |
20901 | "\n" |
20902 | "extern __inline long\n" |
20903 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20904 | "__TM_is_nested_too_deep(void* const __TM_buff)\n" |
20905 | "{\n" |
20906 | " texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n" |
20907 | " return _TEXASRU_NESTING_OVERFLOW (texasru);\n" |
20908 | "}\n" |
20909 | "\n" |
20910 | "extern __inline long\n" |
20911 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20912 | "__TM_is_conflict(void* const __TM_buff)\n" |
20913 | "{\n" |
20914 | " texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n" |
20915 | " /* Return TEXASR bits 11 (Self-Induced Conflict) through\n" |
20916 | " 14 (Translation Invalidation Conflict). */\n" |
20917 | " return (_TEXASRU_EXTRACT_BITS (texasru, 14, 4)) ? 1 : 0;\n" |
20918 | "}\n" |
20919 | "\n" |
20920 | "extern __inline long\n" |
20921 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20922 | "__TM_is_failure_persistent(void* const __TM_buff)\n" |
20923 | "{\n" |
20924 | " texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n" |
20925 | " return _TEXASRU_FAILURE_PERSISTENT (texasru);\n" |
20926 | "}\n" |
20927 | "\n" |
20928 | "extern __inline long\n" |
20929 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20930 | "__TM_failure_address(void* const __TM_buff)\n" |
20931 | "{\n" |
20932 | " return *_TFIAR_PTR (__TM_buff);\n" |
20933 | "}\n" |
20934 | "\n" |
20935 | "extern __inline long long\n" |
20936 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
20937 | "__TM_failure_code(void* const __TM_buff)\n" |
20938 | "{\n" |
20939 | " return *_TEXASR_PTR (__TM_buff);\n" |
20940 | "}\n" |
20941 | "\n" |
20942 | "#ifdef __cplusplus\n" |
20943 | "}\n" |
20944 | "#endif\n" |
20945 | "\n" |
20946 | "#endif /* __powerpc__ */\n" |
20947 | "\n" |
20948 | "#ifdef __s390__\n" |
20949 | "\n" |
20950 | "#include <stdint.h>\n" |
20951 | "\n" |
20952 | "/* These intrinsics are being made available for compatibility with\n" |
20953 | " the IBM XL compiler. For documentation please see the \"z/OS XL\n" |
20954 | " C/C++ Programming Guide\" publicly available on the web. */\n" |
20955 | "\n" |
20956 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
20957 | "__TM_simple_begin ()\n" |
20958 | "{\n" |
20959 | " return __builtin_tbegin_nofloat (0);\n" |
20960 | "}\n" |
20961 | "\n" |
20962 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
20963 | "__TM_begin (void* const __tdb)\n" |
20964 | "{\n" |
20965 | " return __builtin_tbegin_nofloat (__tdb);\n" |
20966 | "}\n" |
20967 | "\n" |
20968 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
20969 | "__TM_end ()\n" |
20970 | "{\n" |
20971 | " return __builtin_tend ();\n" |
20972 | "}\n" |
20973 | "\n" |
20974 | "static __inline void __attribute__((__always_inline__))\n" |
20975 | "__TM_abort ()\n" |
20976 | "{\n" |
20977 | " return __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE);\n" |
20978 | "}\n" |
20979 | "\n" |
20980 | "static __inline void __attribute__((__always_inline__, __nodebug__))\n" |
20981 | "__TM_named_abort (unsigned char const __code)\n" |
20982 | "{\n" |
20983 | " return __builtin_tabort ((int)_HTM_FIRST_USER_ABORT_CODE + __code);\n" |
20984 | "}\n" |
20985 | "\n" |
20986 | "static __inline void __attribute__((__always_inline__, __nodebug__))\n" |
20987 | "__TM_non_transactional_store (void* const __addr, long long const __value)\n" |
20988 | "{\n" |
20989 | " __builtin_non_tx_store ((uint64_t*)__addr, (uint64_t)__value);\n" |
20990 | "}\n" |
20991 | "\n" |
20992 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
20993 | "__TM_nesting_depth (void* const __tdb_ptr)\n" |
20994 | "{\n" |
20995 | " int depth = __builtin_tx_nesting_depth ();\n" |
20996 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
20997 | "\n" |
20998 | " if (depth != 0)\n" |
20999 | " return depth;\n" |
21000 | "\n" |
21001 | " if (tdb->format != 1)\n" |
21002 | " return 0;\n" |
21003 | " return tdb->nesting_depth;\n" |
21004 | "}\n" |
21005 | "\n" |
21006 | "/* Transaction failure diagnostics */\n" |
21007 | "\n" |
21008 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
21009 | "__TM_is_user_abort (void* const __tdb_ptr)\n" |
21010 | "{\n" |
21011 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
21012 | "\n" |
21013 | " if (tdb->format != 1)\n" |
21014 | " return 0;\n" |
21015 | "\n" |
21016 | " return !!(tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE);\n" |
21017 | "}\n" |
21018 | "\n" |
21019 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
21020 | "__TM_is_named_user_abort (void* const __tdb_ptr, unsigned char* __code)\n" |
21021 | "{\n" |
21022 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
21023 | "\n" |
21024 | " if (tdb->format != 1)\n" |
21025 | " return 0;\n" |
21026 | "\n" |
21027 | " if (tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE)\n" |
21028 | " {\n" |
21029 | " *__code = tdb->abort_code - _HTM_FIRST_USER_ABORT_CODE;\n" |
21030 | " return 1;\n" |
21031 | " }\n" |
21032 | " return 0;\n" |
21033 | "}\n" |
21034 | "\n" |
21035 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
21036 | "__TM_is_illegal (void* const __tdb_ptr)\n" |
21037 | "{\n" |
21038 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
21039 | "\n" |
21040 | " return (tdb->format == 1\n" |
21041 | " && (tdb->abort_code == 4 /* unfiltered program interruption */\n" |
21042 | " || tdb->abort_code == 11 /* restricted instruction */));\n" |
21043 | "}\n" |
21044 | "\n" |
21045 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
21046 | "__TM_is_footprint_exceeded (void* const __tdb_ptr)\n" |
21047 | "{\n" |
21048 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
21049 | "\n" |
21050 | " return (tdb->format == 1\n" |
21051 | " && (tdb->abort_code == 7 /* fetch overflow */\n" |
21052 | " || tdb->abort_code == 8 /* store overflow */));\n" |
21053 | "}\n" |
21054 | "\n" |
21055 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
21056 | "__TM_is_nested_too_deep (void* const __tdb_ptr)\n" |
21057 | "{\n" |
21058 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
21059 | "\n" |
21060 | " return tdb->format == 1 && tdb->abort_code == 13; /* depth exceeded */\n" |
21061 | "}\n" |
21062 | "\n" |
21063 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
21064 | "__TM_is_conflict (void* const __tdb_ptr)\n" |
21065 | "{\n" |
21066 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
21067 | "\n" |
21068 | " return (tdb->format == 1\n" |
21069 | " && (tdb->abort_code == 9 /* fetch conflict */\n" |
21070 | " || tdb->abort_code == 10 /* store conflict */));\n" |
21071 | "}\n" |
21072 | "\n" |
21073 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
21074 | "__TM_is_failure_persistent (long const __result)\n" |
21075 | "{\n" |
21076 | " return __result == _HTM_TBEGIN_PERSISTENT;\n" |
21077 | "}\n" |
21078 | "\n" |
21079 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
21080 | "__TM_failure_address (void* const __tdb_ptr)\n" |
21081 | "{\n" |
21082 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
21083 | " return tdb->atia;\n" |
21084 | "}\n" |
21085 | "\n" |
21086 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
21087 | "__TM_failure_code (void* const __tdb_ptr)\n" |
21088 | "{\n" |
21089 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
21090 | "\n" |
21091 | " return tdb->abort_code;\n" |
21092 | "}\n" |
21093 | "\n" |
21094 | "#endif /* __s390__ */\n" |
21095 | "\n" |
21096 | "#endif /* __HTMXLINTRIN_H */\n" |
21097 | "" } , |
21098 | { "/builtins/ia32intrin.h" , "/* ===-------- ia32intrin.h ---------------------------------------------------===\n" |
21099 | " *\n" |
21100 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
21101 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
21102 | " * in the Software without restriction, including without limitation the rights\n" |
21103 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
21104 | " * copies of the Software, and to permit persons to whom the Software is\n" |
21105 | " * furnished to do so, subject to the following conditions:\n" |
21106 | " *\n" |
21107 | " * The above copyright notice and this permission notice shall be included in\n" |
21108 | " * all copies or substantial portions of the Software.\n" |
21109 | " *\n" |
21110 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
21111 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
21112 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
21113 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
21114 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
21115 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
21116 | " * THE SOFTWARE.\n" |
21117 | " *\n" |
21118 | " *===-----------------------------------------------------------------------===\n" |
21119 | " */\n" |
21120 | "\n" |
21121 | "#ifndef __X86INTRIN_H\n" |
21122 | "#error \"Never use <ia32intrin.h> directly; include <x86intrin.h> instead.\"\n" |
21123 | "#endif\n" |
21124 | "\n" |
21125 | "#ifndef __IA32INTRIN_H\n" |
21126 | "#define __IA32INTRIN_H\n" |
21127 | "\n" |
21128 | "#ifdef __x86_64__\n" |
21129 | "static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))\n" |
21130 | "__readeflags(void)\n" |
21131 | "{\n" |
21132 | " return __builtin_ia32_readeflags_u64();\n" |
21133 | "}\n" |
21134 | "\n" |
21135 | "static __inline__ void __attribute__((__always_inline__, __nodebug__))\n" |
21136 | "__writeeflags(unsigned long long __f)\n" |
21137 | "{\n" |
21138 | " __builtin_ia32_writeeflags_u64(__f);\n" |
21139 | "}\n" |
21140 | "\n" |
21141 | "#else /* !__x86_64__ */\n" |
21142 | "static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))\n" |
21143 | "__readeflags(void)\n" |
21144 | "{\n" |
21145 | " return __builtin_ia32_readeflags_u32();\n" |
21146 | "}\n" |
21147 | "\n" |
21148 | "static __inline__ void __attribute__((__always_inline__, __nodebug__))\n" |
21149 | "__writeeflags(unsigned int __f)\n" |
21150 | "{\n" |
21151 | " __builtin_ia32_writeeflags_u32(__f);\n" |
21152 | "}\n" |
21153 | "#endif /* !__x86_64__ */\n" |
21154 | "\n" |
21155 | "static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))\n" |
21156 | "__rdpmc(int __A) {\n" |
21157 | " return __builtin_ia32_rdpmc(__A);\n" |
21158 | "}\n" |
21159 | "\n" |
21160 | "/* __rdtscp */\n" |
21161 | "static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))\n" |
21162 | "__rdtscp(unsigned int *__A) {\n" |
21163 | " return __builtin_ia32_rdtscp(__A);\n" |
21164 | "}\n" |
21165 | "\n" |
21166 | "#define _rdtsc() __rdtsc()\n" |
21167 | "\n" |
21168 | "#define _rdpmc(A) __rdpmc(A)\n" |
21169 | "\n" |
21170 | "static __inline__ void __attribute__((__always_inline__, __nodebug__))\n" |
21171 | "_wbinvd(void) {\n" |
21172 | " __builtin_ia32_wbinvd();\n" |
21173 | "}\n" |
21174 | "\n" |
21175 | "#endif /* __IA32INTRIN_H */\n" |
21176 | "" } , |
21177 | { "/builtins/immintrin.h" , "/*===---- immintrin.h - Intel intrinsics -----------------------------------===\n" |
21178 | " *\n" |
21179 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
21180 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
21181 | " * in the Software without restriction, including without limitation the rights\n" |
21182 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
21183 | " * copies of the Software, and to permit persons to whom the Software is\n" |
21184 | " * furnished to do so, subject to the following conditions:\n" |
21185 | " *\n" |
21186 | " * The above copyright notice and this permission notice shall be included in\n" |
21187 | " * all copies or substantial portions of the Software.\n" |
21188 | " *\n" |
21189 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
21190 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
21191 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
21192 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
21193 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
21194 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
21195 | " * THE SOFTWARE.\n" |
21196 | " *\n" |
21197 | " *===-----------------------------------------------------------------------===\n" |
21198 | " */\n" |
21199 | "\n" |
21200 | "#ifndef __IMMINTRIN_H\n" |
21201 | "#define __IMMINTRIN_H\n" |
21202 | "\n" |
21203 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)\n" |
21204 | "#include <mmintrin.h>\n" |
21205 | "#endif\n" |
21206 | "\n" |
21207 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)\n" |
21208 | "#include <xmmintrin.h>\n" |
21209 | "#endif\n" |
21210 | "\n" |
21211 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)\n" |
21212 | "#include <emmintrin.h>\n" |
21213 | "#endif\n" |
21214 | "\n" |
21215 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)\n" |
21216 | "#include <pmmintrin.h>\n" |
21217 | "#endif\n" |
21218 | "\n" |
21219 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)\n" |
21220 | "#include <tmmintrin.h>\n" |
21221 | "#endif\n" |
21222 | "\n" |
21223 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
21224 | " (defined(__SSE4_2__) || defined(__SSE4_1__))\n" |
21225 | "#include <smmintrin.h>\n" |
21226 | "#endif\n" |
21227 | "\n" |
21228 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
21229 | " (defined(__AES__) || defined(__PCLMUL__))\n" |
21230 | "#include <wmmintrin.h>\n" |
21231 | "#endif\n" |
21232 | "\n" |
21233 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)\n" |
21234 | "#include <clflushoptintrin.h>\n" |
21235 | "#endif\n" |
21236 | "\n" |
21237 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)\n" |
21238 | "#include <clwbintrin.h>\n" |
21239 | "#endif\n" |
21240 | "\n" |
21241 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)\n" |
21242 | "#include <avxintrin.h>\n" |
21243 | "#endif\n" |
21244 | "\n" |
21245 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)\n" |
21246 | "#include <avx2intrin.h>\n" |
21247 | "#endif\n" |
21248 | "\n" |
21249 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)\n" |
21250 | "#include <f16cintrin.h>\n" |
21251 | "#endif\n" |
21252 | "\n" |
21253 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)\n" |
21254 | "#include <vpclmulqdqintrin.h>\n" |
21255 | "#endif\n" |
21256 | "\n" |
21257 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)\n" |
21258 | "#include <bmiintrin.h>\n" |
21259 | "#endif\n" |
21260 | "\n" |
21261 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)\n" |
21262 | "#include <bmi2intrin.h>\n" |
21263 | "#endif\n" |
21264 | "\n" |
21265 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)\n" |
21266 | "#include <lzcntintrin.h>\n" |
21267 | "#endif\n" |
21268 | "\n" |
21269 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)\n" |
21270 | "#include <popcntintrin.h>\n" |
21271 | "#endif\n" |
21272 | "\n" |
21273 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)\n" |
21274 | "#include <fmaintrin.h>\n" |
21275 | "#endif\n" |
21276 | "\n" |
21277 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)\n" |
21278 | "#include <avx512fintrin.h>\n" |
21279 | "#endif\n" |
21280 | "\n" |
21281 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)\n" |
21282 | "#include <avx512vlintrin.h>\n" |
21283 | "#endif\n" |
21284 | "\n" |
21285 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)\n" |
21286 | "#include <avx512bwintrin.h>\n" |
21287 | "#endif\n" |
21288 | "\n" |
21289 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)\n" |
21290 | "#include <avx512bitalgintrin.h>\n" |
21291 | "#endif\n" |
21292 | "\n" |
21293 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)\n" |
21294 | "#include <avx512cdintrin.h>\n" |
21295 | "#endif\n" |
21296 | "\n" |
21297 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)\n" |
21298 | "#include <avx512vpopcntdqintrin.h>\n" |
21299 | "#endif\n" |
21300 | "\n" |
21301 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
21302 | " (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))\n" |
21303 | "#include <avx512vpopcntdqvlintrin.h>\n" |
21304 | "#endif\n" |
21305 | "\n" |
21306 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)\n" |
21307 | "#include <avx512vnniintrin.h>\n" |
21308 | "#endif\n" |
21309 | "\n" |
21310 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
21311 | " (defined(__AVX512VL__) && defined(__AVX512VNNI__))\n" |
21312 | "#include <avx512vlvnniintrin.h>\n" |
21313 | "#endif\n" |
21314 | "\n" |
21315 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)\n" |
21316 | "#include <avx512dqintrin.h>\n" |
21317 | "#endif\n" |
21318 | "\n" |
21319 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
21320 | " (defined(__AVX512VL__) && defined(__AVX512BITALG__))\n" |
21321 | "#include <avx512vlbitalgintrin.h>\n" |
21322 | "#endif\n" |
21323 | "\n" |
21324 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
21325 | " (defined(__AVX512VL__) && defined(__AVX512BW__))\n" |
21326 | "#include <avx512vlbwintrin.h>\n" |
21327 | "#endif\n" |
21328 | "\n" |
21329 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
21330 | " (defined(__AVX512VL__) && defined(__AVX512CD__))\n" |
21331 | "#include <avx512vlcdintrin.h>\n" |
21332 | "#endif\n" |
21333 | "\n" |
21334 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
21335 | " (defined(__AVX512VL__) && defined(__AVX512DQ__))\n" |
21336 | "#include <avx512vldqintrin.h>\n" |
21337 | "#endif\n" |
21338 | "\n" |
21339 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)\n" |
21340 | "#include <avx512erintrin.h>\n" |
21341 | "#endif\n" |
21342 | "\n" |
21343 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)\n" |
21344 | "#include <avx512ifmaintrin.h>\n" |
21345 | "#endif\n" |
21346 | "\n" |
21347 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
21348 | " (defined(__AVX512IFMA__) && defined(__AVX512VL__))\n" |
21349 | "#include <avx512ifmavlintrin.h>\n" |
21350 | "#endif\n" |
21351 | "\n" |
21352 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)\n" |
21353 | "#include <avx512vbmiintrin.h>\n" |
21354 | "#endif\n" |
21355 | "\n" |
21356 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
21357 | " (defined(__AVX512VBMI__) && defined(__AVX512VL__))\n" |
21358 | "#include <avx512vbmivlintrin.h>\n" |
21359 | "#endif\n" |
21360 | "\n" |
21361 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)\n" |
21362 | "#include <avx512vbmi2intrin.h>\n" |
21363 | "#endif\n" |
21364 | "\n" |
21365 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
21366 | " (defined(__AVX512VBMI2__) && defined(__AVX512VL__))\n" |
21367 | "#include <avx512vlvbmi2intrin.h>\n" |
21368 | "#endif\n" |
21369 | "\n" |
21370 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)\n" |
21371 | "#include <avx512pfintrin.h>\n" |
21372 | "#endif\n" |
21373 | "\n" |
21374 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)\n" |
21375 | "#include <pkuintrin.h>\n" |
21376 | "#endif\n" |
21377 | "\n" |
21378 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)\n" |
21379 | "#include <vaesintrin.h>\n" |
21380 | "#endif\n" |
21381 | "\n" |
21382 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)\n" |
21383 | "#include <gfniintrin.h>\n" |
21384 | "#endif\n" |
21385 | "\n" |
21386 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)\n" |
21387 | "/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).\n" |
21388 | "///\n" |
21389 | "/// \\headerfile <immintrin.h>\n" |
21390 | "///\n" |
21391 | "/// This intrinsic corresponds to the <c> RDPID </c> instruction.\n" |
21392 | "static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__(\"rdpid\")))\n" |
21393 | "_rdpid_u32(void) {\n" |
21394 | " return __builtin_ia32_rdpid();\n" |
21395 | "}\n" |
21396 | "#endif // __RDPID__\n" |
21397 | "\n" |
21398 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)\n" |
21399 | "static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"rdrnd\")))\n" |
21400 | "_rdrand16_step(unsigned short *__p)\n" |
21401 | "{\n" |
21402 | " return __builtin_ia32_rdrand16_step(__p);\n" |
21403 | "}\n" |
21404 | "\n" |
21405 | "static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"rdrnd\")))\n" |
21406 | "_rdrand32_step(unsigned int *__p)\n" |
21407 | "{\n" |
21408 | " return __builtin_ia32_rdrand32_step(__p);\n" |
21409 | "}\n" |
21410 | "\n" |
21411 | "#ifdef __x86_64__\n" |
21412 | "static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"rdrnd\")))\n" |
21413 | "_rdrand64_step(unsigned long long *__p)\n" |
21414 | "{\n" |
21415 | " return __builtin_ia32_rdrand64_step(__p);\n" |
21416 | "}\n" |
21417 | "#endif\n" |
21418 | "#endif /* __RDRND__ */\n" |
21419 | "\n" |
21420 | "/* __bit_scan_forward */\n" |
21421 | "static __inline__ int __attribute__((__always_inline__, __nodebug__))\n" |
21422 | "_bit_scan_forward(int __A) {\n" |
21423 | " return __builtin_ctz(__A);\n" |
21424 | "}\n" |
21425 | "\n" |
21426 | "/* __bit_scan_reverse */\n" |
21427 | "static __inline__ int __attribute__((__always_inline__, __nodebug__))\n" |
21428 | "_bit_scan_reverse(int __A) {\n" |
21429 | " return 31 - __builtin_clz(__A);\n" |
21430 | "}\n" |
21431 | "\n" |
21432 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)\n" |
21433 | "#ifdef __x86_64__\n" |
21434 | "static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
21435 | "_readfsbase_u32(void)\n" |
21436 | "{\n" |
21437 | " return __builtin_ia32_rdfsbase32();\n" |
21438 | "}\n" |
21439 | "\n" |
21440 | "static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
21441 | "_readfsbase_u64(void)\n" |
21442 | "{\n" |
21443 | " return __builtin_ia32_rdfsbase64();\n" |
21444 | "}\n" |
21445 | "\n" |
21446 | "static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
21447 | "_readgsbase_u32(void)\n" |
21448 | "{\n" |
21449 | " return __builtin_ia32_rdgsbase32();\n" |
21450 | "}\n" |
21451 | "\n" |
21452 | "static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
21453 | "_readgsbase_u64(void)\n" |
21454 | "{\n" |
21455 | " return __builtin_ia32_rdgsbase64();\n" |
21456 | "}\n" |
21457 | "\n" |
21458 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
21459 | "_writefsbase_u32(unsigned int __V)\n" |
21460 | "{\n" |
21461 | " __builtin_ia32_wrfsbase32(__V);\n" |
21462 | "}\n" |
21463 | "\n" |
21464 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
21465 | "_writefsbase_u64(unsigned long long __V)\n" |
21466 | "{\n" |
21467 | " __builtin_ia32_wrfsbase64(__V);\n" |
21468 | "}\n" |
21469 | "\n" |
21470 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
21471 | "_writegsbase_u32(unsigned int __V)\n" |
21472 | "{\n" |
21473 | " __builtin_ia32_wrgsbase32(__V);\n" |
21474 | "}\n" |
21475 | "\n" |
21476 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
21477 | "_writegsbase_u64(unsigned long long __V)\n" |
21478 | "{\n" |
21479 | " __builtin_ia32_wrgsbase64(__V);\n" |
21480 | "}\n" |
21481 | "\n" |
21482 | "#endif\n" |
21483 | "#endif /* __FSGSBASE__ */\n" |
21484 | "\n" |
21485 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)\n" |
21486 | "\n" |
21487 | "/* The structs used below are to force the load/store to be unaligned. This\n" |
21488 | " * is accomplished with the __packed__ attribute. The __may_alias__ prevents\n" |
21489 | " * tbaa metadata from being generated based on the struct and the type of the\n" |
21490 | " * field inside of it.\n" |
21491 | " */\n" |
21492 | "\n" |
21493 | "static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n" |
21494 | "_loadbe_i16(void const * __P) {\n" |
21495 | " struct __loadu_i16 {\n" |
21496 | " short __v;\n" |
21497 | " } __attribute__((__packed__, __may_alias__));\n" |
21498 | " return __builtin_bswap16(((struct __loadu_i16*)__P)->__v);\n" |
21499 | "}\n" |
21500 | "\n" |
21501 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n" |
21502 | "_storebe_i16(void * __P, short __D) {\n" |
21503 | " struct __storeu_i16 {\n" |
21504 | " short __v;\n" |
21505 | " } __attribute__((__packed__, __may_alias__));\n" |
21506 | " ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D);\n" |
21507 | "}\n" |
21508 | "\n" |
21509 | "static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n" |
21510 | "_loadbe_i32(void const * __P) {\n" |
21511 | " struct __loadu_i32 {\n" |
21512 | " int __v;\n" |
21513 | " } __attribute__((__packed__, __may_alias__));\n" |
21514 | " return __builtin_bswap32(((struct __loadu_i32*)__P)->__v);\n" |
21515 | "}\n" |
21516 | "\n" |
21517 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n" |
21518 | "_storebe_i32(void * __P, int __D) {\n" |
21519 | " struct __storeu_i32 {\n" |
21520 | " int __v;\n" |
21521 | " } __attribute__((__packed__, __may_alias__));\n" |
21522 | " ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D);\n" |
21523 | "}\n" |
21524 | "\n" |
21525 | "#ifdef __x86_64__\n" |
21526 | "static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n" |
21527 | "_loadbe_i64(void const * __P) {\n" |
21528 | " struct __loadu_i64 {\n" |
21529 | " long long __v;\n" |
21530 | " } __attribute__((__packed__, __may_alias__));\n" |
21531 | " return __builtin_bswap64(((struct __loadu_i64*)__P)->__v);\n" |
21532 | "}\n" |
21533 | "\n" |
21534 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n" |
21535 | "_storebe_i64(void * __P, long long __D) {\n" |
21536 | " struct __storeu_i64 {\n" |
21537 | " long long __v;\n" |
21538 | " } __attribute__((__packed__, __may_alias__));\n" |
21539 | " ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D);\n" |
21540 | "}\n" |
21541 | "#endif\n" |
21542 | "#endif /* __MOVBE */\n" |
21543 | "\n" |
21544 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)\n" |
21545 | "#include <rtmintrin.h>\n" |
21546 | "#include <xtestintrin.h>\n" |
21547 | "#endif\n" |
21548 | "\n" |
21549 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)\n" |
21550 | "#include <shaintrin.h>\n" |
21551 | "#endif\n" |
21552 | "\n" |
21553 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)\n" |
21554 | "#include <fxsrintrin.h>\n" |
21555 | "#endif\n" |
21556 | "\n" |
21557 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)\n" |
21558 | "#include <xsaveintrin.h>\n" |
21559 | "#endif\n" |
21560 | "\n" |
21561 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)\n" |
21562 | "#include <xsaveoptintrin.h>\n" |
21563 | "#endif\n" |
21564 | "\n" |
21565 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)\n" |
21566 | "#include <xsavecintrin.h>\n" |
21567 | "#endif\n" |
21568 | "\n" |
21569 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)\n" |
21570 | "#include <xsavesintrin.h>\n" |
21571 | "#endif\n" |
21572 | "\n" |
21573 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)\n" |
21574 | "#include <cetintrin.h>\n" |
21575 | "#endif\n" |
21576 | "\n" |
21577 | "/* Some intrinsics inside adxintrin.h are available only on processors with ADX,\n" |
21578 | " * whereas others are also available at all times. */\n" |
21579 | "#include <adxintrin.h>\n" |
21580 | "\n" |
21581 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)\n" |
21582 | "#include <rdseedintrin.h>\n" |
21583 | "#endif\n" |
21584 | "\n" |
21585 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)\n" |
21586 | "#include <wbnoinvdintrin.h>\n" |
21587 | "#endif\n" |
21588 | "\n" |
21589 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)\n" |
21590 | "#include <cldemoteintrin.h>\n" |
21591 | "#endif\n" |
21592 | "\n" |
21593 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)\n" |
21594 | "#include <waitpkgintrin.h>\n" |
21595 | "#endif\n" |
21596 | "\n" |
21597 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
21598 | " defined(__MOVDIRI__) || defined(__MOVDIR64B__)\n" |
21599 | "#include <movdirintrin.h>\n" |
21600 | "#endif\n" |
21601 | "\n" |
21602 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)\n" |
21603 | "#include <pconfigintrin.h>\n" |
21604 | "#endif\n" |
21605 | "\n" |
21606 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)\n" |
21607 | "#include <sgxintrin.h>\n" |
21608 | "#endif\n" |
21609 | "\n" |
21610 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)\n" |
21611 | "#include <ptwriteintrin.h>\n" |
21612 | "#endif\n" |
21613 | "\n" |
21614 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)\n" |
21615 | "#include <invpcidintrin.h>\n" |
21616 | "#endif\n" |
21617 | "\n" |
21618 | "#ifdef _MSC_VER\n" |
21619 | "/* Define the default attributes for these intrinsics */\n" |
21620 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n" |
21621 | "#ifdef __cplusplus\n" |
21622 | "extern \"C\" {\n" |
21623 | "#endif\n" |
21624 | "/*----------------------------------------------------------------------------*\\\n" |
21625 | "|* Interlocked Exchange HLE\n" |
21626 | "\\*----------------------------------------------------------------------------*/\n" |
21627 | "#if defined(__i386__) || defined(__x86_64__)\n" |
21628 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
21629 | "_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {\n" |
21630 | " __asm__ __volatile__(\".byte 0xf2 ; lock ; xchg %0, %1\"\n" |
21631 | " : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n" |
21632 | " return _Value;\n" |
21633 | "}\n" |
21634 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
21635 | "_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {\n" |
21636 | " __asm__ __volatile__(\".byte 0xf3 ; lock ; xchg %0, %1\"\n" |
21637 | " : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n" |
21638 | " return _Value;\n" |
21639 | "}\n" |
21640 | "#endif\n" |
21641 | "#if defined(__x86_64__)\n" |
21642 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
21643 | "_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {\n" |
21644 | " __asm__ __volatile__(\".byte 0xf2 ; lock ; xchg %0, %1\"\n" |
21645 | " : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n" |
21646 | " return _Value;\n" |
21647 | "}\n" |
21648 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
21649 | "_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {\n" |
21650 | " __asm__ __volatile__(\".byte 0xf3 ; lock ; xchg %0, %1\"\n" |
21651 | " : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n" |
21652 | " return _Value;\n" |
21653 | "}\n" |
21654 | "#endif\n" |
21655 | "/*----------------------------------------------------------------------------*\\\n" |
21656 | "|* Interlocked Compare Exchange HLE\n" |
21657 | "\\*----------------------------------------------------------------------------*/\n" |
21658 | "#if defined(__i386__) || defined(__x86_64__)\n" |
21659 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
21660 | "_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,\n" |
21661 | " long _Exchange, long _Comparand) {\n" |
21662 | " __asm__ __volatile__(\".byte 0xf2 ; lock ; cmpxchg %2, %1\"\n" |
21663 | " : \"+a\" (_Comparand), \"+m\" (*_Destination)\n" |
21664 | " : \"r\" (_Exchange) : \"memory\");\n" |
21665 | " return _Comparand;\n" |
21666 | "}\n" |
21667 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
21668 | "_InterlockedCompareExchange_HLERelease(long volatile *_Destination,\n" |
21669 | " long _Exchange, long _Comparand) {\n" |
21670 | " __asm__ __volatile__(\".byte 0xf3 ; lock ; cmpxchg %2, %1\"\n" |
21671 | " : \"+a\" (_Comparand), \"+m\" (*_Destination)\n" |
21672 | " : \"r\" (_Exchange) : \"memory\");\n" |
21673 | " return _Comparand;\n" |
21674 | "}\n" |
21675 | "#endif\n" |
21676 | "#if defined(__x86_64__)\n" |
21677 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
21678 | "_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,\n" |
21679 | " __int64 _Exchange, __int64 _Comparand) {\n" |
21680 | " __asm__ __volatile__(\".byte 0xf2 ; lock ; cmpxchg %2, %1\"\n" |
21681 | " : \"+a\" (_Comparand), \"+m\" (*_Destination)\n" |
21682 | " : \"r\" (_Exchange) : \"memory\");\n" |
21683 | " return _Comparand;\n" |
21684 | "}\n" |
21685 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
21686 | "_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,\n" |
21687 | " __int64 _Exchange, __int64 _Comparand) {\n" |
21688 | " __asm__ __volatile__(\".byte 0xf3 ; lock ; cmpxchg %2, %1\"\n" |
21689 | " : \"+a\" (_Comparand), \"+m\" (*_Destination)\n" |
21690 | " : \"r\" (_Exchange) : \"memory\");\n" |
21691 | " return _Comparand;\n" |
21692 | "}\n" |
21693 | "#endif\n" |
21694 | "#ifdef __cplusplus\n" |
21695 | "}\n" |
21696 | "#endif\n" |
21697 | "\n" |
21698 | "#undef __DEFAULT_FN_ATTRS\n" |
21699 | "\n" |
21700 | "#endif /* _MSC_VER */\n" |
21701 | "\n" |
21702 | "#endif /* __IMMINTRIN_H */\n" |
21703 | "" } , |
21704 | { "/builtins/intrin.h" , "/* ===-------- intrin.h ---------------------------------------------------===\n" |
21705 | " *\n" |
21706 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
21707 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
21708 | " * in the Software without restriction, including without limitation the rights\n" |
21709 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
21710 | " * copies of the Software, and to permit persons to whom the Software is\n" |
21711 | " * furnished to do so, subject to the following conditions:\n" |
21712 | " *\n" |
21713 | " * The above copyright notice and this permission notice shall be included in\n" |
21714 | " * all copies or substantial portions of the Software.\n" |
21715 | " *\n" |
21716 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
21717 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
21718 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
21719 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
21720 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
21721 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
21722 | " * THE SOFTWARE.\n" |
21723 | " *\n" |
21724 | " *===-----------------------------------------------------------------------===\n" |
21725 | " */\n" |
21726 | "\n" |
21727 | "/* Only include this if we're compiling for the windows platform. */\n" |
21728 | "#ifndef _MSC_VER\n" |
21729 | "#include_next <intrin.h>\n" |
21730 | "#else\n" |
21731 | "\n" |
21732 | "#ifndef __INTRIN_H\n" |
21733 | "#define __INTRIN_H\n" |
21734 | "\n" |
21735 | "/* First include the standard intrinsics. */\n" |
21736 | "#if defined(__i386__) || defined(__x86_64__)\n" |
21737 | "#include <x86intrin.h>\n" |
21738 | "#endif\n" |
21739 | "\n" |
21740 | "#if defined(__arm__)\n" |
21741 | "#include <armintr.h>\n" |
21742 | "#endif\n" |
21743 | "\n" |
21744 | "#if defined(__aarch64__)\n" |
21745 | "#include <arm64intr.h>\n" |
21746 | "#endif\n" |
21747 | "\n" |
21748 | "/* For the definition of jmp_buf. */\n" |
21749 | "#if __STDC_HOSTED__\n" |
21750 | "#include <setjmp.h>\n" |
21751 | "#endif\n" |
21752 | "\n" |
21753 | "/* Define the default attributes for the functions in this file. */\n" |
21754 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n" |
21755 | "\n" |
21756 | "#ifdef __cplusplus\n" |
21757 | "extern \"C\" {\n" |
21758 | "#endif\n" |
21759 | "\n" |
21760 | "#if defined(__MMX__)\n" |
21761 | "/* And the random ones that aren't in those files. */\n" |
21762 | "__m64 _m_from_float(float);\n" |
21763 | "float _m_to_float(__m64);\n" |
21764 | "#endif\n" |
21765 | "\n" |
21766 | "/* Other assorted instruction intrinsics. */\n" |
21767 | "void __addfsbyte(unsigned long, unsigned char);\n" |
21768 | "void __addfsdword(unsigned long, unsigned long);\n" |
21769 | "void __addfsword(unsigned long, unsigned short);\n" |
21770 | "void __code_seg(const char *);\n" |
21771 | "static __inline__\n" |
21772 | "void __cpuid(int[4], int);\n" |
21773 | "static __inline__\n" |
21774 | "void __cpuidex(int[4], int, int);\n" |
21775 | "static __inline__\n" |
21776 | "__int64 __emul(int, int);\n" |
21777 | "static __inline__\n" |
21778 | "unsigned __int64 __emulu(unsigned int, unsigned int);\n" |
21779 | "unsigned int __getcallerseflags(void);\n" |
21780 | "static __inline__\n" |
21781 | "void __halt(void);\n" |
21782 | "unsigned char __inbyte(unsigned short);\n" |
21783 | "void __inbytestring(unsigned short, unsigned char *, unsigned long);\n" |
21784 | "void __incfsbyte(unsigned long);\n" |
21785 | "void __incfsdword(unsigned long);\n" |
21786 | "void __incfsword(unsigned long);\n" |
21787 | "unsigned long __indword(unsigned short);\n" |
21788 | "void __indwordstring(unsigned short, unsigned long *, unsigned long);\n" |
21789 | "void __int2c(void);\n" |
21790 | "void __invlpg(void *);\n" |
21791 | "unsigned short __inword(unsigned short);\n" |
21792 | "void __inwordstring(unsigned short, unsigned short *, unsigned long);\n" |
21793 | "void __lidt(void *);\n" |
21794 | "unsigned __int64 __ll_lshift(unsigned __int64, int);\n" |
21795 | "__int64 __ll_rshift(__int64, int);\n" |
21796 | "static __inline__\n" |
21797 | "void __movsb(unsigned char *, unsigned char const *, size_t);\n" |
21798 | "static __inline__\n" |
21799 | "void __movsd(unsigned long *, unsigned long const *, size_t);\n" |
21800 | "static __inline__\n" |
21801 | "void __movsw(unsigned short *, unsigned short const *, size_t);\n" |
21802 | "static __inline__\n" |
21803 | "void __nop(void);\n" |
21804 | "void __nvreg_restore_fence(void);\n" |
21805 | "void __nvreg_save_fence(void);\n" |
21806 | "void __outbyte(unsigned short, unsigned char);\n" |
21807 | "void __outbytestring(unsigned short, unsigned char *, unsigned long);\n" |
21808 | "void __outdword(unsigned short, unsigned long);\n" |
21809 | "void __outdwordstring(unsigned short, unsigned long *, unsigned long);\n" |
21810 | "void __outword(unsigned short, unsigned short);\n" |
21811 | "void __outwordstring(unsigned short, unsigned short *, unsigned long);\n" |
21812 | "unsigned long __readcr0(void);\n" |
21813 | "unsigned long __readcr2(void);\n" |
21814 | "static __inline__\n" |
21815 | "unsigned long __readcr3(void);\n" |
21816 | "unsigned long __readcr4(void);\n" |
21817 | "unsigned long __readcr8(void);\n" |
21818 | "unsigned int __readdr(unsigned int);\n" |
21819 | "#ifdef __i386__\n" |
21820 | "static __inline__\n" |
21821 | "unsigned char __readfsbyte(unsigned long);\n" |
21822 | "static __inline__\n" |
21823 | "unsigned __int64 __readfsqword(unsigned long);\n" |
21824 | "static __inline__\n" |
21825 | "unsigned short __readfsword(unsigned long);\n" |
21826 | "#endif\n" |
21827 | "static __inline__\n" |
21828 | "unsigned __int64 __readmsr(unsigned long);\n" |
21829 | "unsigned __int64 __readpmc(unsigned long);\n" |
21830 | "unsigned long __segmentlimit(unsigned long);\n" |
21831 | "void __sidt(void *);\n" |
21832 | "static __inline__\n" |
21833 | "void __stosb(unsigned char *, unsigned char, size_t);\n" |
21834 | "static __inline__\n" |
21835 | "void __stosd(unsigned long *, unsigned long, size_t);\n" |
21836 | "static __inline__\n" |
21837 | "void __stosw(unsigned short *, unsigned short, size_t);\n" |
21838 | "void __svm_clgi(void);\n" |
21839 | "void __svm_invlpga(void *, int);\n" |
21840 | "void __svm_skinit(int);\n" |
21841 | "void __svm_stgi(void);\n" |
21842 | "void __svm_vmload(size_t);\n" |
21843 | "void __svm_vmrun(size_t);\n" |
21844 | "void __svm_vmsave(size_t);\n" |
21845 | "void __ud2(void);\n" |
21846 | "unsigned __int64 __ull_rshift(unsigned __int64, int);\n" |
21847 | "void __vmx_off(void);\n" |
21848 | "void __vmx_vmptrst(unsigned __int64 *);\n" |
21849 | "void __wbinvd(void);\n" |
21850 | "void __writecr0(unsigned int);\n" |
21851 | "static __inline__\n" |
21852 | "void __writecr3(unsigned int);\n" |
21853 | "void __writecr4(unsigned int);\n" |
21854 | "void __writecr8(unsigned int);\n" |
21855 | "void __writedr(unsigned int, unsigned int);\n" |
21856 | "void __writefsbyte(unsigned long, unsigned char);\n" |
21857 | "void __writefsdword(unsigned long, unsigned long);\n" |
21858 | "void __writefsqword(unsigned long, unsigned __int64);\n" |
21859 | "void __writefsword(unsigned long, unsigned short);\n" |
21860 | "void __writemsr(unsigned long, unsigned __int64);\n" |
21861 | "static __inline__\n" |
21862 | "void *_AddressOfReturnAddress(void);\n" |
21863 | "static __inline__\n" |
21864 | "unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);\n" |
21865 | "static __inline__\n" |
21866 | "unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);\n" |
21867 | "unsigned char _bittest(long const *, long);\n" |
21868 | "unsigned char _bittestandcomplement(long *, long);\n" |
21869 | "unsigned char _bittestandreset(long *, long);\n" |
21870 | "unsigned char _bittestandset(long *, long);\n" |
21871 | "void __cdecl _disable(void);\n" |
21872 | "void __cdecl _enable(void);\n" |
21873 | "long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value);\n" |
21874 | "unsigned char _interlockedbittestandreset(long volatile *, long);\n" |
21875 | "unsigned char _interlockedbittestandset(long volatile *, long);\n" |
21876 | "void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *,\n" |
21877 | " void *);\n" |
21878 | "void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *,\n" |
21879 | " void *);\n" |
21880 | "long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long);\n" |
21881 | "long _InterlockedExchangeAdd_HLERelease(long volatile *, long);\n" |
21882 | "__int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64);\n" |
21883 | "__int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64);\n" |
21884 | "void __cdecl _invpcid(unsigned int, void *);\n" |
21885 | "static __inline__ void\n" |
21886 | "__attribute__((__deprecated__(\"use other intrinsics or C++11 atomics instead\")))\n" |
21887 | "_ReadBarrier(void);\n" |
21888 | "static __inline__ void\n" |
21889 | "__attribute__((__deprecated__(\"use other intrinsics or C++11 atomics instead\")))\n" |
21890 | "_ReadWriteBarrier(void);\n" |
21891 | "unsigned int _rorx_u32(unsigned int, const unsigned int);\n" |
21892 | "int _sarx_i32(int, unsigned int);\n" |
21893 | "#if __STDC_HOSTED__\n" |
21894 | "int __cdecl _setjmp(jmp_buf);\n" |
21895 | "#endif\n" |
21896 | "unsigned int _shlx_u32(unsigned int, unsigned int);\n" |
21897 | "unsigned int _shrx_u32(unsigned int, unsigned int);\n" |
21898 | "void _Store_HLERelease(long volatile *, long);\n" |
21899 | "void _Store64_HLERelease(__int64 volatile *, __int64);\n" |
21900 | "void _StorePointer_HLERelease(void *volatile *, void *);\n" |
21901 | "static __inline__ void\n" |
21902 | "__attribute__((__deprecated__(\"use other intrinsics or C++11 atomics instead\")))\n" |
21903 | "_WriteBarrier(void);\n" |
21904 | "unsigned __int32 xbegin(void);\n" |
21905 | "void _xend(void);\n" |
21906 | "static __inline__\n" |
21907 | "#define _XCR_XFEATURE_ENABLED_MASK 0\n" |
21908 | "unsigned __int64 __cdecl _xgetbv(unsigned int);\n" |
21909 | "void __cdecl _xsetbv(unsigned int, unsigned __int64);\n" |
21910 | "\n" |
21911 | "/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */\n" |
21912 | "#ifdef __x86_64__\n" |
21913 | "void __addgsbyte(unsigned long, unsigned char);\n" |
21914 | "void __addgsdword(unsigned long, unsigned long);\n" |
21915 | "void __addgsqword(unsigned long, unsigned __int64);\n" |
21916 | "void __addgsword(unsigned long, unsigned short);\n" |
21917 | "static __inline__\n" |
21918 | "void __faststorefence(void);\n" |
21919 | "void __incgsbyte(unsigned long);\n" |
21920 | "void __incgsdword(unsigned long);\n" |
21921 | "void __incgsqword(unsigned long);\n" |
21922 | "void __incgsword(unsigned long);\n" |
21923 | "static __inline__\n" |
21924 | "void __movsq(unsigned long long *, unsigned long long const *, size_t);\n" |
21925 | "static __inline__\n" |
21926 | "unsigned char __readgsbyte(unsigned long);\n" |
21927 | "static __inline__\n" |
21928 | "unsigned long __readgsdword(unsigned long);\n" |
21929 | "static __inline__\n" |
21930 | "unsigned __int64 __readgsqword(unsigned long);\n" |
21931 | "unsigned short __readgsword(unsigned long);\n" |
21932 | "unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,\n" |
21933 | " unsigned __int64 _HighPart,\n" |
21934 | " unsigned char _Shift);\n" |
21935 | "unsigned __int64 __shiftright128(unsigned __int64 _LowPart,\n" |
21936 | " unsigned __int64 _HighPart,\n" |
21937 | " unsigned char _Shift);\n" |
21938 | "static __inline__\n" |
21939 | "void __stosq(unsigned __int64 *, unsigned __int64, size_t);\n" |
21940 | "unsigned char __vmx_on(unsigned __int64 *);\n" |
21941 | "unsigned char __vmx_vmclear(unsigned __int64 *);\n" |
21942 | "unsigned char __vmx_vmlaunch(void);\n" |
21943 | "unsigned char __vmx_vmptrld(unsigned __int64 *);\n" |
21944 | "unsigned char __vmx_vmread(size_t, size_t *);\n" |
21945 | "unsigned char __vmx_vmresume(void);\n" |
21946 | "unsigned char __vmx_vmwrite(size_t, size_t);\n" |
21947 | "void __writegsbyte(unsigned long, unsigned char);\n" |
21948 | "void __writegsdword(unsigned long, unsigned long);\n" |
21949 | "void __writegsqword(unsigned long, unsigned __int64);\n" |
21950 | "void __writegsword(unsigned long, unsigned short);\n" |
21951 | "unsigned char _bittest64(__int64 const *, __int64);\n" |
21952 | "unsigned char _bittestandcomplement64(__int64 *, __int64);\n" |
21953 | "unsigned char _bittestandreset64(__int64 *, __int64);\n" |
21954 | "unsigned char _bittestandset64(__int64 *, __int64);\n" |
21955 | "long _InterlockedAnd_np(long volatile *_Value, long _Mask);\n" |
21956 | "short _InterlockedAnd16_np(short volatile *_Value, short _Mask);\n" |
21957 | "__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask);\n" |
21958 | "char _InterlockedAnd8_np(char volatile *_Value, char _Mask);\n" |
21959 | "unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);\n" |
21960 | "unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);\n" |
21961 | "long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,\n" |
21962 | " long _Comparand);\n" |
21963 | "unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,\n" |
21964 | " __int64 _ExchangeHigh,\n" |
21965 | " __int64 _ExchangeLow,\n" |
21966 | " __int64 *_CompareandResult);\n" |
21967 | "unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination,\n" |
21968 | " __int64 _ExchangeHigh,\n" |
21969 | " __int64 _ExchangeLow,\n" |
21970 | " __int64 *_ComparandResult);\n" |
21971 | "short _InterlockedCompareExchange16_np(short volatile *_Destination,\n" |
21972 | " short _Exchange, short _Comparand);\n" |
21973 | "__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination,\n" |
21974 | " __int64 _Exchange, __int64 _Comparand);\n" |
21975 | "void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination,\n" |
21976 | " void *_Exchange, void *_Comparand);\n" |
21977 | "long _InterlockedOr_np(long volatile *_Value, long _Mask);\n" |
21978 | "short _InterlockedOr16_np(short volatile *_Value, short _Mask);\n" |
21979 | "__int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask);\n" |
21980 | "char _InterlockedOr8_np(char volatile *_Value, char _Mask);\n" |
21981 | "long _InterlockedXor_np(long volatile *_Value, long _Mask);\n" |
21982 | "short _InterlockedXor16_np(short volatile *_Value, short _Mask);\n" |
21983 | "__int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask);\n" |
21984 | "char _InterlockedXor8_np(char volatile *_Value, char _Mask);\n" |
21985 | "unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int);\n" |
21986 | "__int64 _sarx_i64(__int64, unsigned int);\n" |
21987 | "unsigned __int64 _shlx_u64(unsigned __int64, unsigned int);\n" |
21988 | "unsigned __int64 _shrx_u64(unsigned __int64, unsigned int);\n" |
21989 | "static __inline__\n" |
21990 | "__int64 __mulh(__int64, __int64);\n" |
21991 | "static __inline__\n" |
21992 | "unsigned __int64 __umulh(unsigned __int64, unsigned __int64);\n" |
21993 | "static __inline__\n" |
21994 | "__int64 _mul128(__int64, __int64, __int64*);\n" |
21995 | "static __inline__\n" |
21996 | "unsigned __int64 _umul128(unsigned __int64,\n" |
21997 | " unsigned __int64,\n" |
21998 | " unsigned __int64*);\n" |
21999 | "\n" |
22000 | "#endif /* __x86_64__ */\n" |
22001 | "\n" |
22002 | "#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)\n" |
22003 | "\n" |
22004 | "static __inline__\n" |
22005 | "unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);\n" |
22006 | "static __inline__\n" |
22007 | "unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);\n" |
22008 | "\n" |
22009 | "static __inline__\n" |
22010 | "__int64 _InterlockedDecrement64(__int64 volatile *_Addend);\n" |
22011 | "static __inline__\n" |
22012 | "__int64 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value);\n" |
22013 | "static __inline__\n" |
22014 | "__int64 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value);\n" |
22015 | "static __inline__\n" |
22016 | "__int64 _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value);\n" |
22017 | "static __inline__\n" |
22018 | "__int64 _InterlockedIncrement64(__int64 volatile *_Addend);\n" |
22019 | "static __inline__\n" |
22020 | "__int64 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask);\n" |
22021 | "static __inline__\n" |
22022 | "__int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask);\n" |
22023 | "static __inline__\n" |
22024 | "__int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);\n" |
22025 | "\n" |
22026 | "#endif\n" |
22027 | "\n" |
22028 | "/*----------------------------------------------------------------------------*\\\n" |
22029 | "|* Interlocked Exchange Add\n" |
22030 | "\\*----------------------------------------------------------------------------*/\n" |
22031 | "#if defined(__arm__) || defined(__aarch64__)\n" |
22032 | "char _InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value);\n" |
22033 | "char _InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value);\n" |
22034 | "char _InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value);\n" |
22035 | "short _InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value);\n" |
22036 | "short _InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value);\n" |
22037 | "short _InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value);\n" |
22038 | "long _InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value);\n" |
22039 | "long _InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value);\n" |
22040 | "long _InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value);\n" |
22041 | "__int64 _InterlockedExchangeAdd64_acq(__int64 volatile *_Addend, __int64 _Value);\n" |
22042 | "__int64 _InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value);\n" |
22043 | "__int64 _InterlockedExchangeAdd64_rel(__int64 volatile *_Addend, __int64 _Value);\n" |
22044 | "#endif\n" |
22045 | "/*----------------------------------------------------------------------------*\\\n" |
22046 | "|* Interlocked Increment\n" |
22047 | "\\*----------------------------------------------------------------------------*/\n" |
22048 | "#if defined(__arm__) || defined(__aarch64__)\n" |
22049 | "short _InterlockedIncrement16_acq(short volatile *_Value);\n" |
22050 | "short _InterlockedIncrement16_nf(short volatile *_Value);\n" |
22051 | "short _InterlockedIncrement16_rel(short volatile *_Value);\n" |
22052 | "long _InterlockedIncrement_acq(long volatile *_Value);\n" |
22053 | "long _InterlockedIncrement_nf(long volatile *_Value);\n" |
22054 | "long _InterlockedIncrement_rel(long volatile *_Value);\n" |
22055 | "__int64 _InterlockedIncrement64_acq(__int64 volatile *_Value);\n" |
22056 | "__int64 _InterlockedIncrement64_nf(__int64 volatile *_Value);\n" |
22057 | "__int64 _InterlockedIncrement64_rel(__int64 volatile *_Value);\n" |
22058 | "#endif\n" |
22059 | "/*----------------------------------------------------------------------------*\\\n" |
22060 | "|* Interlocked Decrement\n" |
22061 | "\\*----------------------------------------------------------------------------*/\n" |
22062 | "#if defined(__arm__) || defined(__aarch64__)\n" |
22063 | "short _InterlockedDecrement16_acq(short volatile *_Value);\n" |
22064 | "short _InterlockedDecrement16_nf(short volatile *_Value);\n" |
22065 | "short _InterlockedDecrement16_rel(short volatile *_Value);\n" |
22066 | "long _InterlockedDecrement_acq(long volatile *_Value);\n" |
22067 | "long _InterlockedDecrement_nf(long volatile *_Value);\n" |
22068 | "long _InterlockedDecrement_rel(long volatile *_Value);\n" |
22069 | "__int64 _InterlockedDecrement64_acq(__int64 volatile *_Value);\n" |
22070 | "__int64 _InterlockedDecrement64_nf(__int64 volatile *_Value);\n" |
22071 | "__int64 _InterlockedDecrement64_rel(__int64 volatile *_Value);\n" |
22072 | "#endif\n" |
22073 | "/*----------------------------------------------------------------------------*\\\n" |
22074 | "|* Interlocked And\n" |
22075 | "\\*----------------------------------------------------------------------------*/\n" |
22076 | "#if defined(__arm__) || defined(__aarch64__)\n" |
22077 | "char _InterlockedAnd8_acq(char volatile *_Value, char _Mask);\n" |
22078 | "char _InterlockedAnd8_nf(char volatile *_Value, char _Mask);\n" |
22079 | "char _InterlockedAnd8_rel(char volatile *_Value, char _Mask);\n" |
22080 | "short _InterlockedAnd16_acq(short volatile *_Value, short _Mask);\n" |
22081 | "short _InterlockedAnd16_nf(short volatile *_Value, short _Mask);\n" |
22082 | "short _InterlockedAnd16_rel(short volatile *_Value, short _Mask);\n" |
22083 | "long _InterlockedAnd_acq(long volatile *_Value, long _Mask);\n" |
22084 | "long _InterlockedAnd_nf(long volatile *_Value, long _Mask);\n" |
22085 | "long _InterlockedAnd_rel(long volatile *_Value, long _Mask);\n" |
22086 | "__int64 _InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask);\n" |
22087 | "__int64 _InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask);\n" |
22088 | "__int64 _InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask);\n" |
22089 | "#endif\n" |
22090 | "/*----------------------------------------------------------------------------*\\\n" |
22091 | "|* Bit Counting and Testing\n" |
22092 | "\\*----------------------------------------------------------------------------*/\n" |
22093 | "#if defined(__arm__) || defined(__aarch64__)\n" |
22094 | "unsigned char _interlockedbittestandset_acq(long volatile *_BitBase,\n" |
22095 | " long _BitPos);\n" |
22096 | "unsigned char _interlockedbittestandset_nf(long volatile *_BitBase,\n" |
22097 | " long _BitPos);\n" |
22098 | "unsigned char _interlockedbittestandset_rel(long volatile *_BitBase,\n" |
22099 | " long _BitPos);\n" |
22100 | "unsigned char _interlockedbittestandreset_acq(long volatile *_BitBase,\n" |
22101 | " long _BitPos);\n" |
22102 | "unsigned char _interlockedbittestandreset_nf(long volatile *_BitBase,\n" |
22103 | " long _BitPos);\n" |
22104 | "unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase,\n" |
22105 | " long _BitPos);\n" |
22106 | "#endif\n" |
22107 | "/*----------------------------------------------------------------------------*\\\n" |
22108 | "|* Interlocked Or\n" |
22109 | "\\*----------------------------------------------------------------------------*/\n" |
22110 | "#if defined(__arm__) || defined(__aarch64__)\n" |
22111 | "char _InterlockedOr8_acq(char volatile *_Value, char _Mask);\n" |
22112 | "char _InterlockedOr8_nf(char volatile *_Value, char _Mask);\n" |
22113 | "char _InterlockedOr8_rel(char volatile *_Value, char _Mask);\n" |
22114 | "short _InterlockedOr16_acq(short volatile *_Value, short _Mask);\n" |
22115 | "short _InterlockedOr16_nf(short volatile *_Value, short _Mask);\n" |
22116 | "short _InterlockedOr16_rel(short volatile *_Value, short _Mask);\n" |
22117 | "long _InterlockedOr_acq(long volatile *_Value, long _Mask);\n" |
22118 | "long _InterlockedOr_nf(long volatile *_Value, long _Mask);\n" |
22119 | "long _InterlockedOr_rel(long volatile *_Value, long _Mask);\n" |
22120 | "__int64 _InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask);\n" |
22121 | "__int64 _InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask);\n" |
22122 | "__int64 _InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask);\n" |
22123 | "#endif\n" |
22124 | "/*----------------------------------------------------------------------------*\\\n" |
22125 | "|* Interlocked Xor\n" |
22126 | "\\*----------------------------------------------------------------------------*/\n" |
22127 | "#if defined(__arm__) || defined(__aarch64__)\n" |
22128 | "char _InterlockedXor8_acq(char volatile *_Value, char _Mask);\n" |
22129 | "char _InterlockedXor8_nf(char volatile *_Value, char _Mask);\n" |
22130 | "char _InterlockedXor8_rel(char volatile *_Value, char _Mask);\n" |
22131 | "short _InterlockedXor16_acq(short volatile *_Value, short _Mask);\n" |
22132 | "short _InterlockedXor16_nf(short volatile *_Value, short _Mask);\n" |
22133 | "short _InterlockedXor16_rel(short volatile *_Value, short _Mask);\n" |
22134 | "long _InterlockedXor_acq(long volatile *_Value, long _Mask);\n" |
22135 | "long _InterlockedXor_nf(long volatile *_Value, long _Mask);\n" |
22136 | "long _InterlockedXor_rel(long volatile *_Value, long _Mask);\n" |
22137 | "__int64 _InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask);\n" |
22138 | "__int64 _InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask);\n" |
22139 | "__int64 _InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask);\n" |
22140 | "#endif\n" |
22141 | "/*----------------------------------------------------------------------------*\\\n" |
22142 | "|* Interlocked Exchange\n" |
22143 | "\\*----------------------------------------------------------------------------*/\n" |
22144 | "#if defined(__arm__) || defined(__aarch64__)\n" |
22145 | "char _InterlockedExchange8_acq(char volatile *_Target, char _Value);\n" |
22146 | "char _InterlockedExchange8_nf(char volatile *_Target, char _Value);\n" |
22147 | "char _InterlockedExchange8_rel(char volatile *_Target, char _Value);\n" |
22148 | "short _InterlockedExchange16_acq(short volatile *_Target, short _Value);\n" |
22149 | "short _InterlockedExchange16_nf(short volatile *_Target, short _Value);\n" |
22150 | "short _InterlockedExchange16_rel(short volatile *_Target, short _Value);\n" |
22151 | "long _InterlockedExchange_acq(long volatile *_Target, long _Value);\n" |
22152 | "long _InterlockedExchange_nf(long volatile *_Target, long _Value);\n" |
22153 | "long _InterlockedExchange_rel(long volatile *_Target, long _Value);\n" |
22154 | "__int64 _InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value);\n" |
22155 | "__int64 _InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value);\n" |
22156 | "__int64 _InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value);\n" |
22157 | "#endif\n" |
22158 | "/*----------------------------------------------------------------------------*\\\n" |
22159 | "|* Interlocked Compare Exchange\n" |
22160 | "\\*----------------------------------------------------------------------------*/\n" |
22161 | "#if defined(__arm__) || defined(__aarch64__)\n" |
22162 | "char _InterlockedCompareExchange8_acq(char volatile *_Destination,\n" |
22163 | " char _Exchange, char _Comparand);\n" |
22164 | "char _InterlockedCompareExchange8_nf(char volatile *_Destination,\n" |
22165 | " char _Exchange, char _Comparand);\n" |
22166 | "char _InterlockedCompareExchange8_rel(char volatile *_Destination,\n" |
22167 | " char _Exchange, char _Comparand);\n" |
22168 | "short _InterlockedCompareExchange16_acq(short volatile *_Destination,\n" |
22169 | " short _Exchange, short _Comparand);\n" |
22170 | "short _InterlockedCompareExchange16_nf(short volatile *_Destination,\n" |
22171 | " short _Exchange, short _Comparand);\n" |
22172 | "short _InterlockedCompareExchange16_rel(short volatile *_Destination,\n" |
22173 | " short _Exchange, short _Comparand);\n" |
22174 | "long _InterlockedCompareExchange_acq(long volatile *_Destination,\n" |
22175 | " long _Exchange, long _Comparand);\n" |
22176 | "long _InterlockedCompareExchange_nf(long volatile *_Destination,\n" |
22177 | " long _Exchange, long _Comparand);\n" |
22178 | "long _InterlockedCompareExchange_rel(long volatile *_Destination,\n" |
22179 | " long _Exchange, long _Comparand);\n" |
22180 | "__int64 _InterlockedCompareExchange64_acq(__int64 volatile *_Destination,\n" |
22181 | " __int64 _Exchange, __int64 _Comparand);\n" |
22182 | "__int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination,\n" |
22183 | " __int64 _Exchange, __int64 _Comparand);\n" |
22184 | "__int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination,\n" |
22185 | " __int64 _Exchange, __int64 _Comparand);\n" |
22186 | "#endif\n" |
22187 | "\n" |
22188 | "/*----------------------------------------------------------------------------*\\\n" |
22189 | "|* movs, stos\n" |
22190 | "\\*----------------------------------------------------------------------------*/\n" |
22191 | "#if defined(__i386__) || defined(__x86_64__)\n" |
22192 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
22193 | "__movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) {\n" |
22194 | " __asm__ __volatile__(\"rep movsb\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n" |
22195 | " : : \"memory\");\n" |
22196 | "}\n" |
22197 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
22198 | "__movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) {\n" |
22199 | " __asm__ __volatile__(\"rep movsl\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n" |
22200 | " : : \"memory\");\n" |
22201 | "}\n" |
22202 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
22203 | "__movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) {\n" |
22204 | " __asm__ __volatile__(\"rep movsw\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n" |
22205 | " : : \"memory\");\n" |
22206 | "}\n" |
22207 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
22208 | "__stosd(unsigned long *__dst, unsigned long __x, size_t __n) {\n" |
22209 | " __asm__ __volatile__(\"rep stosl\" : \"+D\"(__dst), \"+c\"(__n) : \"a\"(__x)\n" |
22210 | " : \"memory\");\n" |
22211 | "}\n" |
22212 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
22213 | "__stosw(unsigned short *__dst, unsigned short __x, size_t __n) {\n" |
22214 | " __asm__ __volatile__(\"rep stosw\" : \"+D\"(__dst), \"+c\"(__n) : \"a\"(__x)\n" |
22215 | " : \"memory\");\n" |
22216 | "}\n" |
22217 | "#endif\n" |
22218 | "#ifdef __x86_64__\n" |
22219 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
22220 | "__movsq(unsigned long long *__dst, unsigned long long const *__src, size_t __n) {\n" |
22221 | " __asm__ __volatile__(\"rep movsq\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n" |
22222 | " : : \"memory\");\n" |
22223 | "}\n" |
22224 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
22225 | "__stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) {\n" |
22226 | " __asm__ __volatile__(\"rep stosq\" : \"+D\"(__dst), \"+c\"(__n) : \"a\"(__x)\n" |
22227 | " : \"memory\");\n" |
22228 | "}\n" |
22229 | "#endif\n" |
22230 | "\n" |
22231 | "/*----------------------------------------------------------------------------*\\\n" |
22232 | "|* Misc\n" |
22233 | "\\*----------------------------------------------------------------------------*/\n" |
22234 | "#if defined(__i386__) || defined(__x86_64__)\n" |
22235 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
22236 | "__cpuid(int __info[4], int __level) {\n" |
22237 | " __asm__ (\"cpuid\" : \"=a\"(__info[0]), \"=b\" (__info[1]), \"=c\"(__info[2]), \"=d\"(__info[3])\n" |
22238 | " : \"a\"(__level), \"c\"(0));\n" |
22239 | "}\n" |
22240 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
22241 | "__cpuidex(int __info[4], int __level, int __ecx) {\n" |
22242 | " __asm__ (\"cpuid\" : \"=a\"(__info[0]), \"=b\" (__info[1]), \"=c\"(__info[2]), \"=d\"(__info[3])\n" |
22243 | " : \"a\"(__level), \"c\"(__ecx));\n" |
22244 | "}\n" |
22245 | "static __inline__ unsigned __int64 __cdecl __DEFAULT_FN_ATTRS\n" |
22246 | "_xgetbv(unsigned int __xcr_no) {\n" |
22247 | " unsigned int __eax, __edx;\n" |
22248 | " __asm__ (\"xgetbv\" : \"=a\" (__eax), \"=d\" (__edx) : \"c\" (__xcr_no));\n" |
22249 | " return ((unsigned __int64)__edx << 32) | __eax;\n" |
22250 | "}\n" |
22251 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
22252 | "__halt(void) {\n" |
22253 | " __asm__ volatile (\"hlt\");\n" |
22254 | "}\n" |
22255 | "#endif\n" |
22256 | "\n" |
22257 | "#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)\n" |
22258 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
22259 | "__nop(void) {\n" |
22260 | " __asm__ volatile (\"nop\");\n" |
22261 | "}\n" |
22262 | "#endif\n" |
22263 | "\n" |
22264 | "/*----------------------------------------------------------------------------*\\\n" |
22265 | "|* MS AArch64 specific\n" |
22266 | "\\*----------------------------------------------------------------------------*/\n" |
22267 | "#if defined(__aarch64__)\n" |
22268 | "unsigned __int64 __getReg(int);\n" |
22269 | "long _InterlockedAdd(long volatile *Addend, long Value);\n" |
22270 | "__int64 _ReadStatusReg(int);\n" |
22271 | "void _WriteStatusReg(int, __int64);\n" |
22272 | "\n" |
22273 | "static inline unsigned short _byteswap_ushort (unsigned short val) {\n" |
22274 | " return __builtin_bswap16(val);\n" |
22275 | "}\n" |
22276 | "static inline unsigned long _byteswap_ulong (unsigned long val) {\n" |
22277 | " return __builtin_bswap32(val);\n" |
22278 | "}\n" |
22279 | "static inline unsigned __int64 _byteswap_uint64 (unsigned __int64 val) {\n" |
22280 | " return __builtin_bswap64(val);\n" |
22281 | "}\n" |
22282 | "#endif\n" |
22283 | "\n" |
22284 | "/*----------------------------------------------------------------------------*\\\n" |
22285 | "|* Privileged intrinsics\n" |
22286 | "\\*----------------------------------------------------------------------------*/\n" |
22287 | "#if defined(__i386__) || defined(__x86_64__)\n" |
22288 | "static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS\n" |
22289 | "__readmsr(unsigned long __register) {\n" |
22290 | " // Loads the contents of a 64-bit model specific register (MSR) specified in\n" |
22291 | " // the ECX register into registers EDX:EAX. The EDX register is loaded with\n" |
22292 | " // the high-order 32 bits of the MSR and the EAX register is loaded with the\n" |
22293 | " // low-order 32 bits. If less than 64 bits are implemented in the MSR being\n" |
22294 | " // read, the values returned to EDX:EAX in unimplemented bit locations are\n" |
22295 | " // undefined.\n" |
22296 | " unsigned long __edx;\n" |
22297 | " unsigned long __eax;\n" |
22298 | " __asm__ (\"rdmsr\" : \"=d\"(__edx), \"=a\"(__eax) : \"c\"(__register));\n" |
22299 | " return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax;\n" |
22300 | "}\n" |
22301 | "\n" |
22302 | "static __inline__ unsigned long __DEFAULT_FN_ATTRS\n" |
22303 | "__readcr3(void) {\n" |
22304 | " unsigned long __cr3_val;\n" |
22305 | " __asm__ __volatile__ (\"mov %%cr3, %0\" : \"=q\"(__cr3_val) : : \"memory\");\n" |
22306 | " return __cr3_val;\n" |
22307 | "}\n" |
22308 | "\n" |
22309 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
22310 | "__writecr3(unsigned int __cr3_val) {\n" |
22311 | " __asm__ (\"mov %0, %%cr3\" : : \"q\"(__cr3_val) : \"memory\");\n" |
22312 | "}\n" |
22313 | "#endif\n" |
22314 | "\n" |
22315 | "#ifdef __cplusplus\n" |
22316 | "}\n" |
22317 | "#endif\n" |
22318 | "\n" |
22319 | "#undef __DEFAULT_FN_ATTRS\n" |
22320 | "\n" |
22321 | "#endif /* __INTRIN_H */\n" |
22322 | "#endif /* _MSC_VER */\n" |
22323 | "" } , |
22324 | { "/builtins/inttypes.h" , "/*===---- inttypes.h - Standard header for integer printf macros ----------===*\\\n" |
22325 | " *\n" |
22326 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
22327 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
22328 | " * in the Software without restriction, including without limitation the rights\n" |
22329 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
22330 | " * copies of the Software, and to permit persons to whom the Software is\n" |
22331 | " * furnished to do so, subject to the following conditions:\n" |
22332 | " *\n" |
22333 | " * The above copyright notice and this permission notice shall be included in\n" |
22334 | " * all copies or substantial portions of the Software.\n" |
22335 | " *\n" |
22336 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
22337 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
22338 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
22339 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
22340 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
22341 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
22342 | " * THE SOFTWARE.\n" |
22343 | " *\n" |
22344 | "\\*===----------------------------------------------------------------------===*/\n" |
22345 | "\n" |
22346 | "#ifndef __CLANG_INTTYPES_H\n" |
22347 | "#define __CLANG_INTTYPES_H\n" |
22348 | "\n" |
22349 | "#if defined(_MSC_VER) && _MSC_VER < 1800\n" |
22350 | "#error MSVC does not have inttypes.h prior to Visual Studio 2013\n" |
22351 | "#endif\n" |
22352 | "\n" |
22353 | "#include_next <inttypes.h>\n" |
22354 | "\n" |
22355 | "#if defined(_MSC_VER) && _MSC_VER < 1900\n" |
22356 | "/* MSVC headers define int32_t as int, but PRIx32 as \"lx\" instead of \"x\".\n" |
22357 | " * This triggers format warnings, so fix it up here. */\n" |
22358 | "#undef PRId32\n" |
22359 | "#undef PRIdLEAST32\n" |
22360 | "#undef PRIdFAST32\n" |
22361 | "#undef PRIi32\n" |
22362 | "#undef PRIiLEAST32\n" |
22363 | "#undef PRIiFAST32\n" |
22364 | "#undef PRIo32\n" |
22365 | "#undef PRIoLEAST32\n" |
22366 | "#undef PRIoFAST32\n" |
22367 | "#undef PRIu32\n" |
22368 | "#undef PRIuLEAST32\n" |
22369 | "#undef PRIuFAST32\n" |
22370 | "#undef PRIx32\n" |
22371 | "#undef PRIxLEAST32\n" |
22372 | "#undef PRIxFAST32\n" |
22373 | "#undef PRIX32\n" |
22374 | "#undef PRIXLEAST32\n" |
22375 | "#undef PRIXFAST32\n" |
22376 | "\n" |
22377 | "#undef SCNd32\n" |
22378 | "#undef SCNdLEAST32\n" |
22379 | "#undef SCNdFAST32\n" |
22380 | "#undef SCNi32\n" |
22381 | "#undef SCNiLEAST32\n" |
22382 | "#undef SCNiFAST32\n" |
22383 | "#undef SCNo32\n" |
22384 | "#undef SCNoLEAST32\n" |
22385 | "#undef SCNoFAST32\n" |
22386 | "#undef SCNu32\n" |
22387 | "#undef SCNuLEAST32\n" |
22388 | "#undef SCNuFAST32\n" |
22389 | "#undef SCNx32\n" |
22390 | "#undef SCNxLEAST32\n" |
22391 | "#undef SCNxFAST32\n" |
22392 | "\n" |
22393 | "#define PRId32 \"d\"\n" |
22394 | "#define PRIdLEAST32 \"d\"\n" |
22395 | "#define PRIdFAST32 \"d\"\n" |
22396 | "#define PRIi32 \"i\"\n" |
22397 | "#define PRIiLEAST32 \"i\"\n" |
22398 | "#define PRIiFAST32 \"i\"\n" |
22399 | "#define PRIo32 \"o\"\n" |
22400 | "#define PRIoLEAST32 \"o\"\n" |
22401 | "#define PRIoFAST32 \"o\"\n" |
22402 | "#define PRIu32 \"u\"\n" |
22403 | "#define PRIuLEAST32 \"u\"\n" |
22404 | "#define PRIuFAST32 \"u\"\n" |
22405 | "#define PRIx32 \"x\"\n" |
22406 | "#define PRIxLEAST32 \"x\"\n" |
22407 | "#define PRIxFAST32 \"x\"\n" |
22408 | "#define PRIX32 \"X\"\n" |
22409 | "#define PRIXLEAST32 \"X\"\n" |
22410 | "#define PRIXFAST32 \"X\"\n" |
22411 | "\n" |
22412 | "#define SCNd32 \"d\"\n" |
22413 | "#define SCNdLEAST32 \"d\"\n" |
22414 | "#define SCNdFAST32 \"d\"\n" |
22415 | "#define SCNi32 \"i\"\n" |
22416 | "#define SCNiLEAST32 \"i\"\n" |
22417 | "#define SCNiFAST32 \"i\"\n" |
22418 | "#define SCNo32 \"o\"\n" |
22419 | "#define SCNoLEAST32 \"o\"\n" |
22420 | "#define SCNoFAST32 \"o\"\n" |
22421 | "#define SCNu32 \"u\"\n" |
22422 | "#define SCNuLEAST32 \"u\"\n" |
22423 | "#define SCNuFAST32 \"u\"\n" |
22424 | "#define SCNx32 \"x\"\n" |
22425 | "#define SCNxLEAST32 \"x\"\n" |
22426 | "#define SCNxFAST32 \"x\"\n" |
22427 | "#endif\n" |
22428 | "\n" |
22429 | "#endif /* __CLANG_INTTYPES_H */\n" |
22430 | "" } , |
22431 | { "/builtins/invpcidintrin.h" , "/*===------------- invpcidintrin.h - INVPCID intrinsic ---------------------===\n" |
22432 | " *\n" |
22433 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
22434 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
22435 | " * in the Software without restriction, including without limitation the rights\n" |
22436 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
22437 | " * copies of the Software, and to permit persons to whom the Software is\n" |
22438 | " * furnished to do so, subject to the following conditions:\n" |
22439 | " *\n" |
22440 | " * The above copyright notice and this permission notice shall be included in\n" |
22441 | " * all copies or substantial portions of the Software.\n" |
22442 | " *\n" |
22443 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
22444 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
22445 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
22446 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
22447 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
22448 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
22449 | " * THE SOFTWARE.\n" |
22450 | " *\n" |
22451 | " *===-----------------------------------------------------------------------===\n" |
22452 | " */\n" |
22453 | "\n" |
22454 | "#ifndef __IMMINTRIN_H\n" |
22455 | "#error \"Never use <invpcidintrin.h> directly; include <immintrin.h> instead.\"\n" |
22456 | "#endif\n" |
22457 | "\n" |
22458 | "#ifndef __INVPCIDINTRIN_H\n" |
22459 | "#define __INVPCIDINTRIN_H\n" |
22460 | "\n" |
22461 | "static __inline__ void\n" |
22462 | " __attribute__((__always_inline__, __nodebug__, __target__(\"invpcid\")))\n" |
22463 | "_invpcid(unsigned int __type, void *__descriptor) {\n" |
22464 | " __builtin_ia32_invpcid(__type, __descriptor);\n" |
22465 | "}\n" |
22466 | "\n" |
22467 | "#endif /* __INVPCIDINTRIN_H */\n" |
22468 | "" } , |
22469 | { "/builtins/iso646.h" , "/*===---- iso646.h - Standard header for alternate spellings of operators---===\n" |
22470 | " *\n" |
22471 | " * Copyright (c) 2008 Eli Friedman\n" |
22472 | " *\n" |
22473 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
22474 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
22475 | " * in the Software without restriction, including without limitation the rights\n" |
22476 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
22477 | " * copies of the Software, and to permit persons to whom the Software is\n" |
22478 | " * furnished to do so, subject to the following conditions:\n" |
22479 | " *\n" |
22480 | " * The above copyright notice and this permission notice shall be included in\n" |
22481 | " * all copies or substantial portions of the Software.\n" |
22482 | " *\n" |
22483 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
22484 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
22485 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
22486 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
22487 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
22488 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
22489 | " * THE SOFTWARE.\n" |
22490 | " *\n" |
22491 | " *===-----------------------------------------------------------------------===\n" |
22492 | " */\n" |
22493 | "\n" |
22494 | "#ifndef __ISO646_H\n" |
22495 | "#define __ISO646_H\n" |
22496 | "\n" |
22497 | "#ifndef __cplusplus\n" |
22498 | "#define and &&\n" |
22499 | "#define and_eq &=\n" |
22500 | "#define bitand &\n" |
22501 | "#define bitor |\n" |
22502 | "#define compl ~\n" |
22503 | "#define not !\n" |
22504 | "#define not_eq !=\n" |
22505 | "#define or ||\n" |
22506 | "#define or_eq |=\n" |
22507 | "#define xor ^\n" |
22508 | "#define xor_eq ^=\n" |
22509 | "#endif\n" |
22510 | "\n" |
22511 | "#endif /* __ISO646_H */\n" |
22512 | "" } , |
22513 | { "/builtins/limits.h" , "/*===---- limits.h - Standard header for integer sizes --------------------===*\\\n" |
22514 | " *\n" |
22515 | " * Copyright (c) 2009 Chris Lattner\n" |
22516 | " *\n" |
22517 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
22518 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
22519 | " * in the Software without restriction, including without limitation the rights\n" |
22520 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
22521 | " * copies of the Software, and to permit persons to whom the Software is\n" |
22522 | " * furnished to do so, subject to the following conditions:\n" |
22523 | " *\n" |
22524 | " * The above copyright notice and this permission notice shall be included in\n" |
22525 | " * all copies or substantial portions of the Software.\n" |
22526 | " *\n" |
22527 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
22528 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
22529 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
22530 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
22531 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
22532 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
22533 | " * THE SOFTWARE.\n" |
22534 | " *\n" |
22535 | "\\*===----------------------------------------------------------------------===*/\n" |
22536 | "\n" |
22537 | "#ifndef __CLANG_LIMITS_H\n" |
22538 | "#define __CLANG_LIMITS_H\n" |
22539 | "\n" |
22540 | "/* The system's limits.h may, in turn, try to #include_next GCC's limits.h.\n" |
22541 | " Avert this #include_next madness. */\n" |
22542 | "#if defined __GNUC__ && !defined _GCC_LIMITS_H_\n" |
22543 | "#define _GCC_LIMITS_H_\n" |
22544 | "#endif\n" |
22545 | "\n" |
22546 | "/* System headers include a number of constants from POSIX in <limits.h>.\n" |
22547 | " Include it if we're hosted. */\n" |
22548 | "#if __STDC_HOSTED__ && __has_include_next(<limits.h>)\n" |
22549 | "#include_next <limits.h>\n" |
22550 | "#endif\n" |
22551 | "\n" |
22552 | "/* Many system headers try to \"help us out\" by defining these. No really, we\n" |
22553 | " know how big each datatype is. */\n" |
22554 | "#undef SCHAR_MIN\n" |
22555 | "#undef SCHAR_MAX\n" |
22556 | "#undef UCHAR_MAX\n" |
22557 | "#undef SHRT_MIN\n" |
22558 | "#undef SHRT_MAX\n" |
22559 | "#undef USHRT_MAX\n" |
22560 | "#undef INT_MIN\n" |
22561 | "#undef INT_MAX\n" |
22562 | "#undef UINT_MAX\n" |
22563 | "#undef LONG_MIN\n" |
22564 | "#undef LONG_MAX\n" |
22565 | "#undef ULONG_MAX\n" |
22566 | "\n" |
22567 | "#undef CHAR_BIT\n" |
22568 | "#undef CHAR_MIN\n" |
22569 | "#undef CHAR_MAX\n" |
22570 | "\n" |
22571 | "/* C90/99 5.2.4.2.1 */\n" |
22572 | "#define SCHAR_MAX __SCHAR_MAX__\n" |
22573 | "#define SHRT_MAX __SHRT_MAX__\n" |
22574 | "#define INT_MAX __INT_MAX__\n" |
22575 | "#define LONG_MAX __LONG_MAX__\n" |
22576 | "\n" |
22577 | "#define SCHAR_MIN (-__SCHAR_MAX__-1)\n" |
22578 | "#define SHRT_MIN (-__SHRT_MAX__ -1)\n" |
22579 | "#define INT_MIN (-__INT_MAX__ -1)\n" |
22580 | "#define LONG_MIN (-__LONG_MAX__ -1L)\n" |
22581 | "\n" |
22582 | "#define UCHAR_MAX (__SCHAR_MAX__*2 +1)\n" |
22583 | "#define USHRT_MAX (__SHRT_MAX__ *2 +1)\n" |
22584 | "#define UINT_MAX (__INT_MAX__ *2U +1U)\n" |
22585 | "#define ULONG_MAX (__LONG_MAX__ *2UL+1UL)\n" |
22586 | "\n" |
22587 | "#ifndef MB_LEN_MAX\n" |
22588 | "#define MB_LEN_MAX 1\n" |
22589 | "#endif\n" |
22590 | "\n" |
22591 | "#define CHAR_BIT __CHAR_BIT__\n" |
22592 | "\n" |
22593 | "#ifdef __CHAR_UNSIGNED__ /* -funsigned-char */\n" |
22594 | "#define CHAR_MIN 0\n" |
22595 | "#define CHAR_MAX UCHAR_MAX\n" |
22596 | "#else\n" |
22597 | "#define CHAR_MIN SCHAR_MIN\n" |
22598 | "#define CHAR_MAX __SCHAR_MAX__\n" |
22599 | "#endif\n" |
22600 | "\n" |
22601 | "/* C99 5.2.4.2.1: Added long long.\n" |
22602 | " C++11 18.3.3.2: same contents as the Standard C Library header <limits.h>.\n" |
22603 | " */\n" |
22604 | "#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L\n" |
22605 | "\n" |
22606 | "#undef LLONG_MIN\n" |
22607 | "#undef LLONG_MAX\n" |
22608 | "#undef ULLONG_MAX\n" |
22609 | "\n" |
22610 | "#define LLONG_MAX __LONG_LONG_MAX__\n" |
22611 | "#define LLONG_MIN (-__LONG_LONG_MAX__-1LL)\n" |
22612 | "#define ULLONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL)\n" |
22613 | "#endif\n" |
22614 | "\n" |
22615 | "/* LONG_LONG_MIN/LONG_LONG_MAX/ULONG_LONG_MAX are a GNU extension. It's too bad\n" |
22616 | " that we don't have something like #pragma poison that could be used to\n" |
22617 | " deprecate a macro - the code should just use LLONG_MAX and friends.\n" |
22618 | " */\n" |
22619 | "#if defined(__GNU_LIBRARY__) ? defined(__USE_GNU) : !defined(__STRICT_ANSI__)\n" |
22620 | "\n" |
22621 | "#undef LONG_LONG_MIN\n" |
22622 | "#undef LONG_LONG_MAX\n" |
22623 | "#undef ULONG_LONG_MAX\n" |
22624 | "\n" |
22625 | "#define LONG_LONG_MAX __LONG_LONG_MAX__\n" |
22626 | "#define LONG_LONG_MIN (-__LONG_LONG_MAX__-1LL)\n" |
22627 | "#define ULONG_LONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL)\n" |
22628 | "#endif\n" |
22629 | "\n" |
22630 | "#endif /* __CLANG_LIMITS_H */\n" |
22631 | "" } , |
22632 | { "/builtins/lwpintrin.h" , "/*===---- lwpintrin.h - LWP intrinsics -------------------------------------===\n" |
22633 | " *\n" |
22634 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
22635 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
22636 | " * in the Software without restriction, including without limitation the rights\n" |
22637 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
22638 | " * copies of the Software, and to permit persons to whom the Software is\n" |
22639 | " * furnished to do so, subject to the following conditions:\n" |
22640 | " *\n" |
22641 | " * The above copyright notice and this permission notice shall be included in\n" |
22642 | " * all copies or substantial portions of the Software.\n" |
22643 | " *\n" |
22644 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
22645 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
22646 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
22647 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
22648 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
22649 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
22650 | " * THE SOFTWARE.\n" |
22651 | " *\n" |
22652 | " *===-----------------------------------------------------------------------===\n" |
22653 | " */\n" |
22654 | "\n" |
22655 | "#ifndef __X86INTRIN_H\n" |
22656 | "#error \"Never use <lwpintrin.h> directly; include <x86intrin.h> instead.\"\n" |
22657 | "#endif\n" |
22658 | "\n" |
22659 | "#ifndef __LWPINTRIN_H\n" |
22660 | "#define __LWPINTRIN_H\n" |
22661 | "\n" |
22662 | "/* Define the default attributes for the functions in this file. */\n" |
22663 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"lwp\")))\n" |
22664 | "\n" |
22665 | "/// Parses the LWPCB at the specified address and enables\n" |
22666 | "/// profiling if valid.\n" |
22667 | "///\n" |
22668 | "/// \\headerfile <x86intrin.h>\n" |
22669 | "///\n" |
22670 | "/// This intrinsic corresponds to the <c> LLWPCB </c> instruction.\n" |
22671 | "///\n" |
22672 | "/// \\param __addr\n" |
22673 | "/// Address to the new Lightweight Profiling Control Block (LWPCB). If the\n" |
22674 | "/// LWPCB is valid, writes the address into the LWP_CBADDR MSR and enables\n" |
22675 | "/// Lightweight Profiling.\n" |
22676 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
22677 | "__llwpcb (void *__addr)\n" |
22678 | "{\n" |
22679 | " __builtin_ia32_llwpcb(__addr);\n" |
22680 | "}\n" |
22681 | "\n" |
22682 | "/// Flushes the LWP state to memory and returns the address of the LWPCB.\n" |
22683 | "///\n" |
22684 | "/// \\headerfile <x86intrin.h>\n" |
22685 | "///\n" |
22686 | "/// This intrinsic corresponds to the <c> SLWPCB </c> instruction.\n" |
22687 | "///\n" |
22688 | "/// \\return\n" |
22689 | "/// Address to the current Lightweight Profiling Control Block (LWPCB).\n" |
22690 | "/// If LWP is not currently enabled, returns NULL.\n" |
22691 | "static __inline__ void* __DEFAULT_FN_ATTRS\n" |
22692 | "__slwpcb (void)\n" |
22693 | "{\n" |
22694 | " return __builtin_ia32_slwpcb();\n" |
22695 | "}\n" |
22696 | "\n" |
22697 | "/// Inserts programmed event record into the LWP event ring buffer\n" |
22698 | "/// and advances the ring buffer pointer.\n" |
22699 | "///\n" |
22700 | "/// \\headerfile <x86intrin.h>\n" |
22701 | "///\n" |
22702 | "/// This intrinsic corresponds to the <c> LWPINS </c> instruction.\n" |
22703 | "///\n" |
22704 | "/// \\param DATA2\n" |
22705 | "/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field.\n" |
22706 | "/// \\param DATA1\n" |
22707 | "/// A 32-bit value is inserted into the 32-bit Data1 field.\n" |
22708 | "/// \\param FLAGS\n" |
22709 | "/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n" |
22710 | "/// \\returns If the ring buffer is full and LWP is running in Synchronized Mode,\n" |
22711 | "/// the event record overwrites the last record in the buffer, the MissedEvents\n" |
22712 | "/// counter in the LWPCB is incremented, the head pointer is not advanced, and\n" |
22713 | "/// 1 is returned. Otherwise 0 is returned.\n" |
22714 | "#define __lwpins32(DATA2, DATA1, FLAGS) \\\n" |
22715 | " (__builtin_ia32_lwpins32((unsigned int) (DATA2), (unsigned int) (DATA1), \\\n" |
22716 | " (unsigned int) (FLAGS)))\n" |
22717 | "\n" |
22718 | "/// Decrements the LWP programmed value sample event counter. If the result is\n" |
22719 | "/// negative, inserts an event record into the LWP event ring buffer in memory\n" |
22720 | "/// and advances the ring buffer pointer.\n" |
22721 | "///\n" |
22722 | "/// \\headerfile <x86intrin.h>\n" |
22723 | "///\n" |
22724 | "/// This intrinsic corresponds to the <c> LWPVAL </c> instruction.\n" |
22725 | "///\n" |
22726 | "/// \\param DATA2\n" |
22727 | "/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field.\n" |
22728 | "/// \\param DATA1\n" |
22729 | "/// A 32-bit value is inserted into the 32-bit Data1 field.\n" |
22730 | "/// \\param FLAGS\n" |
22731 | "/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n" |
22732 | "#define __lwpval32(DATA2, DATA1, FLAGS) \\\n" |
22733 | " (__builtin_ia32_lwpval32((unsigned int) (DATA2), (unsigned int) (DATA1), \\\n" |
22734 | " (unsigned int) (FLAGS)))\n" |
22735 | "\n" |
22736 | "#ifdef __x86_64__\n" |
22737 | "\n" |
22738 | "/// Inserts programmed event record into the LWP event ring buffer\n" |
22739 | "/// and advances the ring buffer pointer.\n" |
22740 | "///\n" |
22741 | "/// \\headerfile <x86intrin.h>\n" |
22742 | "///\n" |
22743 | "/// This intrinsic corresponds to the <c> LWPINS </c> instruction.\n" |
22744 | "///\n" |
22745 | "/// \\param DATA2\n" |
22746 | "/// A 64-bit value is inserted into the 64-bit Data2 field.\n" |
22747 | "/// \\param DATA1\n" |
22748 | "/// A 32-bit value is inserted into the 32-bit Data1 field.\n" |
22749 | "/// \\param FLAGS\n" |
22750 | "/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n" |
22751 | "/// \\returns If the ring buffer is full and LWP is running in Synchronized Mode,\n" |
22752 | "/// the event record overwrites the last record in the buffer, the MissedEvents\n" |
22753 | "/// counter in the LWPCB is incremented, the head pointer is not advanced, and\n" |
22754 | "/// 1 is returned. Otherwise 0 is returned.\n" |
22755 | "#define __lwpins64(DATA2, DATA1, FLAGS) \\\n" |
22756 | " (__builtin_ia32_lwpins64((unsigned long long) (DATA2), (unsigned int) (DATA1), \\\n" |
22757 | " (unsigned int) (FLAGS)))\n" |
22758 | "\n" |
22759 | "/// Decrements the LWP programmed value sample event counter. If the result is\n" |
22760 | "/// negative, inserts an event record into the LWP event ring buffer in memory\n" |
22761 | "/// and advances the ring buffer pointer.\n" |
22762 | "///\n" |
22763 | "/// \\headerfile <x86intrin.h>\n" |
22764 | "///\n" |
22765 | "/// This intrinsic corresponds to the <c> LWPVAL </c> instruction.\n" |
22766 | "///\n" |
22767 | "/// \\param DATA2\n" |
22768 | "/// A 64-bit value is and inserted into the 64-bit Data2 field.\n" |
22769 | "/// \\param DATA1\n" |
22770 | "/// A 32-bit value is inserted into the 32-bit Data1 field.\n" |
22771 | "/// \\param FLAGS\n" |
22772 | "/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n" |
22773 | "#define __lwpval64(DATA2, DATA1, FLAGS) \\\n" |
22774 | " (__builtin_ia32_lwpval64((unsigned long long) (DATA2), (unsigned int) (DATA1), \\\n" |
22775 | " (unsigned int) (FLAGS)))\n" |
22776 | "\n" |
22777 | "#endif\n" |
22778 | "\n" |
22779 | "#undef __DEFAULT_FN_ATTRS\n" |
22780 | "\n" |
22781 | "#endif /* __LWPINTRIN_H */\n" |
22782 | "" } , |
22783 | { "/builtins/lzcntintrin.h" , "/*===---- lzcntintrin.h - LZCNT intrinsics ---------------------------------===\n" |
22784 | " *\n" |
22785 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
22786 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
22787 | " * in the Software without restriction, including without limitation the rights\n" |
22788 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
22789 | " * copies of the Software, and to permit persons to whom the Software is\n" |
22790 | " * furnished to do so, subject to the following conditions:\n" |
22791 | " *\n" |
22792 | " * The above copyright notice and this permission notice shall be included in\n" |
22793 | " * all copies or substantial portions of the Software.\n" |
22794 | " *\n" |
22795 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
22796 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
22797 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
22798 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
22799 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
22800 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
22801 | " * THE SOFTWARE.\n" |
22802 | " *\n" |
22803 | " *===-----------------------------------------------------------------------===\n" |
22804 | " */\n" |
22805 | "\n" |
22806 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
22807 | "#error \"Never use <lzcntintrin.h> directly; include <x86intrin.h> instead.\"\n" |
22808 | "#endif\n" |
22809 | "\n" |
22810 | "#ifndef __LZCNTINTRIN_H\n" |
22811 | "#define __LZCNTINTRIN_H\n" |
22812 | "\n" |
22813 | "/* Define the default attributes for the functions in this file. */\n" |
22814 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"lzcnt\")))\n" |
22815 | "\n" |
22816 | "#ifndef _MSC_VER\n" |
22817 | "/// Counts the number of leading zero bits in the operand.\n" |
22818 | "///\n" |
22819 | "/// \\headerfile <x86intrin.h>\n" |
22820 | "///\n" |
22821 | "/// This intrinsic corresponds to the \\c LZCNT instruction.\n" |
22822 | "///\n" |
22823 | "/// \\param __X\n" |
22824 | "/// An unsigned 16-bit integer whose leading zeros are to be counted.\n" |
22825 | "/// \\returns An unsigned 16-bit integer containing the number of leading zero\n" |
22826 | "/// bits in the operand.\n" |
22827 | "#define __lzcnt16(X) __builtin_ia32_lzcnt_u16((unsigned short)(X))\n" |
22828 | "#endif // _MSC_VER\n" |
22829 | "\n" |
22830 | "/// Counts the number of leading zero bits in the operand.\n" |
22831 | "///\n" |
22832 | "/// \\headerfile <x86intrin.h>\n" |
22833 | "///\n" |
22834 | "/// This intrinsic corresponds to the \\c LZCNT instruction.\n" |
22835 | "///\n" |
22836 | "/// \\param __X\n" |
22837 | "/// An unsigned 32-bit integer whose leading zeros are to be counted.\n" |
22838 | "/// \\returns An unsigned 32-bit integer containing the number of leading zero\n" |
22839 | "/// bits in the operand.\n" |
22840 | "/// \\see _lzcnt_u32\n" |
22841 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
22842 | "__lzcnt32(unsigned int __X)\n" |
22843 | "{\n" |
22844 | " return __builtin_ia32_lzcnt_u32(__X);\n" |
22845 | "}\n" |
22846 | "\n" |
22847 | "/// Counts the number of leading zero bits in the operand.\n" |
22848 | "///\n" |
22849 | "/// \\headerfile <x86intrin.h>\n" |
22850 | "///\n" |
22851 | "/// This intrinsic corresponds to the \\c LZCNT instruction.\n" |
22852 | "///\n" |
22853 | "/// \\param __X\n" |
22854 | "/// An unsigned 32-bit integer whose leading zeros are to be counted.\n" |
22855 | "/// \\returns An unsigned 32-bit integer containing the number of leading zero\n" |
22856 | "/// bits in the operand.\n" |
22857 | "/// \\see __lzcnt32\n" |
22858 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
22859 | "_lzcnt_u32(unsigned int __X)\n" |
22860 | "{\n" |
22861 | " return __builtin_ia32_lzcnt_u32(__X);\n" |
22862 | "}\n" |
22863 | "\n" |
22864 | "#ifdef __x86_64__\n" |
22865 | "#ifndef _MSC_VER\n" |
22866 | "/// Counts the number of leading zero bits in the operand.\n" |
22867 | "///\n" |
22868 | "/// \\headerfile <x86intrin.h>\n" |
22869 | "///\n" |
22870 | "/// This intrinsic corresponds to the \\c LZCNT instruction.\n" |
22871 | "///\n" |
22872 | "/// \\param __X\n" |
22873 | "/// An unsigned 64-bit integer whose leading zeros are to be counted.\n" |
22874 | "/// \\returns An unsigned 64-bit integer containing the number of leading zero\n" |
22875 | "/// bits in the operand.\n" |
22876 | "/// \\see _lzcnt_u64\n" |
22877 | "#define __lzcnt64(X) __builtin_ia32_lzcnt_u64((unsigned long long)(X))\n" |
22878 | "#endif // _MSC_VER\n" |
22879 | "\n" |
22880 | "/// Counts the number of leading zero bits in the operand.\n" |
22881 | "///\n" |
22882 | "/// \\headerfile <x86intrin.h>\n" |
22883 | "///\n" |
22884 | "/// This intrinsic corresponds to the \\c LZCNT instruction.\n" |
22885 | "///\n" |
22886 | "/// \\param __X\n" |
22887 | "/// An unsigned 64-bit integer whose leading zeros are to be counted.\n" |
22888 | "/// \\returns An unsigned 64-bit integer containing the number of leading zero\n" |
22889 | "/// bits in the operand.\n" |
22890 | "/// \\see __lzcnt64\n" |
22891 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
22892 | "_lzcnt_u64(unsigned long long __X)\n" |
22893 | "{\n" |
22894 | " return __builtin_ia32_lzcnt_u64(__X);\n" |
22895 | "}\n" |
22896 | "#endif\n" |
22897 | "\n" |
22898 | "#undef __DEFAULT_FN_ATTRS\n" |
22899 | "\n" |
22900 | "#endif /* __LZCNTINTRIN_H */\n" |
22901 | "" } , |
22902 | { "/builtins/mm3dnow.h" , "/*===---- mm3dnow.h - 3DNow! intrinsics ------------------------------------===\n" |
22903 | " *\n" |
22904 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
22905 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
22906 | " * in the Software without restriction, including without limitation the rights\n" |
22907 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
22908 | " * copies of the Software, and to permit persons to whom the Software is\n" |
22909 | " * furnished to do so, subject to the following conditions:\n" |
22910 | " *\n" |
22911 | " * The above copyright notice and this permission notice shall be included in\n" |
22912 | " * all copies or substantial portions of the Software.\n" |
22913 | " *\n" |
22914 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
22915 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
22916 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
22917 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
22918 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
22919 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
22920 | " * THE SOFTWARE.\n" |
22921 | " *\n" |
22922 | " *===-----------------------------------------------------------------------===\n" |
22923 | " */\n" |
22924 | "\n" |
22925 | "#ifndef _MM3DNOW_H_INCLUDED\n" |
22926 | "#define _MM3DNOW_H_INCLUDED\n" |
22927 | "\n" |
22928 | "#include <mmintrin.h>\n" |
22929 | "#include <prfchwintrin.h>\n" |
22930 | "\n" |
22931 | "typedef float __v2sf __attribute__((__vector_size__(8)));\n" |
22932 | "\n" |
22933 | "/* Define the default attributes for the functions in this file. */\n" |
22934 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"3dnow\"), __min_vector_width__(64)))\n" |
22935 | "\n" |
22936 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"3dnow\")))\n" |
22937 | "_m_femms(void) {\n" |
22938 | " __builtin_ia32_femms();\n" |
22939 | "}\n" |
22940 | "\n" |
22941 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
22942 | "_m_pavgusb(__m64 __m1, __m64 __m2) {\n" |
22943 | " return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2);\n" |
22944 | "}\n" |
22945 | "\n" |
22946 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
22947 | "_m_pf2id(__m64 __m) {\n" |
22948 | " return (__m64)__builtin_ia32_pf2id((__v2sf)__m);\n" |
22949 | "}\n" |
22950 | "\n" |
22951 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
22952 | "_m_pfacc(__m64 __m1, __m64 __m2) {\n" |
22953 | " return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2);\n" |
22954 | "}\n" |
22955 | "\n" |
22956 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
22957 | "_m_pfadd(__m64 __m1, __m64 __m2) {\n" |
22958 | " return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2);\n" |
22959 | "}\n" |
22960 | "\n" |
22961 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
22962 | "_m_pfcmpeq(__m64 __m1, __m64 __m2) {\n" |
22963 | " return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2);\n" |
22964 | "}\n" |
22965 | "\n" |
22966 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
22967 | "_m_pfcmpge(__m64 __m1, __m64 __m2) {\n" |
22968 | " return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2);\n" |
22969 | "}\n" |
22970 | "\n" |
22971 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
22972 | "_m_pfcmpgt(__m64 __m1, __m64 __m2) {\n" |
22973 | " return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2);\n" |
22974 | "}\n" |
22975 | "\n" |
22976 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
22977 | "_m_pfmax(__m64 __m1, __m64 __m2) {\n" |
22978 | " return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2);\n" |
22979 | "}\n" |
22980 | "\n" |
22981 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
22982 | "_m_pfmin(__m64 __m1, __m64 __m2) {\n" |
22983 | " return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2);\n" |
22984 | "}\n" |
22985 | "\n" |
22986 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
22987 | "_m_pfmul(__m64 __m1, __m64 __m2) {\n" |
22988 | " return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2);\n" |
22989 | "}\n" |
22990 | "\n" |
22991 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
22992 | "_m_pfrcp(__m64 __m) {\n" |
22993 | " return (__m64)__builtin_ia32_pfrcp((__v2sf)__m);\n" |
22994 | "}\n" |
22995 | "\n" |
22996 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
22997 | "_m_pfrcpit1(__m64 __m1, __m64 __m2) {\n" |
22998 | " return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2);\n" |
22999 | "}\n" |
23000 | "\n" |
23001 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23002 | "_m_pfrcpit2(__m64 __m1, __m64 __m2) {\n" |
23003 | " return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2);\n" |
23004 | "}\n" |
23005 | "\n" |
23006 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23007 | "_m_pfrsqrt(__m64 __m) {\n" |
23008 | " return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m);\n" |
23009 | "}\n" |
23010 | "\n" |
23011 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23012 | "_m_pfrsqrtit1(__m64 __m1, __m64 __m2) {\n" |
23013 | " return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2);\n" |
23014 | "}\n" |
23015 | "\n" |
23016 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23017 | "_m_pfsub(__m64 __m1, __m64 __m2) {\n" |
23018 | " return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2);\n" |
23019 | "}\n" |
23020 | "\n" |
23021 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23022 | "_m_pfsubr(__m64 __m1, __m64 __m2) {\n" |
23023 | " return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2);\n" |
23024 | "}\n" |
23025 | "\n" |
23026 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23027 | "_m_pi2fd(__m64 __m) {\n" |
23028 | " return (__m64)__builtin_ia32_pi2fd((__v2si)__m);\n" |
23029 | "}\n" |
23030 | "\n" |
23031 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23032 | "_m_pmulhrw(__m64 __m1, __m64 __m2) {\n" |
23033 | " return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2);\n" |
23034 | "}\n" |
23035 | "\n" |
23036 | "/* Handle the 3dnowa instructions here. */\n" |
23037 | "#undef __DEFAULT_FN_ATTRS\n" |
23038 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"3dnowa\"), __min_vector_width__(64)))\n" |
23039 | "\n" |
23040 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23041 | "_m_pf2iw(__m64 __m) {\n" |
23042 | " return (__m64)__builtin_ia32_pf2iw((__v2sf)__m);\n" |
23043 | "}\n" |
23044 | "\n" |
23045 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23046 | "_m_pfnacc(__m64 __m1, __m64 __m2) {\n" |
23047 | " return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2);\n" |
23048 | "}\n" |
23049 | "\n" |
23050 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23051 | "_m_pfpnacc(__m64 __m1, __m64 __m2) {\n" |
23052 | " return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2);\n" |
23053 | "}\n" |
23054 | "\n" |
23055 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23056 | "_m_pi2fw(__m64 __m) {\n" |
23057 | " return (__m64)__builtin_ia32_pi2fw((__v2si)__m);\n" |
23058 | "}\n" |
23059 | "\n" |
23060 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23061 | "_m_pswapdsf(__m64 __m) {\n" |
23062 | " return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m);\n" |
23063 | "}\n" |
23064 | "\n" |
23065 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23066 | "_m_pswapdsi(__m64 __m) {\n" |
23067 | " return (__m64)__builtin_ia32_pswapdsi((__v2si)__m);\n" |
23068 | "}\n" |
23069 | "\n" |
23070 | "#undef __DEFAULT_FN_ATTRS\n" |
23071 | "\n" |
23072 | "#endif\n" |
23073 | "" } , |
23074 | { "/builtins/mm_malloc.h" , "/*===---- mm_malloc.h - Allocating and Freeing Aligned Memory Blocks -------===\n" |
23075 | " *\n" |
23076 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
23077 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
23078 | " * in the Software without restriction, including without limitation the rights\n" |
23079 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
23080 | " * copies of the Software, and to permit persons to whom the Software is\n" |
23081 | " * furnished to do so, subject to the following conditions:\n" |
23082 | " *\n" |
23083 | " * The above copyright notice and this permission notice shall be included in\n" |
23084 | " * all copies or substantial portions of the Software.\n" |
23085 | " *\n" |
23086 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
23087 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
23088 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
23089 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
23090 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
23091 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
23092 | " * THE SOFTWARE.\n" |
23093 | " *\n" |
23094 | " *===-----------------------------------------------------------------------===\n" |
23095 | " */\n" |
23096 | "\n" |
23097 | "#ifndef __MM_MALLOC_H\n" |
23098 | "#define __MM_MALLOC_H\n" |
23099 | "\n" |
23100 | "#include <stdlib.h>\n" |
23101 | "\n" |
23102 | "#ifdef _WIN32\n" |
23103 | "#include <malloc.h>\n" |
23104 | "#else\n" |
23105 | "#ifndef __cplusplus\n" |
23106 | "extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size);\n" |
23107 | "#else\n" |
23108 | "// Some systems (e.g. those with GNU libc) declare posix_memalign with an\n" |
23109 | "// exception specifier. Via an \"egregious workaround\" in\n" |
23110 | "// Sema::CheckEquivalentExceptionSpec, Clang accepts the following as a valid\n" |
23111 | "// redeclaration of glibc's declaration.\n" |
23112 | "extern \"C\" int posix_memalign(void **__memptr, size_t __alignment, size_t __size);\n" |
23113 | "#endif\n" |
23114 | "#endif\n" |
23115 | "\n" |
23116 | "#if !(defined(_WIN32) && defined(_mm_malloc))\n" |
23117 | "static __inline__ void *__attribute__((__always_inline__, __nodebug__,\n" |
23118 | " __malloc__))\n" |
23119 | "_mm_malloc(size_t __size, size_t __align)\n" |
23120 | "{\n" |
23121 | " if (__align == 1) {\n" |
23122 | " return malloc(__size);\n" |
23123 | " }\n" |
23124 | "\n" |
23125 | " if (!(__align & (__align - 1)) && __align < sizeof(void *))\n" |
23126 | " __align = sizeof(void *);\n" |
23127 | "\n" |
23128 | " void *__mallocedMemory;\n" |
23129 | "#if defined(__MINGW32__)\n" |
23130 | " __mallocedMemory = __mingw_aligned_malloc(__size, __align);\n" |
23131 | "#elif defined(_WIN32)\n" |
23132 | " __mallocedMemory = _aligned_malloc(__size, __align);\n" |
23133 | "#else\n" |
23134 | " if (posix_memalign(&__mallocedMemory, __align, __size))\n" |
23135 | " return 0;\n" |
23136 | "#endif\n" |
23137 | "\n" |
23138 | " return __mallocedMemory;\n" |
23139 | "}\n" |
23140 | "\n" |
23141 | "static __inline__ void __attribute__((__always_inline__, __nodebug__))\n" |
23142 | "_mm_free(void *__p)\n" |
23143 | "{\n" |
23144 | " free(__p);\n" |
23145 | "}\n" |
23146 | "#endif\n" |
23147 | "\n" |
23148 | "#endif /* __MM_MALLOC_H */\n" |
23149 | "" } , |
23150 | { "/builtins/mmintrin.h" , "/*===---- mmintrin.h - MMX intrinsics --------------------------------------===\n" |
23151 | " *\n" |
23152 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
23153 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
23154 | " * in the Software without restriction, including without limitation the rights\n" |
23155 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
23156 | " * copies of the Software, and to permit persons to whom the Software is\n" |
23157 | " * furnished to do so, subject to the following conditions:\n" |
23158 | " *\n" |
23159 | " * The above copyright notice and this permission notice shall be included in\n" |
23160 | " * all copies or substantial portions of the Software.\n" |
23161 | " *\n" |
23162 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
23163 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
23164 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
23165 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
23166 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
23167 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
23168 | " * THE SOFTWARE.\n" |
23169 | " *\n" |
23170 | " *===-----------------------------------------------------------------------===\n" |
23171 | " */\n" |
23172 | "\n" |
23173 | "#ifndef __MMINTRIN_H\n" |
23174 | "#define __MMINTRIN_H\n" |
23175 | "\n" |
23176 | "typedef long long __m64 __attribute__((__vector_size__(8)));\n" |
23177 | "\n" |
23178 | "typedef long long __v1di __attribute__((__vector_size__(8)));\n" |
23179 | "typedef int __v2si __attribute__((__vector_size__(8)));\n" |
23180 | "typedef short __v4hi __attribute__((__vector_size__(8)));\n" |
23181 | "typedef char __v8qi __attribute__((__vector_size__(8)));\n" |
23182 | "\n" |
23183 | "/* Define the default attributes for the functions in this file. */\n" |
23184 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"mmx\"), __min_vector_width__(64)))\n" |
23185 | "\n" |
23186 | "/// Clears the MMX state by setting the state of the x87 stack registers\n" |
23187 | "/// to empty.\n" |
23188 | "///\n" |
23189 | "/// \\headerfile <x86intrin.h>\n" |
23190 | "///\n" |
23191 | "/// This intrinsic corresponds to the <c> EMMS </c> instruction.\n" |
23192 | "///\n" |
23193 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"mmx\")))\n" |
23194 | "_mm_empty(void)\n" |
23195 | "{\n" |
23196 | " __builtin_ia32_emms();\n" |
23197 | "}\n" |
23198 | "\n" |
23199 | "/// Constructs a 64-bit integer vector, setting the lower 32 bits to the\n" |
23200 | "/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.\n" |
23201 | "///\n" |
23202 | "/// \\headerfile <x86intrin.h>\n" |
23203 | "///\n" |
23204 | "/// This intrinsic corresponds to the <c> MOVD </c> instruction.\n" |
23205 | "///\n" |
23206 | "/// \\param __i\n" |
23207 | "/// A 32-bit integer value.\n" |
23208 | "/// \\returns A 64-bit integer vector. The lower 32 bits contain the value of the\n" |
23209 | "/// parameter. The upper 32 bits are set to 0.\n" |
23210 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23211 | "_mm_cvtsi32_si64(int __i)\n" |
23212 | "{\n" |
23213 | " return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);\n" |
23214 | "}\n" |
23215 | "\n" |
23216 | "/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit\n" |
23217 | "/// signed integer.\n" |
23218 | "///\n" |
23219 | "/// \\headerfile <x86intrin.h>\n" |
23220 | "///\n" |
23221 | "/// This intrinsic corresponds to the <c> MOVD </c> instruction.\n" |
23222 | "///\n" |
23223 | "/// \\param __m\n" |
23224 | "/// A 64-bit integer vector.\n" |
23225 | "/// \\returns A 32-bit signed integer value containing the lower 32 bits of the\n" |
23226 | "/// parameter.\n" |
23227 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
23228 | "_mm_cvtsi64_si32(__m64 __m)\n" |
23229 | "{\n" |
23230 | " return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);\n" |
23231 | "}\n" |
23232 | "\n" |
23233 | "/// Casts a 64-bit signed integer value into a 64-bit integer vector.\n" |
23234 | "///\n" |
23235 | "/// \\headerfile <x86intrin.h>\n" |
23236 | "///\n" |
23237 | "/// This intrinsic corresponds to the <c> MOVQ </c> instruction.\n" |
23238 | "///\n" |
23239 | "/// \\param __i\n" |
23240 | "/// A 64-bit signed integer.\n" |
23241 | "/// \\returns A 64-bit integer vector containing the same bitwise pattern as the\n" |
23242 | "/// parameter.\n" |
23243 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23244 | "_mm_cvtsi64_m64(long long __i)\n" |
23245 | "{\n" |
23246 | " return (__m64)__i;\n" |
23247 | "}\n" |
23248 | "\n" |
23249 | "/// Casts a 64-bit integer vector into a 64-bit signed integer value.\n" |
23250 | "///\n" |
23251 | "/// \\headerfile <x86intrin.h>\n" |
23252 | "///\n" |
23253 | "/// This intrinsic corresponds to the <c> MOVQ </c> instruction.\n" |
23254 | "///\n" |
23255 | "/// \\param __m\n" |
23256 | "/// A 64-bit integer vector.\n" |
23257 | "/// \\returns A 64-bit signed integer containing the same bitwise pattern as the\n" |
23258 | "/// parameter.\n" |
23259 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
23260 | "_mm_cvtm64_si64(__m64 __m)\n" |
23261 | "{\n" |
23262 | " return (long long)__m;\n" |
23263 | "}\n" |
23264 | "\n" |
23265 | "/// Converts 16-bit signed integers from both 64-bit integer vector\n" |
23266 | "/// parameters of [4 x i16] into 8-bit signed integer values, and constructs\n" |
23267 | "/// a 64-bit integer vector of [8 x i8] as the result. Positive values\n" |
23268 | "/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80\n" |
23269 | "/// are saturated to 0x80.\n" |
23270 | "///\n" |
23271 | "/// \\headerfile <x86intrin.h>\n" |
23272 | "///\n" |
23273 | "/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.\n" |
23274 | "///\n" |
23275 | "/// \\param __m1\n" |
23276 | "/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n" |
23277 | "/// 16-bit signed integer and is converted to an 8-bit signed integer with\n" |
23278 | "/// saturation. Positive values greater than 0x7F are saturated to 0x7F.\n" |
23279 | "/// Negative values less than 0x80 are saturated to 0x80. The converted\n" |
23280 | "/// [4 x i8] values are written to the lower 32 bits of the result.\n" |
23281 | "/// \\param __m2\n" |
23282 | "/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n" |
23283 | "/// 16-bit signed integer and is converted to an 8-bit signed integer with\n" |
23284 | "/// saturation. Positive values greater than 0x7F are saturated to 0x7F.\n" |
23285 | "/// Negative values less than 0x80 are saturated to 0x80. The converted\n" |
23286 | "/// [4 x i8] values are written to the upper 32 bits of the result.\n" |
23287 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the converted\n" |
23288 | "/// values.\n" |
23289 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23290 | "_mm_packs_pi16(__m64 __m1, __m64 __m2)\n" |
23291 | "{\n" |
23292 | " return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);\n" |
23293 | "}\n" |
23294 | "\n" |
23295 | "/// Converts 32-bit signed integers from both 64-bit integer vector\n" |
23296 | "/// parameters of [2 x i32] into 16-bit signed integer values, and constructs\n" |
23297 | "/// a 64-bit integer vector of [4 x i16] as the result. Positive values\n" |
23298 | "/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than\n" |
23299 | "/// 0x8000 are saturated to 0x8000.\n" |
23300 | "///\n" |
23301 | "/// \\headerfile <x86intrin.h>\n" |
23302 | "///\n" |
23303 | "/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.\n" |
23304 | "///\n" |
23305 | "/// \\param __m1\n" |
23306 | "/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a\n" |
23307 | "/// 32-bit signed integer and is converted to a 16-bit signed integer with\n" |
23308 | "/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.\n" |
23309 | "/// Negative values less than 0x8000 are saturated to 0x8000. The converted\n" |
23310 | "/// [2 x i16] values are written to the lower 32 bits of the result.\n" |
23311 | "/// \\param __m2\n" |
23312 | "/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a\n" |
23313 | "/// 32-bit signed integer and is converted to a 16-bit signed integer with\n" |
23314 | "/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.\n" |
23315 | "/// Negative values less than 0x8000 are saturated to 0x8000. The converted\n" |
23316 | "/// [2 x i16] values are written to the upper 32 bits of the result.\n" |
23317 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the converted\n" |
23318 | "/// values.\n" |
23319 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23320 | "_mm_packs_pi32(__m64 __m1, __m64 __m2)\n" |
23321 | "{\n" |
23322 | " return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);\n" |
23323 | "}\n" |
23324 | "\n" |
23325 | "/// Converts 16-bit signed integers from both 64-bit integer vector\n" |
23326 | "/// parameters of [4 x i16] into 8-bit unsigned integer values, and\n" |
23327 | "/// constructs a 64-bit integer vector of [8 x i8] as the result. Values\n" |
23328 | "/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated\n" |
23329 | "/// to 0.\n" |
23330 | "///\n" |
23331 | "/// \\headerfile <x86intrin.h>\n" |
23332 | "///\n" |
23333 | "/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.\n" |
23334 | "///\n" |
23335 | "/// \\param __m1\n" |
23336 | "/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n" |
23337 | "/// 16-bit signed integer and is converted to an 8-bit unsigned integer with\n" |
23338 | "/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n" |
23339 | "/// than 0 are saturated to 0. The converted [4 x i8] values are written to\n" |
23340 | "/// the lower 32 bits of the result.\n" |
23341 | "/// \\param __m2\n" |
23342 | "/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n" |
23343 | "/// 16-bit signed integer and is converted to an 8-bit unsigned integer with\n" |
23344 | "/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n" |
23345 | "/// than 0 are saturated to 0. The converted [4 x i8] values are written to\n" |
23346 | "/// the upper 32 bits of the result.\n" |
23347 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the converted\n" |
23348 | "/// values.\n" |
23349 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23350 | "_mm_packs_pu16(__m64 __m1, __m64 __m2)\n" |
23351 | "{\n" |
23352 | " return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);\n" |
23353 | "}\n" |
23354 | "\n" |
23355 | "/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]\n" |
23356 | "/// and interleaves them into a 64-bit integer vector of [8 x i8].\n" |
23357 | "///\n" |
23358 | "/// \\headerfile <x86intrin.h>\n" |
23359 | "///\n" |
23360 | "/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.\n" |
23361 | "///\n" |
23362 | "/// \\param __m1\n" |
23363 | "/// A 64-bit integer vector of [8 x i8]. \\n\n" |
23364 | "/// Bits [39:32] are written to bits [7:0] of the result. \\n\n" |
23365 | "/// Bits [47:40] are written to bits [23:16] of the result. \\n\n" |
23366 | "/// Bits [55:48] are written to bits [39:32] of the result. \\n\n" |
23367 | "/// Bits [63:56] are written to bits [55:48] of the result.\n" |
23368 | "/// \\param __m2\n" |
23369 | "/// A 64-bit integer vector of [8 x i8].\n" |
23370 | "/// Bits [39:32] are written to bits [15:8] of the result. \\n\n" |
23371 | "/// Bits [47:40] are written to bits [31:24] of the result. \\n\n" |
23372 | "/// Bits [55:48] are written to bits [47:40] of the result. \\n\n" |
23373 | "/// Bits [63:56] are written to bits [63:56] of the result.\n" |
23374 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the interleaved\n" |
23375 | "/// values.\n" |
23376 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23377 | "_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)\n" |
23378 | "{\n" |
23379 | " return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);\n" |
23380 | "}\n" |
23381 | "\n" |
23382 | "/// Unpacks the upper 32 bits from two 64-bit integer vectors of\n" |
23383 | "/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].\n" |
23384 | "///\n" |
23385 | "/// \\headerfile <x86intrin.h>\n" |
23386 | "///\n" |
23387 | "/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.\n" |
23388 | "///\n" |
23389 | "/// \\param __m1\n" |
23390 | "/// A 64-bit integer vector of [4 x i16].\n" |
23391 | "/// Bits [47:32] are written to bits [15:0] of the result. \\n\n" |
23392 | "/// Bits [63:48] are written to bits [47:32] of the result.\n" |
23393 | "/// \\param __m2\n" |
23394 | "/// A 64-bit integer vector of [4 x i16].\n" |
23395 | "/// Bits [47:32] are written to bits [31:16] of the result. \\n\n" |
23396 | "/// Bits [63:48] are written to bits [63:48] of the result.\n" |
23397 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the interleaved\n" |
23398 | "/// values.\n" |
23399 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23400 | "_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)\n" |
23401 | "{\n" |
23402 | " return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);\n" |
23403 | "}\n" |
23404 | "\n" |
23405 | "/// Unpacks the upper 32 bits from two 64-bit integer vectors of\n" |
23406 | "/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].\n" |
23407 | "///\n" |
23408 | "/// \\headerfile <x86intrin.h>\n" |
23409 | "///\n" |
23410 | "/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.\n" |
23411 | "///\n" |
23412 | "/// \\param __m1\n" |
23413 | "/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to\n" |
23414 | "/// the lower 32 bits of the result.\n" |
23415 | "/// \\param __m2\n" |
23416 | "/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to\n" |
23417 | "/// the upper 32 bits of the result.\n" |
23418 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the interleaved\n" |
23419 | "/// values.\n" |
23420 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23421 | "_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)\n" |
23422 | "{\n" |
23423 | " return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);\n" |
23424 | "}\n" |
23425 | "\n" |
23426 | "/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]\n" |
23427 | "/// and interleaves them into a 64-bit integer vector of [8 x i8].\n" |
23428 | "///\n" |
23429 | "/// \\headerfile <x86intrin.h>\n" |
23430 | "///\n" |
23431 | "/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.\n" |
23432 | "///\n" |
23433 | "/// \\param __m1\n" |
23434 | "/// A 64-bit integer vector of [8 x i8].\n" |
23435 | "/// Bits [7:0] are written to bits [7:0] of the result. \\n\n" |
23436 | "/// Bits [15:8] are written to bits [23:16] of the result. \\n\n" |
23437 | "/// Bits [23:16] are written to bits [39:32] of the result. \\n\n" |
23438 | "/// Bits [31:24] are written to bits [55:48] of the result.\n" |
23439 | "/// \\param __m2\n" |
23440 | "/// A 64-bit integer vector of [8 x i8].\n" |
23441 | "/// Bits [7:0] are written to bits [15:8] of the result. \\n\n" |
23442 | "/// Bits [15:8] are written to bits [31:24] of the result. \\n\n" |
23443 | "/// Bits [23:16] are written to bits [47:40] of the result. \\n\n" |
23444 | "/// Bits [31:24] are written to bits [63:56] of the result.\n" |
23445 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the interleaved\n" |
23446 | "/// values.\n" |
23447 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23448 | "_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)\n" |
23449 | "{\n" |
23450 | " return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);\n" |
23451 | "}\n" |
23452 | "\n" |
23453 | "/// Unpacks the lower 32 bits from two 64-bit integer vectors of\n" |
23454 | "/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].\n" |
23455 | "///\n" |
23456 | "/// \\headerfile <x86intrin.h>\n" |
23457 | "///\n" |
23458 | "/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.\n" |
23459 | "///\n" |
23460 | "/// \\param __m1\n" |
23461 | "/// A 64-bit integer vector of [4 x i16].\n" |
23462 | "/// Bits [15:0] are written to bits [15:0] of the result. \\n\n" |
23463 | "/// Bits [31:16] are written to bits [47:32] of the result.\n" |
23464 | "/// \\param __m2\n" |
23465 | "/// A 64-bit integer vector of [4 x i16].\n" |
23466 | "/// Bits [15:0] are written to bits [31:16] of the result. \\n\n" |
23467 | "/// Bits [31:16] are written to bits [63:48] of the result.\n" |
23468 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the interleaved\n" |
23469 | "/// values.\n" |
23470 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23471 | "_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)\n" |
23472 | "{\n" |
23473 | " return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);\n" |
23474 | "}\n" |
23475 | "\n" |
23476 | "/// Unpacks the lower 32 bits from two 64-bit integer vectors of\n" |
23477 | "/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].\n" |
23478 | "///\n" |
23479 | "/// \\headerfile <x86intrin.h>\n" |
23480 | "///\n" |
23481 | "/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.\n" |
23482 | "///\n" |
23483 | "/// \\param __m1\n" |
23484 | "/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to\n" |
23485 | "/// the lower 32 bits of the result.\n" |
23486 | "/// \\param __m2\n" |
23487 | "/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to\n" |
23488 | "/// the upper 32 bits of the result.\n" |
23489 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the interleaved\n" |
23490 | "/// values.\n" |
23491 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23492 | "_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)\n" |
23493 | "{\n" |
23494 | " return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);\n" |
23495 | "}\n" |
23496 | "\n" |
23497 | "/// Adds each 8-bit integer element of the first 64-bit integer vector\n" |
23498 | "/// of [8 x i8] to the corresponding 8-bit integer element of the second\n" |
23499 | "/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are\n" |
23500 | "/// packed into a 64-bit integer vector of [8 x i8].\n" |
23501 | "///\n" |
23502 | "/// \\headerfile <x86intrin.h>\n" |
23503 | "///\n" |
23504 | "/// This intrinsic corresponds to the <c> PADDB </c> instruction.\n" |
23505 | "///\n" |
23506 | "/// \\param __m1\n" |
23507 | "/// A 64-bit integer vector of [8 x i8].\n" |
23508 | "/// \\param __m2\n" |
23509 | "/// A 64-bit integer vector of [8 x i8].\n" |
23510 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the sums of both\n" |
23511 | "/// parameters.\n" |
23512 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23513 | "_mm_add_pi8(__m64 __m1, __m64 __m2)\n" |
23514 | "{\n" |
23515 | " return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);\n" |
23516 | "}\n" |
23517 | "\n" |
23518 | "/// Adds each 16-bit integer element of the first 64-bit integer vector\n" |
23519 | "/// of [4 x i16] to the corresponding 16-bit integer element of the second\n" |
23520 | "/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are\n" |
23521 | "/// packed into a 64-bit integer vector of [4 x i16].\n" |
23522 | "///\n" |
23523 | "/// \\headerfile <x86intrin.h>\n" |
23524 | "///\n" |
23525 | "/// This intrinsic corresponds to the <c> PADDW </c> instruction.\n" |
23526 | "///\n" |
23527 | "/// \\param __m1\n" |
23528 | "/// A 64-bit integer vector of [4 x i16].\n" |
23529 | "/// \\param __m2\n" |
23530 | "/// A 64-bit integer vector of [4 x i16].\n" |
23531 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the sums of both\n" |
23532 | "/// parameters.\n" |
23533 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23534 | "_mm_add_pi16(__m64 __m1, __m64 __m2)\n" |
23535 | "{\n" |
23536 | " return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);\n" |
23537 | "}\n" |
23538 | "\n" |
23539 | "/// Adds each 32-bit integer element of the first 64-bit integer vector\n" |
23540 | "/// of [2 x i32] to the corresponding 32-bit integer element of the second\n" |
23541 | "/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are\n" |
23542 | "/// packed into a 64-bit integer vector of [2 x i32].\n" |
23543 | "///\n" |
23544 | "/// \\headerfile <x86intrin.h>\n" |
23545 | "///\n" |
23546 | "/// This intrinsic corresponds to the <c> PADDD </c> instruction.\n" |
23547 | "///\n" |
23548 | "/// \\param __m1\n" |
23549 | "/// A 64-bit integer vector of [2 x i32].\n" |
23550 | "/// \\param __m2\n" |
23551 | "/// A 64-bit integer vector of [2 x i32].\n" |
23552 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the sums of both\n" |
23553 | "/// parameters.\n" |
23554 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23555 | "_mm_add_pi32(__m64 __m1, __m64 __m2)\n" |
23556 | "{\n" |
23557 | " return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);\n" |
23558 | "}\n" |
23559 | "\n" |
23560 | "/// Adds each 8-bit signed integer element of the first 64-bit integer\n" |
23561 | "/// vector of [8 x i8] to the corresponding 8-bit signed integer element of\n" |
23562 | "/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than\n" |
23563 | "/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to\n" |
23564 | "/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].\n" |
23565 | "///\n" |
23566 | "/// \\headerfile <x86intrin.h>\n" |
23567 | "///\n" |
23568 | "/// This intrinsic corresponds to the <c> PADDSB </c> instruction.\n" |
23569 | "///\n" |
23570 | "/// \\param __m1\n" |
23571 | "/// A 64-bit integer vector of [8 x i8].\n" |
23572 | "/// \\param __m2\n" |
23573 | "/// A 64-bit integer vector of [8 x i8].\n" |
23574 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated sums\n" |
23575 | "/// of both parameters.\n" |
23576 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23577 | "_mm_adds_pi8(__m64 __m1, __m64 __m2)\n" |
23578 | "{\n" |
23579 | " return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);\n" |
23580 | "}\n" |
23581 | "\n" |
23582 | "/// Adds each 16-bit signed integer element of the first 64-bit integer\n" |
23583 | "/// vector of [4 x i16] to the corresponding 16-bit signed integer element of\n" |
23584 | "/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than\n" |
23585 | "/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are\n" |
23586 | "/// saturated to 0x8000. The results are packed into a 64-bit integer vector\n" |
23587 | "/// of [4 x i16].\n" |
23588 | "///\n" |
23589 | "/// \\headerfile <x86intrin.h>\n" |
23590 | "///\n" |
23591 | "/// This intrinsic corresponds to the <c> PADDSW </c> instruction.\n" |
23592 | "///\n" |
23593 | "/// \\param __m1\n" |
23594 | "/// A 64-bit integer vector of [4 x i16].\n" |
23595 | "/// \\param __m2\n" |
23596 | "/// A 64-bit integer vector of [4 x i16].\n" |
23597 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated sums\n" |
23598 | "/// of both parameters.\n" |
23599 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23600 | "_mm_adds_pi16(__m64 __m1, __m64 __m2)\n" |
23601 | "{\n" |
23602 | " return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);\n" |
23603 | "}\n" |
23604 | "\n" |
23605 | "/// Adds each 8-bit unsigned integer element of the first 64-bit integer\n" |
23606 | "/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of\n" |
23607 | "/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are\n" |
23608 | "/// saturated to 0xFF. The results are packed into a 64-bit integer vector of\n" |
23609 | "/// [8 x i8].\n" |
23610 | "///\n" |
23611 | "/// \\headerfile <x86intrin.h>\n" |
23612 | "///\n" |
23613 | "/// This intrinsic corresponds to the <c> PADDUSB </c> instruction.\n" |
23614 | "///\n" |
23615 | "/// \\param __m1\n" |
23616 | "/// A 64-bit integer vector of [8 x i8].\n" |
23617 | "/// \\param __m2\n" |
23618 | "/// A 64-bit integer vector of [8 x i8].\n" |
23619 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated\n" |
23620 | "/// unsigned sums of both parameters.\n" |
23621 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23622 | "_mm_adds_pu8(__m64 __m1, __m64 __m2)\n" |
23623 | "{\n" |
23624 | " return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);\n" |
23625 | "}\n" |
23626 | "\n" |
23627 | "/// Adds each 16-bit unsigned integer element of the first 64-bit integer\n" |
23628 | "/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element\n" |
23629 | "/// of the second 64-bit integer vector of [4 x i16]. Sums greater than\n" |
23630 | "/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit\n" |
23631 | "/// integer vector of [4 x i16].\n" |
23632 | "///\n" |
23633 | "/// \\headerfile <x86intrin.h>\n" |
23634 | "///\n" |
23635 | "/// This intrinsic corresponds to the <c> PADDUSW </c> instruction.\n" |
23636 | "///\n" |
23637 | "/// \\param __m1\n" |
23638 | "/// A 64-bit integer vector of [4 x i16].\n" |
23639 | "/// \\param __m2\n" |
23640 | "/// A 64-bit integer vector of [4 x i16].\n" |
23641 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated\n" |
23642 | "/// unsigned sums of both parameters.\n" |
23643 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23644 | "_mm_adds_pu16(__m64 __m1, __m64 __m2)\n" |
23645 | "{\n" |
23646 | " return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);\n" |
23647 | "}\n" |
23648 | "\n" |
23649 | "/// Subtracts each 8-bit integer element of the second 64-bit integer\n" |
23650 | "/// vector of [8 x i8] from the corresponding 8-bit integer element of the\n" |
23651 | "/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results\n" |
23652 | "/// are packed into a 64-bit integer vector of [8 x i8].\n" |
23653 | "///\n" |
23654 | "/// \\headerfile <x86intrin.h>\n" |
23655 | "///\n" |
23656 | "/// This intrinsic corresponds to the <c> PSUBB </c> instruction.\n" |
23657 | "///\n" |
23658 | "/// \\param __m1\n" |
23659 | "/// A 64-bit integer vector of [8 x i8] containing the minuends.\n" |
23660 | "/// \\param __m2\n" |
23661 | "/// A 64-bit integer vector of [8 x i8] containing the subtrahends.\n" |
23662 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the differences of\n" |
23663 | "/// both parameters.\n" |
23664 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23665 | "_mm_sub_pi8(__m64 __m1, __m64 __m2)\n" |
23666 | "{\n" |
23667 | " return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);\n" |
23668 | "}\n" |
23669 | "\n" |
23670 | "/// Subtracts each 16-bit integer element of the second 64-bit integer\n" |
23671 | "/// vector of [4 x i16] from the corresponding 16-bit integer element of the\n" |
23672 | "/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the\n" |
23673 | "/// results are packed into a 64-bit integer vector of [4 x i16].\n" |
23674 | "///\n" |
23675 | "/// \\headerfile <x86intrin.h>\n" |
23676 | "///\n" |
23677 | "/// This intrinsic corresponds to the <c> PSUBW </c> instruction.\n" |
23678 | "///\n" |
23679 | "/// \\param __m1\n" |
23680 | "/// A 64-bit integer vector of [4 x i16] containing the minuends.\n" |
23681 | "/// \\param __m2\n" |
23682 | "/// A 64-bit integer vector of [4 x i16] containing the subtrahends.\n" |
23683 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the differences of\n" |
23684 | "/// both parameters.\n" |
23685 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23686 | "_mm_sub_pi16(__m64 __m1, __m64 __m2)\n" |
23687 | "{\n" |
23688 | " return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);\n" |
23689 | "}\n" |
23690 | "\n" |
23691 | "/// Subtracts each 32-bit integer element of the second 64-bit integer\n" |
23692 | "/// vector of [2 x i32] from the corresponding 32-bit integer element of the\n" |
23693 | "/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the\n" |
23694 | "/// results are packed into a 64-bit integer vector of [2 x i32].\n" |
23695 | "///\n" |
23696 | "/// \\headerfile <x86intrin.h>\n" |
23697 | "///\n" |
23698 | "/// This intrinsic corresponds to the <c> PSUBD </c> instruction.\n" |
23699 | "///\n" |
23700 | "/// \\param __m1\n" |
23701 | "/// A 64-bit integer vector of [2 x i32] containing the minuends.\n" |
23702 | "/// \\param __m2\n" |
23703 | "/// A 64-bit integer vector of [2 x i32] containing the subtrahends.\n" |
23704 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the differences of\n" |
23705 | "/// both parameters.\n" |
23706 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23707 | "_mm_sub_pi32(__m64 __m1, __m64 __m2)\n" |
23708 | "{\n" |
23709 | " return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);\n" |
23710 | "}\n" |
23711 | "\n" |
23712 | "/// Subtracts each 8-bit signed integer element of the second 64-bit\n" |
23713 | "/// integer vector of [8 x i8] from the corresponding 8-bit signed integer\n" |
23714 | "/// element of the first 64-bit integer vector of [8 x i8]. Positive results\n" |
23715 | "/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80\n" |
23716 | "/// are saturated to 0x80. The results are packed into a 64-bit integer\n" |
23717 | "/// vector of [8 x i8].\n" |
23718 | "///\n" |
23719 | "/// \\headerfile <x86intrin.h>\n" |
23720 | "///\n" |
23721 | "/// This intrinsic corresponds to the <c> PSUBSB </c> instruction.\n" |
23722 | "///\n" |
23723 | "/// \\param __m1\n" |
23724 | "/// A 64-bit integer vector of [8 x i8] containing the minuends.\n" |
23725 | "/// \\param __m2\n" |
23726 | "/// A 64-bit integer vector of [8 x i8] containing the subtrahends.\n" |
23727 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated\n" |
23728 | "/// differences of both parameters.\n" |
23729 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23730 | "_mm_subs_pi8(__m64 __m1, __m64 __m2)\n" |
23731 | "{\n" |
23732 | " return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);\n" |
23733 | "}\n" |
23734 | "\n" |
23735 | "/// Subtracts each 16-bit signed integer element of the second 64-bit\n" |
23736 | "/// integer vector of [4 x i16] from the corresponding 16-bit signed integer\n" |
23737 | "/// element of the first 64-bit integer vector of [4 x i16]. Positive results\n" |
23738 | "/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than\n" |
23739 | "/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit\n" |
23740 | "/// integer vector of [4 x i16].\n" |
23741 | "///\n" |
23742 | "/// \\headerfile <x86intrin.h>\n" |
23743 | "///\n" |
23744 | "/// This intrinsic corresponds to the <c> PSUBSW </c> instruction.\n" |
23745 | "///\n" |
23746 | "/// \\param __m1\n" |
23747 | "/// A 64-bit integer vector of [4 x i16] containing the minuends.\n" |
23748 | "/// \\param __m2\n" |
23749 | "/// A 64-bit integer vector of [4 x i16] containing the subtrahends.\n" |
23750 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated\n" |
23751 | "/// differences of both parameters.\n" |
23752 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23753 | "_mm_subs_pi16(__m64 __m1, __m64 __m2)\n" |
23754 | "{\n" |
23755 | " return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);\n" |
23756 | "}\n" |
23757 | "\n" |
23758 | "/// Subtracts each 8-bit unsigned integer element of the second 64-bit\n" |
23759 | "/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer\n" |
23760 | "/// element of the first 64-bit integer vector of [8 x i8].\n" |
23761 | "///\n" |
23762 | "/// If an element of the first vector is less than the corresponding element\n" |
23763 | "/// of the second vector, the result is saturated to 0. The results are\n" |
23764 | "/// packed into a 64-bit integer vector of [8 x i8].\n" |
23765 | "///\n" |
23766 | "/// \\headerfile <x86intrin.h>\n" |
23767 | "///\n" |
23768 | "/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.\n" |
23769 | "///\n" |
23770 | "/// \\param __m1\n" |
23771 | "/// A 64-bit integer vector of [8 x i8] containing the minuends.\n" |
23772 | "/// \\param __m2\n" |
23773 | "/// A 64-bit integer vector of [8 x i8] containing the subtrahends.\n" |
23774 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated\n" |
23775 | "/// differences of both parameters.\n" |
23776 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23777 | "_mm_subs_pu8(__m64 __m1, __m64 __m2)\n" |
23778 | "{\n" |
23779 | " return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);\n" |
23780 | "}\n" |
23781 | "\n" |
23782 | "/// Subtracts each 16-bit unsigned integer element of the second 64-bit\n" |
23783 | "/// integer vector of [4 x i16] from the corresponding 16-bit unsigned\n" |
23784 | "/// integer element of the first 64-bit integer vector of [4 x i16].\n" |
23785 | "///\n" |
23786 | "/// If an element of the first vector is less than the corresponding element\n" |
23787 | "/// of the second vector, the result is saturated to 0. The results are\n" |
23788 | "/// packed into a 64-bit integer vector of [4 x i16].\n" |
23789 | "///\n" |
23790 | "/// \\headerfile <x86intrin.h>\n" |
23791 | "///\n" |
23792 | "/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.\n" |
23793 | "///\n" |
23794 | "/// \\param __m1\n" |
23795 | "/// A 64-bit integer vector of [4 x i16] containing the minuends.\n" |
23796 | "/// \\param __m2\n" |
23797 | "/// A 64-bit integer vector of [4 x i16] containing the subtrahends.\n" |
23798 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated\n" |
23799 | "/// differences of both parameters.\n" |
23800 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23801 | "_mm_subs_pu16(__m64 __m1, __m64 __m2)\n" |
23802 | "{\n" |
23803 | " return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);\n" |
23804 | "}\n" |
23805 | "\n" |
23806 | "/// Multiplies each 16-bit signed integer element of the first 64-bit\n" |
23807 | "/// integer vector of [4 x i16] by the corresponding 16-bit signed integer\n" |
23808 | "/// element of the second 64-bit integer vector of [4 x i16] and get four\n" |
23809 | "/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.\n" |
23810 | "/// The lower 32 bits of these two sums are packed into a 64-bit integer\n" |
23811 | "/// vector of [2 x i32].\n" |
23812 | "///\n" |
23813 | "/// For example, bits [15:0] of both parameters are multiplied, bits [31:16]\n" |
23814 | "/// of both parameters are multiplied, and the sum of both results is written\n" |
23815 | "/// to bits [31:0] of the result.\n" |
23816 | "///\n" |
23817 | "/// \\headerfile <x86intrin.h>\n" |
23818 | "///\n" |
23819 | "/// This intrinsic corresponds to the <c> PMADDWD </c> instruction.\n" |
23820 | "///\n" |
23821 | "/// \\param __m1\n" |
23822 | "/// A 64-bit integer vector of [4 x i16].\n" |
23823 | "/// \\param __m2\n" |
23824 | "/// A 64-bit integer vector of [4 x i16].\n" |
23825 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the sums of\n" |
23826 | "/// products of both parameters.\n" |
23827 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23828 | "_mm_madd_pi16(__m64 __m1, __m64 __m2)\n" |
23829 | "{\n" |
23830 | " return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);\n" |
23831 | "}\n" |
23832 | "\n" |
23833 | "/// Multiplies each 16-bit signed integer element of the first 64-bit\n" |
23834 | "/// integer vector of [4 x i16] by the corresponding 16-bit signed integer\n" |
23835 | "/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper\n" |
23836 | "/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].\n" |
23837 | "///\n" |
23838 | "/// \\headerfile <x86intrin.h>\n" |
23839 | "///\n" |
23840 | "/// This intrinsic corresponds to the <c> PMULHW </c> instruction.\n" |
23841 | "///\n" |
23842 | "/// \\param __m1\n" |
23843 | "/// A 64-bit integer vector of [4 x i16].\n" |
23844 | "/// \\param __m2\n" |
23845 | "/// A 64-bit integer vector of [4 x i16].\n" |
23846 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits\n" |
23847 | "/// of the products of both parameters.\n" |
23848 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23849 | "_mm_mulhi_pi16(__m64 __m1, __m64 __m2)\n" |
23850 | "{\n" |
23851 | " return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);\n" |
23852 | "}\n" |
23853 | "\n" |
23854 | "/// Multiplies each 16-bit signed integer element of the first 64-bit\n" |
23855 | "/// integer vector of [4 x i16] by the corresponding 16-bit signed integer\n" |
23856 | "/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower\n" |
23857 | "/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].\n" |
23858 | "///\n" |
23859 | "/// \\headerfile <x86intrin.h>\n" |
23860 | "///\n" |
23861 | "/// This intrinsic corresponds to the <c> PMULLW </c> instruction.\n" |
23862 | "///\n" |
23863 | "/// \\param __m1\n" |
23864 | "/// A 64-bit integer vector of [4 x i16].\n" |
23865 | "/// \\param __m2\n" |
23866 | "/// A 64-bit integer vector of [4 x i16].\n" |
23867 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits\n" |
23868 | "/// of the products of both parameters.\n" |
23869 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23870 | "_mm_mullo_pi16(__m64 __m1, __m64 __m2)\n" |
23871 | "{\n" |
23872 | " return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);\n" |
23873 | "}\n" |
23874 | "\n" |
23875 | "/// Left-shifts each 16-bit signed integer element of the first\n" |
23876 | "/// parameter, which is a 64-bit integer vector of [4 x i16], by the number\n" |
23877 | "/// of bits specified by the second parameter, which is a 64-bit integer. The\n" |
23878 | "/// lower 16 bits of the results are packed into a 64-bit integer vector of\n" |
23879 | "/// [4 x i16].\n" |
23880 | "///\n" |
23881 | "/// \\headerfile <x86intrin.h>\n" |
23882 | "///\n" |
23883 | "/// This intrinsic corresponds to the <c> PSLLW </c> instruction.\n" |
23884 | "///\n" |
23885 | "/// \\param __m\n" |
23886 | "/// A 64-bit integer vector of [4 x i16].\n" |
23887 | "/// \\param __count\n" |
23888 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
23889 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the left-shifted\n" |
23890 | "/// values. If \\a __count is greater or equal to 16, the result is set to all\n" |
23891 | "/// 0.\n" |
23892 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23893 | "_mm_sll_pi16(__m64 __m, __m64 __count)\n" |
23894 | "{\n" |
23895 | " return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);\n" |
23896 | "}\n" |
23897 | "\n" |
23898 | "/// Left-shifts each 16-bit signed integer element of a 64-bit integer\n" |
23899 | "/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.\n" |
23900 | "/// The lower 16 bits of the results are packed into a 64-bit integer vector\n" |
23901 | "/// of [4 x i16].\n" |
23902 | "///\n" |
23903 | "/// \\headerfile <x86intrin.h>\n" |
23904 | "///\n" |
23905 | "/// This intrinsic corresponds to the <c> PSLLW </c> instruction.\n" |
23906 | "///\n" |
23907 | "/// \\param __m\n" |
23908 | "/// A 64-bit integer vector of [4 x i16].\n" |
23909 | "/// \\param __count\n" |
23910 | "/// A 32-bit integer value.\n" |
23911 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the left-shifted\n" |
23912 | "/// values. If \\a __count is greater or equal to 16, the result is set to all\n" |
23913 | "/// 0.\n" |
23914 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23915 | "_mm_slli_pi16(__m64 __m, int __count)\n" |
23916 | "{\n" |
23917 | " return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);\n" |
23918 | "}\n" |
23919 | "\n" |
23920 | "/// Left-shifts each 32-bit signed integer element of the first\n" |
23921 | "/// parameter, which is a 64-bit integer vector of [2 x i32], by the number\n" |
23922 | "/// of bits specified by the second parameter, which is a 64-bit integer. The\n" |
23923 | "/// lower 32 bits of the results are packed into a 64-bit integer vector of\n" |
23924 | "/// [2 x i32].\n" |
23925 | "///\n" |
23926 | "/// \\headerfile <x86intrin.h>\n" |
23927 | "///\n" |
23928 | "/// This intrinsic corresponds to the <c> PSLLD </c> instruction.\n" |
23929 | "///\n" |
23930 | "/// \\param __m\n" |
23931 | "/// A 64-bit integer vector of [2 x i32].\n" |
23932 | "/// \\param __count\n" |
23933 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
23934 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the left-shifted\n" |
23935 | "/// values. If \\a __count is greater or equal to 32, the result is set to all\n" |
23936 | "/// 0.\n" |
23937 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23938 | "_mm_sll_pi32(__m64 __m, __m64 __count)\n" |
23939 | "{\n" |
23940 | " return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);\n" |
23941 | "}\n" |
23942 | "\n" |
23943 | "/// Left-shifts each 32-bit signed integer element of a 64-bit integer\n" |
23944 | "/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.\n" |
23945 | "/// The lower 32 bits of the results are packed into a 64-bit integer vector\n" |
23946 | "/// of [2 x i32].\n" |
23947 | "///\n" |
23948 | "/// \\headerfile <x86intrin.h>\n" |
23949 | "///\n" |
23950 | "/// This intrinsic corresponds to the <c> PSLLD </c> instruction.\n" |
23951 | "///\n" |
23952 | "/// \\param __m\n" |
23953 | "/// A 64-bit integer vector of [2 x i32].\n" |
23954 | "/// \\param __count\n" |
23955 | "/// A 32-bit integer value.\n" |
23956 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the left-shifted\n" |
23957 | "/// values. If \\a __count is greater or equal to 32, the result is set to all\n" |
23958 | "/// 0.\n" |
23959 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23960 | "_mm_slli_pi32(__m64 __m, int __count)\n" |
23961 | "{\n" |
23962 | " return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);\n" |
23963 | "}\n" |
23964 | "\n" |
23965 | "/// Left-shifts the first 64-bit integer parameter by the number of bits\n" |
23966 | "/// specified by the second 64-bit integer parameter. The lower 64 bits of\n" |
23967 | "/// result are returned.\n" |
23968 | "///\n" |
23969 | "/// \\headerfile <x86intrin.h>\n" |
23970 | "///\n" |
23971 | "/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.\n" |
23972 | "///\n" |
23973 | "/// \\param __m\n" |
23974 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
23975 | "/// \\param __count\n" |
23976 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
23977 | "/// \\returns A 64-bit integer vector containing the left-shifted value. If\n" |
23978 | "/// \\a __count is greater or equal to 64, the result is set to 0.\n" |
23979 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
23980 | "_mm_sll_si64(__m64 __m, __m64 __count)\n" |
23981 | "{\n" |
23982 | " return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);\n" |
23983 | "}\n" |
23984 | "\n" |
23985 | "/// Left-shifts the first parameter, which is a 64-bit integer, by the\n" |
23986 | "/// number of bits specified by the second parameter, which is a 32-bit\n" |
23987 | "/// integer. The lower 64 bits of result are returned.\n" |
23988 | "///\n" |
23989 | "/// \\headerfile <x86intrin.h>\n" |
23990 | "///\n" |
23991 | "/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.\n" |
23992 | "///\n" |
23993 | "/// \\param __m\n" |
23994 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
23995 | "/// \\param __count\n" |
23996 | "/// A 32-bit integer value.\n" |
23997 | "/// \\returns A 64-bit integer vector containing the left-shifted value. If\n" |
23998 | "/// \\a __count is greater or equal to 64, the result is set to 0.\n" |
23999 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24000 | "_mm_slli_si64(__m64 __m, int __count)\n" |
24001 | "{\n" |
24002 | " return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);\n" |
24003 | "}\n" |
24004 | "\n" |
24005 | "/// Right-shifts each 16-bit integer element of the first parameter,\n" |
24006 | "/// which is a 64-bit integer vector of [4 x i16], by the number of bits\n" |
24007 | "/// specified by the second parameter, which is a 64-bit integer.\n" |
24008 | "///\n" |
24009 | "/// High-order bits are filled with the sign bit of the initial value of each\n" |
24010 | "/// 16-bit element. The 16-bit results are packed into a 64-bit integer\n" |
24011 | "/// vector of [4 x i16].\n" |
24012 | "///\n" |
24013 | "/// \\headerfile <x86intrin.h>\n" |
24014 | "///\n" |
24015 | "/// This intrinsic corresponds to the <c> PSRAW </c> instruction.\n" |
24016 | "///\n" |
24017 | "/// \\param __m\n" |
24018 | "/// A 64-bit integer vector of [4 x i16].\n" |
24019 | "/// \\param __count\n" |
24020 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
24021 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n" |
24022 | "/// values.\n" |
24023 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24024 | "_mm_sra_pi16(__m64 __m, __m64 __count)\n" |
24025 | "{\n" |
24026 | " return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);\n" |
24027 | "}\n" |
24028 | "\n" |
24029 | "/// Right-shifts each 16-bit integer element of a 64-bit integer vector\n" |
24030 | "/// of [4 x i16] by the number of bits specified by a 32-bit integer.\n" |
24031 | "///\n" |
24032 | "/// High-order bits are filled with the sign bit of the initial value of each\n" |
24033 | "/// 16-bit element. The 16-bit results are packed into a 64-bit integer\n" |
24034 | "/// vector of [4 x i16].\n" |
24035 | "///\n" |
24036 | "/// \\headerfile <x86intrin.h>\n" |
24037 | "///\n" |
24038 | "/// This intrinsic corresponds to the <c> PSRAW </c> instruction.\n" |
24039 | "///\n" |
24040 | "/// \\param __m\n" |
24041 | "/// A 64-bit integer vector of [4 x i16].\n" |
24042 | "/// \\param __count\n" |
24043 | "/// A 32-bit integer value.\n" |
24044 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n" |
24045 | "/// values.\n" |
24046 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24047 | "_mm_srai_pi16(__m64 __m, int __count)\n" |
24048 | "{\n" |
24049 | " return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);\n" |
24050 | "}\n" |
24051 | "\n" |
24052 | "/// Right-shifts each 32-bit integer element of the first parameter,\n" |
24053 | "/// which is a 64-bit integer vector of [2 x i32], by the number of bits\n" |
24054 | "/// specified by the second parameter, which is a 64-bit integer.\n" |
24055 | "///\n" |
24056 | "/// High-order bits are filled with the sign bit of the initial value of each\n" |
24057 | "/// 32-bit element. The 32-bit results are packed into a 64-bit integer\n" |
24058 | "/// vector of [2 x i32].\n" |
24059 | "///\n" |
24060 | "/// \\headerfile <x86intrin.h>\n" |
24061 | "///\n" |
24062 | "/// This intrinsic corresponds to the <c> PSRAD </c> instruction.\n" |
24063 | "///\n" |
24064 | "/// \\param __m\n" |
24065 | "/// A 64-bit integer vector of [2 x i32].\n" |
24066 | "/// \\param __count\n" |
24067 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
24068 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n" |
24069 | "/// values.\n" |
24070 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24071 | "_mm_sra_pi32(__m64 __m, __m64 __count)\n" |
24072 | "{\n" |
24073 | " return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);\n" |
24074 | "}\n" |
24075 | "\n" |
24076 | "/// Right-shifts each 32-bit integer element of a 64-bit integer vector\n" |
24077 | "/// of [2 x i32] by the number of bits specified by a 32-bit integer.\n" |
24078 | "///\n" |
24079 | "/// High-order bits are filled with the sign bit of the initial value of each\n" |
24080 | "/// 32-bit element. The 32-bit results are packed into a 64-bit integer\n" |
24081 | "/// vector of [2 x i32].\n" |
24082 | "///\n" |
24083 | "/// \\headerfile <x86intrin.h>\n" |
24084 | "///\n" |
24085 | "/// This intrinsic corresponds to the <c> PSRAD </c> instruction.\n" |
24086 | "///\n" |
24087 | "/// \\param __m\n" |
24088 | "/// A 64-bit integer vector of [2 x i32].\n" |
24089 | "/// \\param __count\n" |
24090 | "/// A 32-bit integer value.\n" |
24091 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n" |
24092 | "/// values.\n" |
24093 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24094 | "_mm_srai_pi32(__m64 __m, int __count)\n" |
24095 | "{\n" |
24096 | " return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);\n" |
24097 | "}\n" |
24098 | "\n" |
24099 | "/// Right-shifts each 16-bit integer element of the first parameter,\n" |
24100 | "/// which is a 64-bit integer vector of [4 x i16], by the number of bits\n" |
24101 | "/// specified by the second parameter, which is a 64-bit integer.\n" |
24102 | "///\n" |
24103 | "/// High-order bits are cleared. The 16-bit results are packed into a 64-bit\n" |
24104 | "/// integer vector of [4 x i16].\n" |
24105 | "///\n" |
24106 | "/// \\headerfile <x86intrin.h>\n" |
24107 | "///\n" |
24108 | "/// This intrinsic corresponds to the <c> PSRLW </c> instruction.\n" |
24109 | "///\n" |
24110 | "/// \\param __m\n" |
24111 | "/// A 64-bit integer vector of [4 x i16].\n" |
24112 | "/// \\param __count\n" |
24113 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
24114 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n" |
24115 | "/// values.\n" |
24116 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24117 | "_mm_srl_pi16(__m64 __m, __m64 __count)\n" |
24118 | "{\n" |
24119 | " return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);\n" |
24120 | "}\n" |
24121 | "\n" |
24122 | "/// Right-shifts each 16-bit integer element of a 64-bit integer vector\n" |
24123 | "/// of [4 x i16] by the number of bits specified by a 32-bit integer.\n" |
24124 | "///\n" |
24125 | "/// High-order bits are cleared. The 16-bit results are packed into a 64-bit\n" |
24126 | "/// integer vector of [4 x i16].\n" |
24127 | "///\n" |
24128 | "/// \\headerfile <x86intrin.h>\n" |
24129 | "///\n" |
24130 | "/// This intrinsic corresponds to the <c> PSRLW </c> instruction.\n" |
24131 | "///\n" |
24132 | "/// \\param __m\n" |
24133 | "/// A 64-bit integer vector of [4 x i16].\n" |
24134 | "/// \\param __count\n" |
24135 | "/// A 32-bit integer value.\n" |
24136 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n" |
24137 | "/// values.\n" |
24138 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24139 | "_mm_srli_pi16(__m64 __m, int __count)\n" |
24140 | "{\n" |
24141 | " return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);\n" |
24142 | "}\n" |
24143 | "\n" |
24144 | "/// Right-shifts each 32-bit integer element of the first parameter,\n" |
24145 | "/// which is a 64-bit integer vector of [2 x i32], by the number of bits\n" |
24146 | "/// specified by the second parameter, which is a 64-bit integer.\n" |
24147 | "///\n" |
24148 | "/// High-order bits are cleared. The 32-bit results are packed into a 64-bit\n" |
24149 | "/// integer vector of [2 x i32].\n" |
24150 | "///\n" |
24151 | "/// \\headerfile <x86intrin.h>\n" |
24152 | "///\n" |
24153 | "/// This intrinsic corresponds to the <c> PSRLD </c> instruction.\n" |
24154 | "///\n" |
24155 | "/// \\param __m\n" |
24156 | "/// A 64-bit integer vector of [2 x i32].\n" |
24157 | "/// \\param __count\n" |
24158 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
24159 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n" |
24160 | "/// values.\n" |
24161 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24162 | "_mm_srl_pi32(__m64 __m, __m64 __count)\n" |
24163 | "{\n" |
24164 | " return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);\n" |
24165 | "}\n" |
24166 | "\n" |
24167 | "/// Right-shifts each 32-bit integer element of a 64-bit integer vector\n" |
24168 | "/// of [2 x i32] by the number of bits specified by a 32-bit integer.\n" |
24169 | "///\n" |
24170 | "/// High-order bits are cleared. The 32-bit results are packed into a 64-bit\n" |
24171 | "/// integer vector of [2 x i32].\n" |
24172 | "///\n" |
24173 | "/// \\headerfile <x86intrin.h>\n" |
24174 | "///\n" |
24175 | "/// This intrinsic corresponds to the <c> PSRLD </c> instruction.\n" |
24176 | "///\n" |
24177 | "/// \\param __m\n" |
24178 | "/// A 64-bit integer vector of [2 x i32].\n" |
24179 | "/// \\param __count\n" |
24180 | "/// A 32-bit integer value.\n" |
24181 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n" |
24182 | "/// values.\n" |
24183 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24184 | "_mm_srli_pi32(__m64 __m, int __count)\n" |
24185 | "{\n" |
24186 | " return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);\n" |
24187 | "}\n" |
24188 | "\n" |
24189 | "/// Right-shifts the first 64-bit integer parameter by the number of bits\n" |
24190 | "/// specified by the second 64-bit integer parameter.\n" |
24191 | "///\n" |
24192 | "/// High-order bits are cleared.\n" |
24193 | "///\n" |
24194 | "/// \\headerfile <x86intrin.h>\n" |
24195 | "///\n" |
24196 | "/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.\n" |
24197 | "///\n" |
24198 | "/// \\param __m\n" |
24199 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
24200 | "/// \\param __count\n" |
24201 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
24202 | "/// \\returns A 64-bit integer vector containing the right-shifted value.\n" |
24203 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24204 | "_mm_srl_si64(__m64 __m, __m64 __count)\n" |
24205 | "{\n" |
24206 | " return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);\n" |
24207 | "}\n" |
24208 | "\n" |
24209 | "/// Right-shifts the first parameter, which is a 64-bit integer, by the\n" |
24210 | "/// number of bits specified by the second parameter, which is a 32-bit\n" |
24211 | "/// integer.\n" |
24212 | "///\n" |
24213 | "/// High-order bits are cleared.\n" |
24214 | "///\n" |
24215 | "/// \\headerfile <x86intrin.h>\n" |
24216 | "///\n" |
24217 | "/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.\n" |
24218 | "///\n" |
24219 | "/// \\param __m\n" |
24220 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
24221 | "/// \\param __count\n" |
24222 | "/// A 32-bit integer value.\n" |
24223 | "/// \\returns A 64-bit integer vector containing the right-shifted value.\n" |
24224 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24225 | "_mm_srli_si64(__m64 __m, int __count)\n" |
24226 | "{\n" |
24227 | " return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);\n" |
24228 | "}\n" |
24229 | "\n" |
24230 | "/// Performs a bitwise AND of two 64-bit integer vectors.\n" |
24231 | "///\n" |
24232 | "/// \\headerfile <x86intrin.h>\n" |
24233 | "///\n" |
24234 | "/// This intrinsic corresponds to the <c> PAND </c> instruction.\n" |
24235 | "///\n" |
24236 | "/// \\param __m1\n" |
24237 | "/// A 64-bit integer vector.\n" |
24238 | "/// \\param __m2\n" |
24239 | "/// A 64-bit integer vector.\n" |
24240 | "/// \\returns A 64-bit integer vector containing the bitwise AND of both\n" |
24241 | "/// parameters.\n" |
24242 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24243 | "_mm_and_si64(__m64 __m1, __m64 __m2)\n" |
24244 | "{\n" |
24245 | " return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);\n" |
24246 | "}\n" |
24247 | "\n" |
24248 | "/// Performs a bitwise NOT of the first 64-bit integer vector, and then\n" |
24249 | "/// performs a bitwise AND of the intermediate result and the second 64-bit\n" |
24250 | "/// integer vector.\n" |
24251 | "///\n" |
24252 | "/// \\headerfile <x86intrin.h>\n" |
24253 | "///\n" |
24254 | "/// This intrinsic corresponds to the <c> PANDN </c> instruction.\n" |
24255 | "///\n" |
24256 | "/// \\param __m1\n" |
24257 | "/// A 64-bit integer vector. The one's complement of this parameter is used\n" |
24258 | "/// in the bitwise AND.\n" |
24259 | "/// \\param __m2\n" |
24260 | "/// A 64-bit integer vector.\n" |
24261 | "/// \\returns A 64-bit integer vector containing the bitwise AND of the second\n" |
24262 | "/// parameter and the one's complement of the first parameter.\n" |
24263 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24264 | "_mm_andnot_si64(__m64 __m1, __m64 __m2)\n" |
24265 | "{\n" |
24266 | " return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);\n" |
24267 | "}\n" |
24268 | "\n" |
24269 | "/// Performs a bitwise OR of two 64-bit integer vectors.\n" |
24270 | "///\n" |
24271 | "/// \\headerfile <x86intrin.h>\n" |
24272 | "///\n" |
24273 | "/// This intrinsic corresponds to the <c> POR </c> instruction.\n" |
24274 | "///\n" |
24275 | "/// \\param __m1\n" |
24276 | "/// A 64-bit integer vector.\n" |
24277 | "/// \\param __m2\n" |
24278 | "/// A 64-bit integer vector.\n" |
24279 | "/// \\returns A 64-bit integer vector containing the bitwise OR of both\n" |
24280 | "/// parameters.\n" |
24281 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24282 | "_mm_or_si64(__m64 __m1, __m64 __m2)\n" |
24283 | "{\n" |
24284 | " return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);\n" |
24285 | "}\n" |
24286 | "\n" |
24287 | "/// Performs a bitwise exclusive OR of two 64-bit integer vectors.\n" |
24288 | "///\n" |
24289 | "/// \\headerfile <x86intrin.h>\n" |
24290 | "///\n" |
24291 | "/// This intrinsic corresponds to the <c> PXOR </c> instruction.\n" |
24292 | "///\n" |
24293 | "/// \\param __m1\n" |
24294 | "/// A 64-bit integer vector.\n" |
24295 | "/// \\param __m2\n" |
24296 | "/// A 64-bit integer vector.\n" |
24297 | "/// \\returns A 64-bit integer vector containing the bitwise exclusive OR of both\n" |
24298 | "/// parameters.\n" |
24299 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24300 | "_mm_xor_si64(__m64 __m1, __m64 __m2)\n" |
24301 | "{\n" |
24302 | " return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);\n" |
24303 | "}\n" |
24304 | "\n" |
24305 | "/// Compares the 8-bit integer elements of two 64-bit integer vectors of\n" |
24306 | "/// [8 x i8] to determine if the element of the first vector is equal to the\n" |
24307 | "/// corresponding element of the second vector.\n" |
24308 | "///\n" |
24309 | "/// The comparison yields 0 for false, 0xFF for true.\n" |
24310 | "///\n" |
24311 | "/// \\headerfile <x86intrin.h>\n" |
24312 | "///\n" |
24313 | "/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.\n" |
24314 | "///\n" |
24315 | "/// \\param __m1\n" |
24316 | "/// A 64-bit integer vector of [8 x i8].\n" |
24317 | "/// \\param __m2\n" |
24318 | "/// A 64-bit integer vector of [8 x i8].\n" |
24319 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the comparison\n" |
24320 | "/// results.\n" |
24321 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24322 | "_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)\n" |
24323 | "{\n" |
24324 | " return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);\n" |
24325 | "}\n" |
24326 | "\n" |
24327 | "/// Compares the 16-bit integer elements of two 64-bit integer vectors of\n" |
24328 | "/// [4 x i16] to determine if the element of the first vector is equal to the\n" |
24329 | "/// corresponding element of the second vector.\n" |
24330 | "///\n" |
24331 | "/// The comparison yields 0 for false, 0xFFFF for true.\n" |
24332 | "///\n" |
24333 | "/// \\headerfile <x86intrin.h>\n" |
24334 | "///\n" |
24335 | "/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.\n" |
24336 | "///\n" |
24337 | "/// \\param __m1\n" |
24338 | "/// A 64-bit integer vector of [4 x i16].\n" |
24339 | "/// \\param __m2\n" |
24340 | "/// A 64-bit integer vector of [4 x i16].\n" |
24341 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the comparison\n" |
24342 | "/// results.\n" |
24343 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24344 | "_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)\n" |
24345 | "{\n" |
24346 | " return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);\n" |
24347 | "}\n" |
24348 | "\n" |
24349 | "/// Compares the 32-bit integer elements of two 64-bit integer vectors of\n" |
24350 | "/// [2 x i32] to determine if the element of the first vector is equal to the\n" |
24351 | "/// corresponding element of the second vector.\n" |
24352 | "///\n" |
24353 | "/// The comparison yields 0 for false, 0xFFFFFFFF for true.\n" |
24354 | "///\n" |
24355 | "/// \\headerfile <x86intrin.h>\n" |
24356 | "///\n" |
24357 | "/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.\n" |
24358 | "///\n" |
24359 | "/// \\param __m1\n" |
24360 | "/// A 64-bit integer vector of [2 x i32].\n" |
24361 | "/// \\param __m2\n" |
24362 | "/// A 64-bit integer vector of [2 x i32].\n" |
24363 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the comparison\n" |
24364 | "/// results.\n" |
24365 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24366 | "_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)\n" |
24367 | "{\n" |
24368 | " return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);\n" |
24369 | "}\n" |
24370 | "\n" |
24371 | "/// Compares the 8-bit integer elements of two 64-bit integer vectors of\n" |
24372 | "/// [8 x i8] to determine if the element of the first vector is greater than\n" |
24373 | "/// the corresponding element of the second vector.\n" |
24374 | "///\n" |
24375 | "/// The comparison yields 0 for false, 0xFF for true.\n" |
24376 | "///\n" |
24377 | "/// \\headerfile <x86intrin.h>\n" |
24378 | "///\n" |
24379 | "/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.\n" |
24380 | "///\n" |
24381 | "/// \\param __m1\n" |
24382 | "/// A 64-bit integer vector of [8 x i8].\n" |
24383 | "/// \\param __m2\n" |
24384 | "/// A 64-bit integer vector of [8 x i8].\n" |
24385 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the comparison\n" |
24386 | "/// results.\n" |
24387 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24388 | "_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)\n" |
24389 | "{\n" |
24390 | " return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);\n" |
24391 | "}\n" |
24392 | "\n" |
24393 | "/// Compares the 16-bit integer elements of two 64-bit integer vectors of\n" |
24394 | "/// [4 x i16] to determine if the element of the first vector is greater than\n" |
24395 | "/// the corresponding element of the second vector.\n" |
24396 | "///\n" |
24397 | "/// The comparison yields 0 for false, 0xFFFF for true.\n" |
24398 | "///\n" |
24399 | "/// \\headerfile <x86intrin.h>\n" |
24400 | "///\n" |
24401 | "/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.\n" |
24402 | "///\n" |
24403 | "/// \\param __m1\n" |
24404 | "/// A 64-bit integer vector of [4 x i16].\n" |
24405 | "/// \\param __m2\n" |
24406 | "/// A 64-bit integer vector of [4 x i16].\n" |
24407 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the comparison\n" |
24408 | "/// results.\n" |
24409 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24410 | "_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)\n" |
24411 | "{\n" |
24412 | " return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);\n" |
24413 | "}\n" |
24414 | "\n" |
24415 | "/// Compares the 32-bit integer elements of two 64-bit integer vectors of\n" |
24416 | "/// [2 x i32] to determine if the element of the first vector is greater than\n" |
24417 | "/// the corresponding element of the second vector.\n" |
24418 | "///\n" |
24419 | "/// The comparison yields 0 for false, 0xFFFFFFFF for true.\n" |
24420 | "///\n" |
24421 | "/// \\headerfile <x86intrin.h>\n" |
24422 | "///\n" |
24423 | "/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.\n" |
24424 | "///\n" |
24425 | "/// \\param __m1\n" |
24426 | "/// A 64-bit integer vector of [2 x i32].\n" |
24427 | "/// \\param __m2\n" |
24428 | "/// A 64-bit integer vector of [2 x i32].\n" |
24429 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the comparison\n" |
24430 | "/// results.\n" |
24431 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24432 | "_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)\n" |
24433 | "{\n" |
24434 | " return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);\n" |
24435 | "}\n" |
24436 | "\n" |
24437 | "/// Constructs a 64-bit integer vector initialized to zero.\n" |
24438 | "///\n" |
24439 | "/// \\headerfile <x86intrin.h>\n" |
24440 | "///\n" |
24441 | "/// This intrinsic corresponds to the <c> PXOR </c> instruction.\n" |
24442 | "///\n" |
24443 | "/// \\returns An initialized 64-bit integer vector with all elements set to zero.\n" |
24444 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24445 | "_mm_setzero_si64(void)\n" |
24446 | "{\n" |
24447 | " return __extension__ (__m64){ 0LL };\n" |
24448 | "}\n" |
24449 | "\n" |
24450 | "/// Constructs a 64-bit integer vector initialized with the specified\n" |
24451 | "/// 32-bit integer values.\n" |
24452 | "///\n" |
24453 | "/// \\headerfile <x86intrin.h>\n" |
24454 | "///\n" |
24455 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
24456 | "/// instruction.\n" |
24457 | "///\n" |
24458 | "/// \\param __i1\n" |
24459 | "/// A 32-bit integer value used to initialize the upper 32 bits of the\n" |
24460 | "/// result.\n" |
24461 | "/// \\param __i0\n" |
24462 | "/// A 32-bit integer value used to initialize the lower 32 bits of the\n" |
24463 | "/// result.\n" |
24464 | "/// \\returns An initialized 64-bit integer vector.\n" |
24465 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24466 | "_mm_set_pi32(int __i1, int __i0)\n" |
24467 | "{\n" |
24468 | " return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);\n" |
24469 | "}\n" |
24470 | "\n" |
24471 | "/// Constructs a 64-bit integer vector initialized with the specified\n" |
24472 | "/// 16-bit integer values.\n" |
24473 | "///\n" |
24474 | "/// \\headerfile <x86intrin.h>\n" |
24475 | "///\n" |
24476 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
24477 | "/// instruction.\n" |
24478 | "///\n" |
24479 | "/// \\param __s3\n" |
24480 | "/// A 16-bit integer value used to initialize bits [63:48] of the result.\n" |
24481 | "/// \\param __s2\n" |
24482 | "/// A 16-bit integer value used to initialize bits [47:32] of the result.\n" |
24483 | "/// \\param __s1\n" |
24484 | "/// A 16-bit integer value used to initialize bits [31:16] of the result.\n" |
24485 | "/// \\param __s0\n" |
24486 | "/// A 16-bit integer value used to initialize bits [15:0] of the result.\n" |
24487 | "/// \\returns An initialized 64-bit integer vector.\n" |
24488 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24489 | "_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)\n" |
24490 | "{\n" |
24491 | " return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);\n" |
24492 | "}\n" |
24493 | "\n" |
24494 | "/// Constructs a 64-bit integer vector initialized with the specified\n" |
24495 | "/// 8-bit integer values.\n" |
24496 | "///\n" |
24497 | "/// \\headerfile <x86intrin.h>\n" |
24498 | "///\n" |
24499 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
24500 | "/// instruction.\n" |
24501 | "///\n" |
24502 | "/// \\param __b7\n" |
24503 | "/// An 8-bit integer value used to initialize bits [63:56] of the result.\n" |
24504 | "/// \\param __b6\n" |
24505 | "/// An 8-bit integer value used to initialize bits [55:48] of the result.\n" |
24506 | "/// \\param __b5\n" |
24507 | "/// An 8-bit integer value used to initialize bits [47:40] of the result.\n" |
24508 | "/// \\param __b4\n" |
24509 | "/// An 8-bit integer value used to initialize bits [39:32] of the result.\n" |
24510 | "/// \\param __b3\n" |
24511 | "/// An 8-bit integer value used to initialize bits [31:24] of the result.\n" |
24512 | "/// \\param __b2\n" |
24513 | "/// An 8-bit integer value used to initialize bits [23:16] of the result.\n" |
24514 | "/// \\param __b1\n" |
24515 | "/// An 8-bit integer value used to initialize bits [15:8] of the result.\n" |
24516 | "/// \\param __b0\n" |
24517 | "/// An 8-bit integer value used to initialize bits [7:0] of the result.\n" |
24518 | "/// \\returns An initialized 64-bit integer vector.\n" |
24519 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24520 | "_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,\n" |
24521 | " char __b1, char __b0)\n" |
24522 | "{\n" |
24523 | " return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,\n" |
24524 | " __b4, __b5, __b6, __b7);\n" |
24525 | "}\n" |
24526 | "\n" |
24527 | "/// Constructs a 64-bit integer vector of [2 x i32], with each of the\n" |
24528 | "/// 32-bit integer vector elements set to the specified 32-bit integer\n" |
24529 | "/// value.\n" |
24530 | "///\n" |
24531 | "/// \\headerfile <x86intrin.h>\n" |
24532 | "///\n" |
24533 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
24534 | "/// instruction.\n" |
24535 | "///\n" |
24536 | "/// \\param __i\n" |
24537 | "/// A 32-bit integer value used to initialize each vector element of the\n" |
24538 | "/// result.\n" |
24539 | "/// \\returns An initialized 64-bit integer vector of [2 x i32].\n" |
24540 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24541 | "_mm_set1_pi32(int __i)\n" |
24542 | "{\n" |
24543 | " return _mm_set_pi32(__i, __i);\n" |
24544 | "}\n" |
24545 | "\n" |
24546 | "/// Constructs a 64-bit integer vector of [4 x i16], with each of the\n" |
24547 | "/// 16-bit integer vector elements set to the specified 16-bit integer\n" |
24548 | "/// value.\n" |
24549 | "///\n" |
24550 | "/// \\headerfile <x86intrin.h>\n" |
24551 | "///\n" |
24552 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
24553 | "/// instruction.\n" |
24554 | "///\n" |
24555 | "/// \\param __w\n" |
24556 | "/// A 16-bit integer value used to initialize each vector element of the\n" |
24557 | "/// result.\n" |
24558 | "/// \\returns An initialized 64-bit integer vector of [4 x i16].\n" |
24559 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24560 | "_mm_set1_pi16(short __w)\n" |
24561 | "{\n" |
24562 | " return _mm_set_pi16(__w, __w, __w, __w);\n" |
24563 | "}\n" |
24564 | "\n" |
24565 | "/// Constructs a 64-bit integer vector of [8 x i8], with each of the\n" |
24566 | "/// 8-bit integer vector elements set to the specified 8-bit integer value.\n" |
24567 | "///\n" |
24568 | "/// \\headerfile <x86intrin.h>\n" |
24569 | "///\n" |
24570 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
24571 | "/// instruction.\n" |
24572 | "///\n" |
24573 | "/// \\param __b\n" |
24574 | "/// An 8-bit integer value used to initialize each vector element of the\n" |
24575 | "/// result.\n" |
24576 | "/// \\returns An initialized 64-bit integer vector of [8 x i8].\n" |
24577 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24578 | "_mm_set1_pi8(char __b)\n" |
24579 | "{\n" |
24580 | " return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);\n" |
24581 | "}\n" |
24582 | "\n" |
24583 | "/// Constructs a 64-bit integer vector, initialized in reverse order with\n" |
24584 | "/// the specified 32-bit integer values.\n" |
24585 | "///\n" |
24586 | "/// \\headerfile <x86intrin.h>\n" |
24587 | "///\n" |
24588 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
24589 | "/// instruction.\n" |
24590 | "///\n" |
24591 | "/// \\param __i0\n" |
24592 | "/// A 32-bit integer value used to initialize the lower 32 bits of the\n" |
24593 | "/// result.\n" |
24594 | "/// \\param __i1\n" |
24595 | "/// A 32-bit integer value used to initialize the upper 32 bits of the\n" |
24596 | "/// result.\n" |
24597 | "/// \\returns An initialized 64-bit integer vector.\n" |
24598 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24599 | "_mm_setr_pi32(int __i0, int __i1)\n" |
24600 | "{\n" |
24601 | " return _mm_set_pi32(__i1, __i0);\n" |
24602 | "}\n" |
24603 | "\n" |
24604 | "/// Constructs a 64-bit integer vector, initialized in reverse order with\n" |
24605 | "/// the specified 16-bit integer values.\n" |
24606 | "///\n" |
24607 | "/// \\headerfile <x86intrin.h>\n" |
24608 | "///\n" |
24609 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
24610 | "/// instruction.\n" |
24611 | "///\n" |
24612 | "/// \\param __w0\n" |
24613 | "/// A 16-bit integer value used to initialize bits [15:0] of the result.\n" |
24614 | "/// \\param __w1\n" |
24615 | "/// A 16-bit integer value used to initialize bits [31:16] of the result.\n" |
24616 | "/// \\param __w2\n" |
24617 | "/// A 16-bit integer value used to initialize bits [47:32] of the result.\n" |
24618 | "/// \\param __w3\n" |
24619 | "/// A 16-bit integer value used to initialize bits [63:48] of the result.\n" |
24620 | "/// \\returns An initialized 64-bit integer vector.\n" |
24621 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24622 | "_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)\n" |
24623 | "{\n" |
24624 | " return _mm_set_pi16(__w3, __w2, __w1, __w0);\n" |
24625 | "}\n" |
24626 | "\n" |
24627 | "/// Constructs a 64-bit integer vector, initialized in reverse order with\n" |
24628 | "/// the specified 8-bit integer values.\n" |
24629 | "///\n" |
24630 | "/// \\headerfile <x86intrin.h>\n" |
24631 | "///\n" |
24632 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
24633 | "/// instruction.\n" |
24634 | "///\n" |
24635 | "/// \\param __b0\n" |
24636 | "/// An 8-bit integer value used to initialize bits [7:0] of the result.\n" |
24637 | "/// \\param __b1\n" |
24638 | "/// An 8-bit integer value used to initialize bits [15:8] of the result.\n" |
24639 | "/// \\param __b2\n" |
24640 | "/// An 8-bit integer value used to initialize bits [23:16] of the result.\n" |
24641 | "/// \\param __b3\n" |
24642 | "/// An 8-bit integer value used to initialize bits [31:24] of the result.\n" |
24643 | "/// \\param __b4\n" |
24644 | "/// An 8-bit integer value used to initialize bits [39:32] of the result.\n" |
24645 | "/// \\param __b5\n" |
24646 | "/// An 8-bit integer value used to initialize bits [47:40] of the result.\n" |
24647 | "/// \\param __b6\n" |
24648 | "/// An 8-bit integer value used to initialize bits [55:48] of the result.\n" |
24649 | "/// \\param __b7\n" |
24650 | "/// An 8-bit integer value used to initialize bits [63:56] of the result.\n" |
24651 | "/// \\returns An initialized 64-bit integer vector.\n" |
24652 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
24653 | "_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,\n" |
24654 | " char __b6, char __b7)\n" |
24655 | "{\n" |
24656 | " return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);\n" |
24657 | "}\n" |
24658 | "\n" |
24659 | "#undef __DEFAULT_FN_ATTRS\n" |
24660 | "\n" |
24661 | "/* Aliases for compatibility. */\n" |
24662 | "#define _m_empty _mm_empty\n" |
24663 | "#define _m_from_int _mm_cvtsi32_si64\n" |
24664 | "#define _m_from_int64 _mm_cvtsi64_m64\n" |
24665 | "#define _m_to_int _mm_cvtsi64_si32\n" |
24666 | "#define _m_to_int64 _mm_cvtm64_si64\n" |
24667 | "#define _m_packsswb _mm_packs_pi16\n" |
24668 | "#define _m_packssdw _mm_packs_pi32\n" |
24669 | "#define _m_packuswb _mm_packs_pu16\n" |
24670 | "#define _m_punpckhbw _mm_unpackhi_pi8\n" |
24671 | "#define _m_punpckhwd _mm_unpackhi_pi16\n" |
24672 | "#define _m_punpckhdq _mm_unpackhi_pi32\n" |
24673 | "#define _m_punpcklbw _mm_unpacklo_pi8\n" |
24674 | "#define _m_punpcklwd _mm_unpacklo_pi16\n" |
24675 | "#define _m_punpckldq _mm_unpacklo_pi32\n" |
24676 | "#define _m_paddb _mm_add_pi8\n" |
24677 | "#define _m_paddw _mm_add_pi16\n" |
24678 | "#define _m_paddd _mm_add_pi32\n" |
24679 | "#define _m_paddsb _mm_adds_pi8\n" |
24680 | "#define _m_paddsw _mm_adds_pi16\n" |
24681 | "#define _m_paddusb _mm_adds_pu8\n" |
24682 | "#define _m_paddusw _mm_adds_pu16\n" |
24683 | "#define _m_psubb _mm_sub_pi8\n" |
24684 | "#define _m_psubw _mm_sub_pi16\n" |
24685 | "#define _m_psubd _mm_sub_pi32\n" |
24686 | "#define _m_psubsb _mm_subs_pi8\n" |
24687 | "#define _m_psubsw _mm_subs_pi16\n" |
24688 | "#define _m_psubusb _mm_subs_pu8\n" |
24689 | "#define _m_psubusw _mm_subs_pu16\n" |
24690 | "#define _m_pmaddwd _mm_madd_pi16\n" |
24691 | "#define _m_pmulhw _mm_mulhi_pi16\n" |
24692 | "#define _m_pmullw _mm_mullo_pi16\n" |
24693 | "#define _m_psllw _mm_sll_pi16\n" |
24694 | "#define _m_psllwi _mm_slli_pi16\n" |
24695 | "#define _m_pslld _mm_sll_pi32\n" |
24696 | "#define _m_pslldi _mm_slli_pi32\n" |
24697 | "#define _m_psllq _mm_sll_si64\n" |
24698 | "#define _m_psllqi _mm_slli_si64\n" |
24699 | "#define _m_psraw _mm_sra_pi16\n" |
24700 | "#define _m_psrawi _mm_srai_pi16\n" |
24701 | "#define _m_psrad _mm_sra_pi32\n" |
24702 | "#define _m_psradi _mm_srai_pi32\n" |
24703 | "#define _m_psrlw _mm_srl_pi16\n" |
24704 | "#define _m_psrlwi _mm_srli_pi16\n" |
24705 | "#define _m_psrld _mm_srl_pi32\n" |
24706 | "#define _m_psrldi _mm_srli_pi32\n" |
24707 | "#define _m_psrlq _mm_srl_si64\n" |
24708 | "#define _m_psrlqi _mm_srli_si64\n" |
24709 | "#define _m_pand _mm_and_si64\n" |
24710 | "#define _m_pandn _mm_andnot_si64\n" |
24711 | "#define _m_por _mm_or_si64\n" |
24712 | "#define _m_pxor _mm_xor_si64\n" |
24713 | "#define _m_pcmpeqb _mm_cmpeq_pi8\n" |
24714 | "#define _m_pcmpeqw _mm_cmpeq_pi16\n" |
24715 | "#define _m_pcmpeqd _mm_cmpeq_pi32\n" |
24716 | "#define _m_pcmpgtb _mm_cmpgt_pi8\n" |
24717 | "#define _m_pcmpgtw _mm_cmpgt_pi16\n" |
24718 | "#define _m_pcmpgtd _mm_cmpgt_pi32\n" |
24719 | "\n" |
24720 | "#endif /* __MMINTRIN_H */\n" |
24721 | "\n" |
24722 | "" } , |
24723 | { "/builtins/movdirintrin.h" , "/*===------------------------- movdirintrin.h ------------------------------===\n" |
24724 | " *\n" |
24725 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
24726 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
24727 | " * in the Software without restriction, including without limitation the rights\n" |
24728 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
24729 | " * copies of the Software, and to permit persons to whom the Software is\n" |
24730 | " * furnished to do so, subject to the following conditions:\n" |
24731 | " *\n" |
24732 | " * The above copyright notice and this permission notice shall be included in\n" |
24733 | " * all copies or substantial portions of the Software.\n" |
24734 | " *\n" |
24735 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
24736 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
24737 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
24738 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
24739 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
24740 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
24741 | " * THE SOFTWARE.\n" |
24742 | " *\n" |
24743 | " *===-----------------------------------------------------------------------===\n" |
24744 | " */\n" |
24745 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
24746 | "#error \"Never use <movdirintrin.h> directly; include <x86intrin.h> instead.\"\n" |
24747 | "#endif\n" |
24748 | "\n" |
24749 | "#ifndef _MOVDIRINTRIN_H\n" |
24750 | "#define _MOVDIRINTRIN_H\n" |
24751 | "\n" |
24752 | "/* Move doubleword as direct store */\n" |
24753 | "static __inline__ void\n" |
24754 | "__attribute__((__always_inline__, __nodebug__, __target__(\"movdiri\")))\n" |
24755 | "_directstoreu_u32 (void *__dst, unsigned int __value)\n" |
24756 | "{\n" |
24757 | " __builtin_ia32_directstore_u32((unsigned int *)__dst, (unsigned int)__value);\n" |
24758 | "}\n" |
24759 | "\n" |
24760 | "#ifdef __x86_64__\n" |
24761 | "\n" |
24762 | "/* Move quadword as direct store */\n" |
24763 | "static __inline__ void\n" |
24764 | "__attribute__((__always_inline__, __nodebug__, __target__(\"movdiri\")))\n" |
24765 | "_directstoreu_u64 (void *__dst, unsigned long __value)\n" |
24766 | "{\n" |
24767 | " __builtin_ia32_directstore_u64((unsigned long *)__dst, __value);\n" |
24768 | "}\n" |
24769 | "\n" |
24770 | "#endif /* __x86_64__ */\n" |
24771 | "\n" |
24772 | "/*\n" |
24773 | " * movdir64b - Move 64 bytes as direct store.\n" |
24774 | " * The destination must be 64 byte aligned, and the store is atomic.\n" |
24775 | " * The source address has no alignment requirement, and the load from\n" |
24776 | " * the source address is not atomic.\n" |
24777 | " */\n" |
24778 | "static __inline__ void\n" |
24779 | "__attribute__((__always_inline__, __nodebug__, __target__(\"movdir64b\")))\n" |
24780 | "_movdir64b (void *__dst __attribute__((align_value(64))), const void *__src)\n" |
24781 | "{\n" |
24782 | " __builtin_ia32_movdir64b(__dst, __src);\n" |
24783 | "}\n" |
24784 | "\n" |
24785 | "#endif /* _MOVDIRINTRIN_H */\n" |
24786 | "" } , |
24787 | { "/builtins/msa.h" , "/*===---- msa.h - MIPS MSA intrinsics --------------------------------------===\n" |
24788 | " *\n" |
24789 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
24790 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
24791 | " * in the Software without restriction, including without limitation the rights\n" |
24792 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
24793 | " * copies of the Software, and to permit persons to whom the Software is\n" |
24794 | " * furnished to do so, subject to the following conditions:\n" |
24795 | " *\n" |
24796 | " * The above copyright notice and this permission notice shall be included in\n" |
24797 | " * all copies or substantial portions of the Software.\n" |
24798 | " *\n" |
24799 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
24800 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
24801 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
24802 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
24803 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
24804 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
24805 | " * THE SOFTWARE.\n" |
24806 | " *\n" |
24807 | " *===-----------------------------------------------------------------------===\n" |
24808 | " */\n" |
24809 | "\n" |
24810 | "#ifndef _MSA_H\n" |
24811 | "#define _MSA_H 1\n" |
24812 | "\n" |
24813 | "#if defined(__mips_msa)\n" |
24814 | "typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));\n" |
24815 | "typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1)));\n" |
24816 | "typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16)));\n" |
24817 | "typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1)));\n" |
24818 | "typedef short v8i16 __attribute__((vector_size(16), aligned(16)));\n" |
24819 | "typedef short v8i16_h __attribute__((vector_size(16), aligned(2)));\n" |
24820 | "typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16)));\n" |
24821 | "typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2)));\n" |
24822 | "typedef int v4i32 __attribute__((vector_size(16), aligned(16)));\n" |
24823 | "typedef int v4i32_w __attribute__((vector_size(16), aligned(4)));\n" |
24824 | "typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16)));\n" |
24825 | "typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4)));\n" |
24826 | "typedef long long v2i64 __attribute__((vector_size(16), aligned(16)));\n" |
24827 | "typedef long long v2i64_d __attribute__((vector_size(16), aligned(8)));\n" |
24828 | "typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16)));\n" |
24829 | "typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8)));\n" |
24830 | "typedef float v4f32 __attribute__((vector_size(16), aligned(16)));\n" |
24831 | "typedef float v4f32_w __attribute__((vector_size(16), aligned(4)));\n" |
24832 | "typedef double v2f64 __attribute__ ((vector_size(16), aligned(16)));\n" |
24833 | "typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8)));\n" |
24834 | "\n" |
24835 | "#define __msa_sll_b __builtin_msa_sll_b\n" |
24836 | "#define __msa_sll_h __builtin_msa_sll_h\n" |
24837 | "#define __msa_sll_w __builtin_msa_sll_w\n" |
24838 | "#define __msa_sll_d __builtin_msa_sll_d\n" |
24839 | "#define __msa_slli_b __builtin_msa_slli_b\n" |
24840 | "#define __msa_slli_h __builtin_msa_slli_h\n" |
24841 | "#define __msa_slli_w __builtin_msa_slli_w\n" |
24842 | "#define __msa_slli_d __builtin_msa_slli_d\n" |
24843 | "#define __msa_sra_b __builtin_msa_sra_b\n" |
24844 | "#define __msa_sra_h __builtin_msa_sra_h\n" |
24845 | "#define __msa_sra_w __builtin_msa_sra_w\n" |
24846 | "#define __msa_sra_d __builtin_msa_sra_d\n" |
24847 | "#define __msa_srai_b __builtin_msa_srai_b\n" |
24848 | "#define __msa_srai_h __builtin_msa_srai_h\n" |
24849 | "#define __msa_srai_w __builtin_msa_srai_w\n" |
24850 | "#define __msa_srai_d __builtin_msa_srai_d\n" |
24851 | "#define __msa_srar_b __builtin_msa_srar_b\n" |
24852 | "#define __msa_srar_h __builtin_msa_srar_h\n" |
24853 | "#define __msa_srar_w __builtin_msa_srar_w\n" |
24854 | "#define __msa_srar_d __builtin_msa_srar_d\n" |
24855 | "#define __msa_srari_b __builtin_msa_srari_b\n" |
24856 | "#define __msa_srari_h __builtin_msa_srari_h\n" |
24857 | "#define __msa_srari_w __builtin_msa_srari_w\n" |
24858 | "#define __msa_srari_d __builtin_msa_srari_d\n" |
24859 | "#define __msa_srl_b __builtin_msa_srl_b\n" |
24860 | "#define __msa_srl_h __builtin_msa_srl_h\n" |
24861 | "#define __msa_srl_w __builtin_msa_srl_w\n" |
24862 | "#define __msa_srl_d __builtin_msa_srl_d\n" |
24863 | "#define __msa_srli_b __builtin_msa_srli_b\n" |
24864 | "#define __msa_srli_h __builtin_msa_srli_h\n" |
24865 | "#define __msa_srli_w __builtin_msa_srli_w\n" |
24866 | "#define __msa_srli_d __builtin_msa_srli_d\n" |
24867 | "#define __msa_srlr_b __builtin_msa_srlr_b\n" |
24868 | "#define __msa_srlr_h __builtin_msa_srlr_h\n" |
24869 | "#define __msa_srlr_w __builtin_msa_srlr_w\n" |
24870 | "#define __msa_srlr_d __builtin_msa_srlr_d\n" |
24871 | "#define __msa_srlri_b __builtin_msa_srlri_b\n" |
24872 | "#define __msa_srlri_h __builtin_msa_srlri_h\n" |
24873 | "#define __msa_srlri_w __builtin_msa_srlri_w\n" |
24874 | "#define __msa_srlri_d __builtin_msa_srlri_d\n" |
24875 | "#define __msa_bclr_b __builtin_msa_bclr_b\n" |
24876 | "#define __msa_bclr_h __builtin_msa_bclr_h\n" |
24877 | "#define __msa_bclr_w __builtin_msa_bclr_w\n" |
24878 | "#define __msa_bclr_d __builtin_msa_bclr_d\n" |
24879 | "#define __msa_bclri_b __builtin_msa_bclri_b\n" |
24880 | "#define __msa_bclri_h __builtin_msa_bclri_h\n" |
24881 | "#define __msa_bclri_w __builtin_msa_bclri_w\n" |
24882 | "#define __msa_bclri_d __builtin_msa_bclri_d\n" |
24883 | "#define __msa_bset_b __builtin_msa_bset_b\n" |
24884 | "#define __msa_bset_h __builtin_msa_bset_h\n" |
24885 | "#define __msa_bset_w __builtin_msa_bset_w\n" |
24886 | "#define __msa_bset_d __builtin_msa_bset_d\n" |
24887 | "#define __msa_bseti_b __builtin_msa_bseti_b\n" |
24888 | "#define __msa_bseti_h __builtin_msa_bseti_h\n" |
24889 | "#define __msa_bseti_w __builtin_msa_bseti_w\n" |
24890 | "#define __msa_bseti_d __builtin_msa_bseti_d\n" |
24891 | "#define __msa_bneg_b __builtin_msa_bneg_b\n" |
24892 | "#define __msa_bneg_h __builtin_msa_bneg_h\n" |
24893 | "#define __msa_bneg_w __builtin_msa_bneg_w\n" |
24894 | "#define __msa_bneg_d __builtin_msa_bneg_d\n" |
24895 | "#define __msa_bnegi_b __builtin_msa_bnegi_b\n" |
24896 | "#define __msa_bnegi_h __builtin_msa_bnegi_h\n" |
24897 | "#define __msa_bnegi_w __builtin_msa_bnegi_w\n" |
24898 | "#define __msa_bnegi_d __builtin_msa_bnegi_d\n" |
24899 | "#define __msa_binsl_b __builtin_msa_binsl_b\n" |
24900 | "#define __msa_binsl_h __builtin_msa_binsl_h\n" |
24901 | "#define __msa_binsl_w __builtin_msa_binsl_w\n" |
24902 | "#define __msa_binsl_d __builtin_msa_binsl_d\n" |
24903 | "#define __msa_binsli_b __builtin_msa_binsli_b\n" |
24904 | "#define __msa_binsli_h __builtin_msa_binsli_h\n" |
24905 | "#define __msa_binsli_w __builtin_msa_binsli_w\n" |
24906 | "#define __msa_binsli_d __builtin_msa_binsli_d\n" |
24907 | "#define __msa_binsr_b __builtin_msa_binsr_b\n" |
24908 | "#define __msa_binsr_h __builtin_msa_binsr_h\n" |
24909 | "#define __msa_binsr_w __builtin_msa_binsr_w\n" |
24910 | "#define __msa_binsr_d __builtin_msa_binsr_d\n" |
24911 | "#define __msa_binsri_b __builtin_msa_binsri_b\n" |
24912 | "#define __msa_binsri_h __builtin_msa_binsri_h\n" |
24913 | "#define __msa_binsri_w __builtin_msa_binsri_w\n" |
24914 | "#define __msa_binsri_d __builtin_msa_binsri_d\n" |
24915 | "#define __msa_addv_b __builtin_msa_addv_b\n" |
24916 | "#define __msa_addv_h __builtin_msa_addv_h\n" |
24917 | "#define __msa_addv_w __builtin_msa_addv_w\n" |
24918 | "#define __msa_addv_d __builtin_msa_addv_d\n" |
24919 | "#define __msa_addvi_b __builtin_msa_addvi_b\n" |
24920 | "#define __msa_addvi_h __builtin_msa_addvi_h\n" |
24921 | "#define __msa_addvi_w __builtin_msa_addvi_w\n" |
24922 | "#define __msa_addvi_d __builtin_msa_addvi_d\n" |
24923 | "#define __msa_subv_b __builtin_msa_subv_b\n" |
24924 | "#define __msa_subv_h __builtin_msa_subv_h\n" |
24925 | "#define __msa_subv_w __builtin_msa_subv_w\n" |
24926 | "#define __msa_subv_d __builtin_msa_subv_d\n" |
24927 | "#define __msa_subvi_b __builtin_msa_subvi_b\n" |
24928 | "#define __msa_subvi_h __builtin_msa_subvi_h\n" |
24929 | "#define __msa_subvi_w __builtin_msa_subvi_w\n" |
24930 | "#define __msa_subvi_d __builtin_msa_subvi_d\n" |
24931 | "#define __msa_max_s_b __builtin_msa_max_s_b\n" |
24932 | "#define __msa_max_s_h __builtin_msa_max_s_h\n" |
24933 | "#define __msa_max_s_w __builtin_msa_max_s_w\n" |
24934 | "#define __msa_max_s_d __builtin_msa_max_s_d\n" |
24935 | "#define __msa_maxi_s_b __builtin_msa_maxi_s_b\n" |
24936 | "#define __msa_maxi_s_h __builtin_msa_maxi_s_h\n" |
24937 | "#define __msa_maxi_s_w __builtin_msa_maxi_s_w\n" |
24938 | "#define __msa_maxi_s_d __builtin_msa_maxi_s_d\n" |
24939 | "#define __msa_max_u_b __builtin_msa_max_u_b\n" |
24940 | "#define __msa_max_u_h __builtin_msa_max_u_h\n" |
24941 | "#define __msa_max_u_w __builtin_msa_max_u_w\n" |
24942 | "#define __msa_max_u_d __builtin_msa_max_u_d\n" |
24943 | "#define __msa_maxi_u_b __builtin_msa_maxi_u_b\n" |
24944 | "#define __msa_maxi_u_h __builtin_msa_maxi_u_h\n" |
24945 | "#define __msa_maxi_u_w __builtin_msa_maxi_u_w\n" |
24946 | "#define __msa_maxi_u_d __builtin_msa_maxi_u_d\n" |
24947 | "#define __msa_min_s_b __builtin_msa_min_s_b\n" |
24948 | "#define __msa_min_s_h __builtin_msa_min_s_h\n" |
24949 | "#define __msa_min_s_w __builtin_msa_min_s_w\n" |
24950 | "#define __msa_min_s_d __builtin_msa_min_s_d\n" |
24951 | "#define __msa_mini_s_b __builtin_msa_mini_s_b\n" |
24952 | "#define __msa_mini_s_h __builtin_msa_mini_s_h\n" |
24953 | "#define __msa_mini_s_w __builtin_msa_mini_s_w\n" |
24954 | "#define __msa_mini_s_d __builtin_msa_mini_s_d\n" |
24955 | "#define __msa_min_u_b __builtin_msa_min_u_b\n" |
24956 | "#define __msa_min_u_h __builtin_msa_min_u_h\n" |
24957 | "#define __msa_min_u_w __builtin_msa_min_u_w\n" |
24958 | "#define __msa_min_u_d __builtin_msa_min_u_d\n" |
24959 | "#define __msa_mini_u_b __builtin_msa_mini_u_b\n" |
24960 | "#define __msa_mini_u_h __builtin_msa_mini_u_h\n" |
24961 | "#define __msa_mini_u_w __builtin_msa_mini_u_w\n" |
24962 | "#define __msa_mini_u_d __builtin_msa_mini_u_d\n" |
24963 | "#define __msa_max_a_b __builtin_msa_max_a_b\n" |
24964 | "#define __msa_max_a_h __builtin_msa_max_a_h\n" |
24965 | "#define __msa_max_a_w __builtin_msa_max_a_w\n" |
24966 | "#define __msa_max_a_d __builtin_msa_max_a_d\n" |
24967 | "#define __msa_min_a_b __builtin_msa_min_a_b\n" |
24968 | "#define __msa_min_a_h __builtin_msa_min_a_h\n" |
24969 | "#define __msa_min_a_w __builtin_msa_min_a_w\n" |
24970 | "#define __msa_min_a_d __builtin_msa_min_a_d\n" |
24971 | "#define __msa_ceq_b __builtin_msa_ceq_b\n" |
24972 | "#define __msa_ceq_h __builtin_msa_ceq_h\n" |
24973 | "#define __msa_ceq_w __builtin_msa_ceq_w\n" |
24974 | "#define __msa_ceq_d __builtin_msa_ceq_d\n" |
24975 | "#define __msa_ceqi_b __builtin_msa_ceqi_b\n" |
24976 | "#define __msa_ceqi_h __builtin_msa_ceqi_h\n" |
24977 | "#define __msa_ceqi_w __builtin_msa_ceqi_w\n" |
24978 | "#define __msa_ceqi_d __builtin_msa_ceqi_d\n" |
24979 | "#define __msa_clt_s_b __builtin_msa_clt_s_b\n" |
24980 | "#define __msa_clt_s_h __builtin_msa_clt_s_h\n" |
24981 | "#define __msa_clt_s_w __builtin_msa_clt_s_w\n" |
24982 | "#define __msa_clt_s_d __builtin_msa_clt_s_d\n" |
24983 | "#define __msa_clti_s_b __builtin_msa_clti_s_b\n" |
24984 | "#define __msa_clti_s_h __builtin_msa_clti_s_h\n" |
24985 | "#define __msa_clti_s_w __builtin_msa_clti_s_w\n" |
24986 | "#define __msa_clti_s_d __builtin_msa_clti_s_d\n" |
24987 | "#define __msa_clt_u_b __builtin_msa_clt_u_b\n" |
24988 | "#define __msa_clt_u_h __builtin_msa_clt_u_h\n" |
24989 | "#define __msa_clt_u_w __builtin_msa_clt_u_w\n" |
24990 | "#define __msa_clt_u_d __builtin_msa_clt_u_d\n" |
24991 | "#define __msa_clti_u_b __builtin_msa_clti_u_b\n" |
24992 | "#define __msa_clti_u_h __builtin_msa_clti_u_h\n" |
24993 | "#define __msa_clti_u_w __builtin_msa_clti_u_w\n" |
24994 | "#define __msa_clti_u_d __builtin_msa_clti_u_d\n" |
24995 | "#define __msa_cle_s_b __builtin_msa_cle_s_b\n" |
24996 | "#define __msa_cle_s_h __builtin_msa_cle_s_h\n" |
24997 | "#define __msa_cle_s_w __builtin_msa_cle_s_w\n" |
24998 | "#define __msa_cle_s_d __builtin_msa_cle_s_d\n" |
24999 | "#define __msa_clei_s_b __builtin_msa_clei_s_b\n" |
25000 | "#define __msa_clei_s_h __builtin_msa_clei_s_h\n" |
25001 | "#define __msa_clei_s_w __builtin_msa_clei_s_w\n" |
25002 | "#define __msa_clei_s_d __builtin_msa_clei_s_d\n" |
25003 | "#define __msa_cle_u_b __builtin_msa_cle_u_b\n" |
25004 | "#define __msa_cle_u_h __builtin_msa_cle_u_h\n" |
25005 | "#define __msa_cle_u_w __builtin_msa_cle_u_w\n" |
25006 | "#define __msa_cle_u_d __builtin_msa_cle_u_d\n" |
25007 | "#define __msa_clei_u_b __builtin_msa_clei_u_b\n" |
25008 | "#define __msa_clei_u_h __builtin_msa_clei_u_h\n" |
25009 | "#define __msa_clei_u_w __builtin_msa_clei_u_w\n" |
25010 | "#define __msa_clei_u_d __builtin_msa_clei_u_d\n" |
25011 | "#define __msa_ld_b __builtin_msa_ld_b\n" |
25012 | "#define __msa_ld_h __builtin_msa_ld_h\n" |
25013 | "#define __msa_ld_w __builtin_msa_ld_w\n" |
25014 | "#define __msa_ld_d __builtin_msa_ld_d\n" |
25015 | "#define __msa_st_b __builtin_msa_st_b\n" |
25016 | "#define __msa_st_h __builtin_msa_st_h\n" |
25017 | "#define __msa_st_w __builtin_msa_st_w\n" |
25018 | "#define __msa_st_d __builtin_msa_st_d\n" |
25019 | "#define __msa_sat_s_b __builtin_msa_sat_s_b\n" |
25020 | "#define __msa_sat_s_h __builtin_msa_sat_s_h\n" |
25021 | "#define __msa_sat_s_w __builtin_msa_sat_s_w\n" |
25022 | "#define __msa_sat_s_d __builtin_msa_sat_s_d\n" |
25023 | "#define __msa_sat_u_b __builtin_msa_sat_u_b\n" |
25024 | "#define __msa_sat_u_h __builtin_msa_sat_u_h\n" |
25025 | "#define __msa_sat_u_w __builtin_msa_sat_u_w\n" |
25026 | "#define __msa_sat_u_d __builtin_msa_sat_u_d\n" |
25027 | "#define __msa_add_a_b __builtin_msa_add_a_b\n" |
25028 | "#define __msa_add_a_h __builtin_msa_add_a_h\n" |
25029 | "#define __msa_add_a_w __builtin_msa_add_a_w\n" |
25030 | "#define __msa_add_a_d __builtin_msa_add_a_d\n" |
25031 | "#define __msa_adds_a_b __builtin_msa_adds_a_b\n" |
25032 | "#define __msa_adds_a_h __builtin_msa_adds_a_h\n" |
25033 | "#define __msa_adds_a_w __builtin_msa_adds_a_w\n" |
25034 | "#define __msa_adds_a_d __builtin_msa_adds_a_d\n" |
25035 | "#define __msa_adds_s_b __builtin_msa_adds_s_b\n" |
25036 | "#define __msa_adds_s_h __builtin_msa_adds_s_h\n" |
25037 | "#define __msa_adds_s_w __builtin_msa_adds_s_w\n" |
25038 | "#define __msa_adds_s_d __builtin_msa_adds_s_d\n" |
25039 | "#define __msa_adds_u_b __builtin_msa_adds_u_b\n" |
25040 | "#define __msa_adds_u_h __builtin_msa_adds_u_h\n" |
25041 | "#define __msa_adds_u_w __builtin_msa_adds_u_w\n" |
25042 | "#define __msa_adds_u_d __builtin_msa_adds_u_d\n" |
25043 | "#define __msa_ave_s_b __builtin_msa_ave_s_b\n" |
25044 | "#define __msa_ave_s_h __builtin_msa_ave_s_h\n" |
25045 | "#define __msa_ave_s_w __builtin_msa_ave_s_w\n" |
25046 | "#define __msa_ave_s_d __builtin_msa_ave_s_d\n" |
25047 | "#define __msa_ave_u_b __builtin_msa_ave_u_b\n" |
25048 | "#define __msa_ave_u_h __builtin_msa_ave_u_h\n" |
25049 | "#define __msa_ave_u_w __builtin_msa_ave_u_w\n" |
25050 | "#define __msa_ave_u_d __builtin_msa_ave_u_d\n" |
25051 | "#define __msa_aver_s_b __builtin_msa_aver_s_b\n" |
25052 | "#define __msa_aver_s_h __builtin_msa_aver_s_h\n" |
25053 | "#define __msa_aver_s_w __builtin_msa_aver_s_w\n" |
25054 | "#define __msa_aver_s_d __builtin_msa_aver_s_d\n" |
25055 | "#define __msa_aver_u_b __builtin_msa_aver_u_b\n" |
25056 | "#define __msa_aver_u_h __builtin_msa_aver_u_h\n" |
25057 | "#define __msa_aver_u_w __builtin_msa_aver_u_w\n" |
25058 | "#define __msa_aver_u_d __builtin_msa_aver_u_d\n" |
25059 | "#define __msa_subs_s_b __builtin_msa_subs_s_b\n" |
25060 | "#define __msa_subs_s_h __builtin_msa_subs_s_h\n" |
25061 | "#define __msa_subs_s_w __builtin_msa_subs_s_w\n" |
25062 | "#define __msa_subs_s_d __builtin_msa_subs_s_d\n" |
25063 | "#define __msa_subs_u_b __builtin_msa_subs_u_b\n" |
25064 | "#define __msa_subs_u_h __builtin_msa_subs_u_h\n" |
25065 | "#define __msa_subs_u_w __builtin_msa_subs_u_w\n" |
25066 | "#define __msa_subs_u_d __builtin_msa_subs_u_d\n" |
25067 | "#define __msa_subsuu_s_b __builtin_msa_subsuu_s_b\n" |
25068 | "#define __msa_subsuu_s_h __builtin_msa_subsuu_s_h\n" |
25069 | "#define __msa_subsuu_s_w __builtin_msa_subsuu_s_w\n" |
25070 | "#define __msa_subsuu_s_d __builtin_msa_subsuu_s_d\n" |
25071 | "#define __msa_subsus_u_b __builtin_msa_subsus_u_b\n" |
25072 | "#define __msa_subsus_u_h __builtin_msa_subsus_u_h\n" |
25073 | "#define __msa_subsus_u_w __builtin_msa_subsus_u_w\n" |
25074 | "#define __msa_subsus_u_d __builtin_msa_subsus_u_d\n" |
25075 | "#define __msa_asub_s_b __builtin_msa_asub_s_b\n" |
25076 | "#define __msa_asub_s_h __builtin_msa_asub_s_h\n" |
25077 | "#define __msa_asub_s_w __builtin_msa_asub_s_w\n" |
25078 | "#define __msa_asub_s_d __builtin_msa_asub_s_d\n" |
25079 | "#define __msa_asub_u_b __builtin_msa_asub_u_b\n" |
25080 | "#define __msa_asub_u_h __builtin_msa_asub_u_h\n" |
25081 | "#define __msa_asub_u_w __builtin_msa_asub_u_w\n" |
25082 | "#define __msa_asub_u_d __builtin_msa_asub_u_d\n" |
25083 | "#define __msa_mulv_b __builtin_msa_mulv_b\n" |
25084 | "#define __msa_mulv_h __builtin_msa_mulv_h\n" |
25085 | "#define __msa_mulv_w __builtin_msa_mulv_w\n" |
25086 | "#define __msa_mulv_d __builtin_msa_mulv_d\n" |
25087 | "#define __msa_maddv_b __builtin_msa_maddv_b\n" |
25088 | "#define __msa_maddv_h __builtin_msa_maddv_h\n" |
25089 | "#define __msa_maddv_w __builtin_msa_maddv_w\n" |
25090 | "#define __msa_maddv_d __builtin_msa_maddv_d\n" |
25091 | "#define __msa_msubv_b __builtin_msa_msubv_b\n" |
25092 | "#define __msa_msubv_h __builtin_msa_msubv_h\n" |
25093 | "#define __msa_msubv_w __builtin_msa_msubv_w\n" |
25094 | "#define __msa_msubv_d __builtin_msa_msubv_d\n" |
25095 | "#define __msa_div_s_b __builtin_msa_div_s_b\n" |
25096 | "#define __msa_div_s_h __builtin_msa_div_s_h\n" |
25097 | "#define __msa_div_s_w __builtin_msa_div_s_w\n" |
25098 | "#define __msa_div_s_d __builtin_msa_div_s_d\n" |
25099 | "#define __msa_div_u_b __builtin_msa_div_u_b\n" |
25100 | "#define __msa_div_u_h __builtin_msa_div_u_h\n" |
25101 | "#define __msa_div_u_w __builtin_msa_div_u_w\n" |
25102 | "#define __msa_div_u_d __builtin_msa_div_u_d\n" |
25103 | "#define __msa_hadd_s_h __builtin_msa_hadd_s_h\n" |
25104 | "#define __msa_hadd_s_w __builtin_msa_hadd_s_w\n" |
25105 | "#define __msa_hadd_s_d __builtin_msa_hadd_s_d\n" |
25106 | "#define __msa_hadd_u_h __builtin_msa_hadd_u_h\n" |
25107 | "#define __msa_hadd_u_w __builtin_msa_hadd_u_w\n" |
25108 | "#define __msa_hadd_u_d __builtin_msa_hadd_u_d\n" |
25109 | "#define __msa_hsub_s_h __builtin_msa_hsub_s_h\n" |
25110 | "#define __msa_hsub_s_w __builtin_msa_hsub_s_w\n" |
25111 | "#define __msa_hsub_s_d __builtin_msa_hsub_s_d\n" |
25112 | "#define __msa_hsub_u_h __builtin_msa_hsub_u_h\n" |
25113 | "#define __msa_hsub_u_w __builtin_msa_hsub_u_w\n" |
25114 | "#define __msa_hsub_u_d __builtin_msa_hsub_u_d\n" |
25115 | "#define __msa_mod_s_b __builtin_msa_mod_s_b\n" |
25116 | "#define __msa_mod_s_h __builtin_msa_mod_s_h\n" |
25117 | "#define __msa_mod_s_w __builtin_msa_mod_s_w\n" |
25118 | "#define __msa_mod_s_d __builtin_msa_mod_s_d\n" |
25119 | "#define __msa_mod_u_b __builtin_msa_mod_u_b\n" |
25120 | "#define __msa_mod_u_h __builtin_msa_mod_u_h\n" |
25121 | "#define __msa_mod_u_w __builtin_msa_mod_u_w\n" |
25122 | "#define __msa_mod_u_d __builtin_msa_mod_u_d\n" |
25123 | "#define __msa_dotp_s_h __builtin_msa_dotp_s_h\n" |
25124 | "#define __msa_dotp_s_w __builtin_msa_dotp_s_w\n" |
25125 | "#define __msa_dotp_s_d __builtin_msa_dotp_s_d\n" |
25126 | "#define __msa_dotp_u_h __builtin_msa_dotp_u_h\n" |
25127 | "#define __msa_dotp_u_w __builtin_msa_dotp_u_w\n" |
25128 | "#define __msa_dotp_u_d __builtin_msa_dotp_u_d\n" |
25129 | "#define __msa_dpadd_s_h __builtin_msa_dpadd_s_h\n" |
25130 | "#define __msa_dpadd_s_w __builtin_msa_dpadd_s_w\n" |
25131 | "#define __msa_dpadd_s_d __builtin_msa_dpadd_s_d\n" |
25132 | "#define __msa_dpadd_u_h __builtin_msa_dpadd_u_h\n" |
25133 | "#define __msa_dpadd_u_w __builtin_msa_dpadd_u_w\n" |
25134 | "#define __msa_dpadd_u_d __builtin_msa_dpadd_u_d\n" |
25135 | "#define __msa_dpsub_s_h __builtin_msa_dpsub_s_h\n" |
25136 | "#define __msa_dpsub_s_w __builtin_msa_dpsub_s_w\n" |
25137 | "#define __msa_dpsub_s_d __builtin_msa_dpsub_s_d\n" |
25138 | "#define __msa_dpsub_u_h __builtin_msa_dpsub_u_h\n" |
25139 | "#define __msa_dpsub_u_w __builtin_msa_dpsub_u_w\n" |
25140 | "#define __msa_dpsub_u_d __builtin_msa_dpsub_u_d\n" |
25141 | "#define __msa_sld_b __builtin_msa_sld_b\n" |
25142 | "#define __msa_sld_h __builtin_msa_sld_h\n" |
25143 | "#define __msa_sld_w __builtin_msa_sld_w\n" |
25144 | "#define __msa_sld_d __builtin_msa_sld_d\n" |
25145 | "#define __msa_sldi_b __builtin_msa_sldi_b\n" |
25146 | "#define __msa_sldi_h __builtin_msa_sldi_h\n" |
25147 | "#define __msa_sldi_w __builtin_msa_sldi_w\n" |
25148 | "#define __msa_sldi_d __builtin_msa_sldi_d\n" |
25149 | "#define __msa_splat_b __builtin_msa_splat_b\n" |
25150 | "#define __msa_splat_h __builtin_msa_splat_h\n" |
25151 | "#define __msa_splat_w __builtin_msa_splat_w\n" |
25152 | "#define __msa_splat_d __builtin_msa_splat_d\n" |
25153 | "#define __msa_splati_b __builtin_msa_splati_b\n" |
25154 | "#define __msa_splati_h __builtin_msa_splati_h\n" |
25155 | "#define __msa_splati_w __builtin_msa_splati_w\n" |
25156 | "#define __msa_splati_d __builtin_msa_splati_d\n" |
25157 | "#define __msa_pckev_b __builtin_msa_pckev_b\n" |
25158 | "#define __msa_pckev_h __builtin_msa_pckev_h\n" |
25159 | "#define __msa_pckev_w __builtin_msa_pckev_w\n" |
25160 | "#define __msa_pckev_d __builtin_msa_pckev_d\n" |
25161 | "#define __msa_pckod_b __builtin_msa_pckod_b\n" |
25162 | "#define __msa_pckod_h __builtin_msa_pckod_h\n" |
25163 | "#define __msa_pckod_w __builtin_msa_pckod_w\n" |
25164 | "#define __msa_pckod_d __builtin_msa_pckod_d\n" |
25165 | "#define __msa_ilvl_b __builtin_msa_ilvl_b\n" |
25166 | "#define __msa_ilvl_h __builtin_msa_ilvl_h\n" |
25167 | "#define __msa_ilvl_w __builtin_msa_ilvl_w\n" |
25168 | "#define __msa_ilvl_d __builtin_msa_ilvl_d\n" |
25169 | "#define __msa_ilvr_b __builtin_msa_ilvr_b\n" |
25170 | "#define __msa_ilvr_h __builtin_msa_ilvr_h\n" |
25171 | "#define __msa_ilvr_w __builtin_msa_ilvr_w\n" |
25172 | "#define __msa_ilvr_d __builtin_msa_ilvr_d\n" |
25173 | "#define __msa_ilvev_b __builtin_msa_ilvev_b\n" |
25174 | "#define __msa_ilvev_h __builtin_msa_ilvev_h\n" |
25175 | "#define __msa_ilvev_w __builtin_msa_ilvev_w\n" |
25176 | "#define __msa_ilvev_d __builtin_msa_ilvev_d\n" |
25177 | "#define __msa_ilvod_b __builtin_msa_ilvod_b\n" |
25178 | "#define __msa_ilvod_h __builtin_msa_ilvod_h\n" |
25179 | "#define __msa_ilvod_w __builtin_msa_ilvod_w\n" |
25180 | "#define __msa_ilvod_d __builtin_msa_ilvod_d\n" |
25181 | "#define __msa_vshf_b __builtin_msa_vshf_b\n" |
25182 | "#define __msa_vshf_h __builtin_msa_vshf_h\n" |
25183 | "#define __msa_vshf_w __builtin_msa_vshf_w\n" |
25184 | "#define __msa_vshf_d __builtin_msa_vshf_d\n" |
25185 | "#define __msa_and_v __builtin_msa_and_v\n" |
25186 | "#define __msa_andi_b __builtin_msa_andi_b\n" |
25187 | "#define __msa_or_v __builtin_msa_or_v\n" |
25188 | "#define __msa_ori_b __builtin_msa_ori_b\n" |
25189 | "#define __msa_nor_v __builtin_msa_nor_v\n" |
25190 | "#define __msa_nori_b __builtin_msa_nori_b\n" |
25191 | "#define __msa_xor_v __builtin_msa_xor_v\n" |
25192 | "#define __msa_xori_b __builtin_msa_xori_b\n" |
25193 | "#define __msa_bmnz_v __builtin_msa_bmnz_v\n" |
25194 | "#define __msa_bmnzi_b __builtin_msa_bmnzi_b\n" |
25195 | "#define __msa_bmz_v __builtin_msa_bmz_v\n" |
25196 | "#define __msa_bmzi_b __builtin_msa_bmzi_b\n" |
25197 | "#define __msa_bsel_v __builtin_msa_bsel_v\n" |
25198 | "#define __msa_bseli_b __builtin_msa_bseli_b\n" |
25199 | "#define __msa_shf_b __builtin_msa_shf_b\n" |
25200 | "#define __msa_shf_h __builtin_msa_shf_h\n" |
25201 | "#define __msa_shf_w __builtin_msa_shf_w\n" |
25202 | "#define __msa_test_bnz_v __builtin_msa_bnz_v\n" |
25203 | "#define __msa_test_bz_v __builtin_msa_bz_v\n" |
25204 | "#define __msa_fill_b __builtin_msa_fill_b\n" |
25205 | "#define __msa_fill_h __builtin_msa_fill_h\n" |
25206 | "#define __msa_fill_w __builtin_msa_fill_w\n" |
25207 | "#define __msa_fill_d __builtin_msa_fill_d\n" |
25208 | "#define __msa_pcnt_b __builtin_msa_pcnt_b\n" |
25209 | "#define __msa_pcnt_h __builtin_msa_pcnt_h\n" |
25210 | "#define __msa_pcnt_w __builtin_msa_pcnt_w\n" |
25211 | "#define __msa_pcnt_d __builtin_msa_pcnt_d\n" |
25212 | "#define __msa_nloc_b __builtin_msa_nloc_b\n" |
25213 | "#define __msa_nloc_h __builtin_msa_nloc_h\n" |
25214 | "#define __msa_nloc_w __builtin_msa_nloc_w\n" |
25215 | "#define __msa_nloc_d __builtin_msa_nloc_d\n" |
25216 | "#define __msa_nlzc_b __builtin_msa_nlzc_b\n" |
25217 | "#define __msa_nlzc_h __builtin_msa_nlzc_h\n" |
25218 | "#define __msa_nlzc_w __builtin_msa_nlzc_w\n" |
25219 | "#define __msa_nlzc_d __builtin_msa_nlzc_d\n" |
25220 | "#define __msa_copy_s_b __builtin_msa_copy_s_b\n" |
25221 | "#define __msa_copy_s_h __builtin_msa_copy_s_h\n" |
25222 | "#define __msa_copy_s_w __builtin_msa_copy_s_w\n" |
25223 | "#define __msa_copy_s_d __builtin_msa_copy_s_d\n" |
25224 | "#define __msa_copy_u_b __builtin_msa_copy_u_b\n" |
25225 | "#define __msa_copy_u_h __builtin_msa_copy_u_h\n" |
25226 | "#define __msa_copy_u_w __builtin_msa_copy_u_w\n" |
25227 | "#define __msa_copy_u_d __builtin_msa_copy_u_d\n" |
25228 | "#define __msa_insert_b __builtin_msa_insert_b\n" |
25229 | "#define __msa_insert_h __builtin_msa_insert_h\n" |
25230 | "#define __msa_insert_w __builtin_msa_insert_w\n" |
25231 | "#define __msa_insert_d __builtin_msa_insert_d\n" |
25232 | "#define __msa_insve_b __builtin_msa_insve_b\n" |
25233 | "#define __msa_insve_h __builtin_msa_insve_h\n" |
25234 | "#define __msa_insve_w __builtin_msa_insve_w\n" |
25235 | "#define __msa_insve_d __builtin_msa_insve_d\n" |
25236 | "#define __msa_test_bnz_b __builtin_msa_bnz_b\n" |
25237 | "#define __msa_test_bnz_h __builtin_msa_bnz_h\n" |
25238 | "#define __msa_test_bnz_w __builtin_msa_bnz_w\n" |
25239 | "#define __msa_test_bnz_d __builtin_msa_bnz_d\n" |
25240 | "#define __msa_test_bz_b __builtin_msa_bz_b\n" |
25241 | "#define __msa_test_bz_h __builtin_msa_bz_h\n" |
25242 | "#define __msa_test_bz_w __builtin_msa_bz_w\n" |
25243 | "#define __msa_test_bz_d __builtin_msa_bz_d\n" |
25244 | "#define __msa_ldi_b __builtin_msa_ldi_b\n" |
25245 | "#define __msa_ldi_h __builtin_msa_ldi_h\n" |
25246 | "#define __msa_ldi_w __builtin_msa_ldi_w\n" |
25247 | "#define __msa_ldi_d __builtin_msa_ldi_d\n" |
25248 | "#define __msa_fcaf_w __builtin_msa_fcaf_w\n" |
25249 | "#define __msa_fcaf_d __builtin_msa_fcaf_d\n" |
25250 | "#define __msa_fcor_w __builtin_msa_fcor_w\n" |
25251 | "#define __msa_fcor_d __builtin_msa_fcor_d\n" |
25252 | "#define __msa_fcun_w __builtin_msa_fcun_w\n" |
25253 | "#define __msa_fcun_d __builtin_msa_fcun_d\n" |
25254 | "#define __msa_fcune_w __builtin_msa_fcune_w\n" |
25255 | "#define __msa_fcune_d __builtin_msa_fcune_d\n" |
25256 | "#define __msa_fcueq_w __builtin_msa_fcueq_w\n" |
25257 | "#define __msa_fcueq_d __builtin_msa_fcueq_d\n" |
25258 | "#define __msa_fceq_w __builtin_msa_fceq_w\n" |
25259 | "#define __msa_fceq_d __builtin_msa_fceq_d\n" |
25260 | "#define __msa_fcne_w __builtin_msa_fcne_w\n" |
25261 | "#define __msa_fcne_d __builtin_msa_fcne_d\n" |
25262 | "#define __msa_fclt_w __builtin_msa_fclt_w\n" |
25263 | "#define __msa_fclt_d __builtin_msa_fclt_d\n" |
25264 | "#define __msa_fcult_w __builtin_msa_fcult_w\n" |
25265 | "#define __msa_fcult_d __builtin_msa_fcult_d\n" |
25266 | "#define __msa_fcle_w __builtin_msa_fcle_w\n" |
25267 | "#define __msa_fcle_d __builtin_msa_fcle_d\n" |
25268 | "#define __msa_fcule_w __builtin_msa_fcule_w\n" |
25269 | "#define __msa_fcule_d __builtin_msa_fcule_d\n" |
25270 | "#define __msa_fsaf_w __builtin_msa_fsaf_w\n" |
25271 | "#define __msa_fsaf_d __builtin_msa_fsaf_d\n" |
25272 | "#define __msa_fsor_w __builtin_msa_fsor_w\n" |
25273 | "#define __msa_fsor_d __builtin_msa_fsor_d\n" |
25274 | "#define __msa_fsun_w __builtin_msa_fsun_w\n" |
25275 | "#define __msa_fsun_d __builtin_msa_fsun_d\n" |
25276 | "#define __msa_fsune_w __builtin_msa_fsune_w\n" |
25277 | "#define __msa_fsune_d __builtin_msa_fsune_d\n" |
25278 | "#define __msa_fsueq_w __builtin_msa_fsueq_w\n" |
25279 | "#define __msa_fsueq_d __builtin_msa_fsueq_d\n" |
25280 | "#define __msa_fseq_w __builtin_msa_fseq_w\n" |
25281 | "#define __msa_fseq_d __builtin_msa_fseq_d\n" |
25282 | "#define __msa_fsne_w __builtin_msa_fsne_w\n" |
25283 | "#define __msa_fsne_d __builtin_msa_fsne_d\n" |
25284 | "#define __msa_fslt_w __builtin_msa_fslt_w\n" |
25285 | "#define __msa_fslt_d __builtin_msa_fslt_d\n" |
25286 | "#define __msa_fsult_w __builtin_msa_fsult_w\n" |
25287 | "#define __msa_fsult_d __builtin_msa_fsult_d\n" |
25288 | "#define __msa_fsle_w __builtin_msa_fsle_w\n" |
25289 | "#define __msa_fsle_d __builtin_msa_fsle_d\n" |
25290 | "#define __msa_fsule_w __builtin_msa_fsule_w\n" |
25291 | "#define __msa_fsule_d __builtin_msa_fsule_d\n" |
25292 | "#define __msa_fadd_w __builtin_msa_fadd_w\n" |
25293 | "#define __msa_fadd_d __builtin_msa_fadd_d\n" |
25294 | "#define __msa_fsub_w __builtin_msa_fsub_w\n" |
25295 | "#define __msa_fsub_d __builtin_msa_fsub_d\n" |
25296 | "#define __msa_fmul_w __builtin_msa_fmul_w\n" |
25297 | "#define __msa_fmul_d __builtin_msa_fmul_d\n" |
25298 | "#define __msa_fdiv_w __builtin_msa_fdiv_w\n" |
25299 | "#define __msa_fdiv_d __builtin_msa_fdiv_d\n" |
25300 | "#define __msa_fmadd_w __builtin_msa_fmadd_w\n" |
25301 | "#define __msa_fmadd_d __builtin_msa_fmadd_d\n" |
25302 | "#define __msa_fmsub_w __builtin_msa_fmsub_w\n" |
25303 | "#define __msa_fmsub_d __builtin_msa_fmsub_d\n" |
25304 | "#define __msa_fexp2_w __builtin_msa_fexp2_w\n" |
25305 | "#define __msa_fexp2_d __builtin_msa_fexp2_d\n" |
25306 | "#define __msa_fexdo_h __builtin_msa_fexdo_h\n" |
25307 | "#define __msa_fexdo_w __builtin_msa_fexdo_w\n" |
25308 | "#define __msa_ftq_h __builtin_msa_ftq_h\n" |
25309 | "#define __msa_ftq_w __builtin_msa_ftq_w\n" |
25310 | "#define __msa_fmin_w __builtin_msa_fmin_w\n" |
25311 | "#define __msa_fmin_d __builtin_msa_fmin_d\n" |
25312 | "#define __msa_fmin_a_w __builtin_msa_fmin_a_w\n" |
25313 | "#define __msa_fmin_a_d __builtin_msa_fmin_a_d\n" |
25314 | "#define __msa_fmax_w __builtin_msa_fmax_w\n" |
25315 | "#define __msa_fmax_d __builtin_msa_fmax_d\n" |
25316 | "#define __msa_fmax_a_w __builtin_msa_fmax_a_w\n" |
25317 | "#define __msa_fmax_a_d __builtin_msa_fmax_a_d\n" |
25318 | "#define __msa_mul_q_h __builtin_msa_mul_q_h\n" |
25319 | "#define __msa_mul_q_w __builtin_msa_mul_q_w\n" |
25320 | "#define __msa_mulr_q_h __builtin_msa_mulr_q_h\n" |
25321 | "#define __msa_mulr_q_w __builtin_msa_mulr_q_w\n" |
25322 | "#define __msa_madd_q_h __builtin_msa_madd_q_h\n" |
25323 | "#define __msa_madd_q_w __builtin_msa_madd_q_w\n" |
25324 | "#define __msa_maddr_q_h __builtin_msa_maddr_q_h\n" |
25325 | "#define __msa_maddr_q_w __builtin_msa_maddr_q_w\n" |
25326 | "#define __msa_msub_q_h __builtin_msa_msub_q_h\n" |
25327 | "#define __msa_msub_q_w __builtin_msa_msub_q_w\n" |
25328 | "#define __msa_msubr_q_h __builtin_msa_msubr_q_h\n" |
25329 | "#define __msa_msubr_q_w __builtin_msa_msubr_q_w\n" |
25330 | "#define __msa_fclass_w __builtin_msa_fclass_w\n" |
25331 | "#define __msa_fclass_d __builtin_msa_fclass_d\n" |
25332 | "#define __msa_fsqrt_w __builtin_msa_fsqrt_w\n" |
25333 | "#define __msa_fsqrt_d __builtin_msa_fsqrt_d\n" |
25334 | "#define __msa_frcp_w __builtin_msa_frcp_w\n" |
25335 | "#define __msa_frcp_d __builtin_msa_frcp_d\n" |
25336 | "#define __msa_frint_w __builtin_msa_frint_w\n" |
25337 | "#define __msa_frint_d __builtin_msa_frint_d\n" |
25338 | "#define __msa_frsqrt_w __builtin_msa_frsqrt_w\n" |
25339 | "#define __msa_frsqrt_d __builtin_msa_frsqrt_d\n" |
25340 | "#define __msa_flog2_w __builtin_msa_flog2_w\n" |
25341 | "#define __msa_flog2_d __builtin_msa_flog2_d\n" |
25342 | "#define __msa_fexupl_w __builtin_msa_fexupl_w\n" |
25343 | "#define __msa_fexupl_d __builtin_msa_fexupl_d\n" |
25344 | "#define __msa_fexupr_w __builtin_msa_fexupr_w\n" |
25345 | "#define __msa_fexupr_d __builtin_msa_fexupr_d\n" |
25346 | "#define __msa_ffql_w __builtin_msa_ffql_w\n" |
25347 | "#define __msa_ffql_d __builtin_msa_ffql_d\n" |
25348 | "#define __msa_ffqr_w __builtin_msa_ffqr_w\n" |
25349 | "#define __msa_ffqr_d __builtin_msa_ffqr_d\n" |
25350 | "#define __msa_ftint_s_w __builtin_msa_ftint_s_w\n" |
25351 | "#define __msa_ftint_s_d __builtin_msa_ftint_s_d\n" |
25352 | "#define __msa_ftint_u_w __builtin_msa_ftint_u_w\n" |
25353 | "#define __msa_ftint_u_d __builtin_msa_ftint_u_d\n" |
25354 | "#define __msa_ftrunc_s_w __builtin_msa_ftrunc_s_w\n" |
25355 | "#define __msa_ftrunc_s_d __builtin_msa_ftrunc_s_d\n" |
25356 | "#define __msa_ftrunc_u_w __builtin_msa_ftrunc_u_w\n" |
25357 | "#define __msa_ftrunc_u_d __builtin_msa_ftrunc_u_d\n" |
25358 | "#define __msa_ffint_s_w __builtin_msa_ffint_s_w\n" |
25359 | "#define __msa_ffint_s_d __builtin_msa_ffint_s_d\n" |
25360 | "#define __msa_ffint_u_w __builtin_msa_ffint_u_w\n" |
25361 | "#define __msa_ffint_u_d __builtin_msa_ffint_u_d\n" |
25362 | "#define __msa_cfcmsa __builtin_msa_cfcmsa\n" |
25363 | "#define __msa_move_v __builtin_msa_move_v\n" |
25364 | "#define __msa_cast_to_vector_float __builtin_msa_cast_to_vector_float\n" |
25365 | "#define __msa_cast_to_vector_double __builtin_msa_cast_to_vector_double\n" |
25366 | "#define __msa_cast_to_scalar_float __builtin_msa_cast_to_scalar_float\n" |
25367 | "#define __msa_cast_to_scalar_double __builtin_msa_cast_to_scalar_double\n" |
25368 | "#endif /* defined(__mips_msa) */\n" |
25369 | "#endif /* _MSA_H */\n" |
25370 | "" } , |
25371 | { "/builtins/mwaitxintrin.h" , "/*===---- mwaitxintrin.h - MONITORX/MWAITX intrinsics ----------------------===\n" |
25372 | " *\n" |
25373 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
25374 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
25375 | " * in the Software without restriction, including without limitation the rights\n" |
25376 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
25377 | " * copies of the Software, and to permit persons to whom the Software is\n" |
25378 | " * furnished to do so, subject to the following conditions:\n" |
25379 | " *\n" |
25380 | " * The above copyright notice and this permission notice shall be included in\n" |
25381 | " * all copies or substantial portions of the Software.\n" |
25382 | " *\n" |
25383 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
25384 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
25385 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
25386 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
25387 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
25388 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
25389 | " * THE SOFTWARE.\n" |
25390 | " *\n" |
25391 | " *===-----------------------------------------------------------------------===\n" |
25392 | " */\n" |
25393 | "\n" |
25394 | "#ifndef __X86INTRIN_H\n" |
25395 | "#error \"Never use <mwaitxintrin.h> directly; include <x86intrin.h> instead.\"\n" |
25396 | "#endif\n" |
25397 | "\n" |
25398 | "#ifndef __MWAITXINTRIN_H\n" |
25399 | "#define __MWAITXINTRIN_H\n" |
25400 | "\n" |
25401 | "/* Define the default attributes for the functions in this file. */\n" |
25402 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"mwaitx\")))\n" |
25403 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
25404 | "_mm_monitorx(void const * __p, unsigned __extensions, unsigned __hints)\n" |
25405 | "{\n" |
25406 | " __builtin_ia32_monitorx((void *)__p, __extensions, __hints);\n" |
25407 | "}\n" |
25408 | "\n" |
25409 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
25410 | "_mm_mwaitx(unsigned __extensions, unsigned __hints, unsigned __clock)\n" |
25411 | "{\n" |
25412 | " __builtin_ia32_mwaitx(__extensions, __hints, __clock);\n" |
25413 | "}\n" |
25414 | "\n" |
25415 | "#undef __DEFAULT_FN_ATTRS\n" |
25416 | "\n" |
25417 | "#endif /* __MWAITXINTRIN_H */\n" |
25418 | "" } , |
25419 | { "/builtins/nmmintrin.h" , "/*===---- nmmintrin.h - SSE4 intrinsics ------------------------------------===\n" |
25420 | " *\n" |
25421 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
25422 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
25423 | " * in the Software without restriction, including without limitation the rights\n" |
25424 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
25425 | " * copies of the Software, and to permit persons to whom the Software is\n" |
25426 | " * furnished to do so, subject to the following conditions:\n" |
25427 | " *\n" |
25428 | " * The above copyright notice and this permission notice shall be included in\n" |
25429 | " * all copies or substantial portions of the Software.\n" |
25430 | " *\n" |
25431 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
25432 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
25433 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
25434 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
25435 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
25436 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
25437 | " * THE SOFTWARE.\n" |
25438 | " *\n" |
25439 | " *===-----------------------------------------------------------------------===\n" |
25440 | " */\n" |
25441 | "\n" |
25442 | "#ifndef __NMMINTRIN_H\n" |
25443 | "#define __NMMINTRIN_H\n" |
25444 | "\n" |
25445 | "/* To match expectations of gcc we put the sse4.2 definitions into smmintrin.h,\n" |
25446 | " just include it now then. */\n" |
25447 | "#include <smmintrin.h>\n" |
25448 | "#endif /* __NMMINTRIN_H */\n" |
25449 | "" } , |
25450 | { "/builtins/omp-tools.h" , "/*\n" |
25451 | " * include/50/omp-tools.h.var\n" |
25452 | " */\n" |
25453 | "\n" |
25454 | "//===----------------------------------------------------------------------===//\n" |
25455 | "//\n" |
25456 | "// The LLVM Compiler Infrastructure\n" |
25457 | "//\n" |
25458 | "// This file is dual licensed under the MIT and the University of Illinois Open\n" |
25459 | "// Source Licenses. See LICENSE.txt for details.\n" |
25460 | "//\n" |
25461 | "//===----------------------------------------------------------------------===//\n" |
25462 | "\n" |
25463 | "#ifndef __OMPT__\n" |
25464 | "#define __OMPT__\n" |
25465 | "\n" |
25466 | "/*****************************************************************************\n" |
25467 | " * system include files\n" |
25468 | " *****************************************************************************/\n" |
25469 | "\n" |
25470 | "#include <stdint.h>\n" |
25471 | "#include <stddef.h>\n" |
25472 | "\n" |
25473 | "/*****************************************************************************\n" |
25474 | " * iteration macros\n" |
25475 | " *****************************************************************************/\n" |
25476 | "\n" |
25477 | "#define FOREACH_OMPT_INQUIRY_FN(macro) \\\n" |
25478 | " macro (ompt_enumerate_states) \\\n" |
25479 | " macro (ompt_enumerate_mutex_impls) \\\n" |
25480 | " \\\n" |
25481 | " macro (ompt_set_callback) \\\n" |
25482 | " macro (ompt_get_callback) \\\n" |
25483 | " \\\n" |
25484 | " macro (ompt_get_state) \\\n" |
25485 | " \\\n" |
25486 | " macro (ompt_get_parallel_info) \\\n" |
25487 | " macro (ompt_get_task_info) \\\n" |
25488 | " macro (ompt_get_task_memory) \\\n" |
25489 | " macro (ompt_get_thread_data) \\\n" |
25490 | " macro (ompt_get_unique_id) \\\n" |
25491 | " macro (ompt_finalize_tool) \\\n" |
25492 | " \\\n" |
25493 | " macro(ompt_get_num_procs) \\\n" |
25494 | " macro(ompt_get_num_places) \\\n" |
25495 | " macro(ompt_get_place_proc_ids) \\\n" |
25496 | " macro(ompt_get_place_num) \\\n" |
25497 | " macro(ompt_get_partition_place_nums) \\\n" |
25498 | " macro(ompt_get_proc_id) \\\n" |
25499 | " \\\n" |
25500 | " macro(ompt_get_target_info) \\\n" |
25501 | " macro(ompt_get_num_devices)\n" |
25502 | "\n" |
25503 | "#define FOREACH_OMPT_STATE(macro) \\\n" |
25504 | " \\\n" |
25505 | " /* first available state */ \\\n" |
25506 | " macro (ompt_state_undefined, 0x102) /* undefined thread state */ \\\n" |
25507 | " \\\n" |
25508 | " /* work states (0..15) */ \\\n" |
25509 | " macro (ompt_state_work_serial, 0x000) /* working outside parallel */ \\\n" |
25510 | " macro (ompt_state_work_parallel, 0x001) /* working within parallel */ \\\n" |
25511 | " macro (ompt_state_work_reduction, 0x002) /* performing a reduction */ \\\n" |
25512 | " \\\n" |
25513 | " /* barrier wait states (16..31) */ \\\n" |
25514 | " macro (ompt_state_wait_barrier, 0x010) /* waiting at a barrier */ \\\n" |
25515 | " macro (ompt_state_wait_barrier_implicit_parallel, 0x011) \\\n" |
25516 | " /* implicit barrier at the end of parallel region */\\\n" |
25517 | " macro (ompt_state_wait_barrier_implicit_workshare, 0x012) \\\n" |
25518 | " /* implicit barrier at the end of worksharing */ \\\n" |
25519 | " macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \\\n" |
25520 | " macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \\\n" |
25521 | " \\\n" |
25522 | " /* task wait states (32..63) */ \\\n" |
25523 | " macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \\\n" |
25524 | " macro (ompt_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \\\n" |
25525 | " \\\n" |
25526 | " /* mutex wait states (64..127) */ \\\n" |
25527 | " macro (ompt_state_wait_mutex, 0x040) \\\n" |
25528 | " macro (ompt_state_wait_lock, 0x041) /* waiting for lock */ \\\n" |
25529 | " macro (ompt_state_wait_critical, 0x042) /* waiting for critical */ \\\n" |
25530 | " macro (ompt_state_wait_atomic, 0x043) /* waiting for atomic */ \\\n" |
25531 | " macro (ompt_state_wait_ordered, 0x044) /* waiting for ordered */ \\\n" |
25532 | " \\\n" |
25533 | " /* target wait states (128..255) */ \\\n" |
25534 | " macro (ompt_state_wait_target, 0x080) /* waiting for target region */ \\\n" |
25535 | " macro (ompt_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \\\n" |
25536 | " macro (ompt_state_wait_target_update, 0x082) /* waiting for target update operation */ \\\n" |
25537 | " \\\n" |
25538 | " /* misc (256..511) */ \\\n" |
25539 | " macro (ompt_state_idle, 0x100) /* waiting for work */ \\\n" |
25540 | " macro (ompt_state_overhead, 0x101) /* overhead excluding wait states */ \\\n" |
25541 | " \\\n" |
25542 | " /* implementation-specific states (512..) */\n" |
25543 | "\n" |
25544 | "\n" |
25545 | "#define FOREACH_KMP_MUTEX_IMPL(macro) \\\n" |
25546 | " macro (kmp_mutex_impl_none, 0) /* unknown implementation */ \\\n" |
25547 | " macro (kmp_mutex_impl_spin, 1) /* based on spin */ \\\n" |
25548 | " macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \\\n" |
25549 | " macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */\n" |
25550 | "\n" |
25551 | "#define FOREACH_OMPT_EVENT(macro) \\\n" |
25552 | " \\\n" |
25553 | " /*--- Mandatory Events ---*/ \\\n" |
25554 | " macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \\\n" |
25555 | " macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \\\n" |
25556 | " \\\n" |
25557 | " macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \\\n" |
25558 | " macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \\\n" |
25559 | " \\\n" |
25560 | " macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \\\n" |
25561 | " macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \\\n" |
25562 | " macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \\\n" |
25563 | " \\\n" |
25564 | " macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \\\n" |
25565 | " macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \\\n" |
25566 | " macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \\\n" |
25567 | " \\\n" |
25568 | " macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \\\n" |
25569 | " \\\n" |
25570 | " macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \\\n" |
25571 | " macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \\\n" |
25572 | " \\\n" |
25573 | " macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \\\n" |
25574 | " macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \\\n" |
25575 | " \\\n" |
25576 | " /* Optional Events */ \\\n" |
25577 | " macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \\\n" |
25578 | " \\\n" |
25579 | " macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 17) /* mutex released */ \\\n" |
25580 | " \\\n" |
25581 | " macro (ompt_callback_dependences, ompt_callback_dependences_t, 18) /* report task dependences */ \\\n" |
25582 | " macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19) /* report task dependence */ \\\n" |
25583 | " \\\n" |
25584 | " macro (ompt_callback_work, ompt_callback_work_t, 20) /* task at work begin or end */ \\\n" |
25585 | " \\\n" |
25586 | " macro (ompt_callback_master, ompt_callback_master_t, 21) /* task at master begin or end */ \\\n" |
25587 | " \\\n" |
25588 | " macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \\\n" |
25589 | " \\\n" |
25590 | " macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \\\n" |
25591 | " \\\n" |
25592 | " macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 24) /* lock init */ \\\n" |
25593 | " macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 25) /* lock destroy */ \\\n" |
25594 | " \\\n" |
25595 | " macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26) /* mutex acquire */ \\\n" |
25596 | " macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 27) /* mutex acquired */ \\\n" |
25597 | " \\\n" |
25598 | " macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 28) /* nest lock */ \\\n" |
25599 | " \\\n" |
25600 | " macro (ompt_callback_flush, ompt_callback_flush_t, 29) /* after executing flush */ \\\n" |
25601 | " \\\n" |
25602 | " macro (ompt_callback_cancel, ompt_callback_cancel_t, 30) /* cancel innermost binding region */ \\\n" |
25603 | " \\\n" |
25604 | " macro (ompt_callback_reduction, ompt_callback_sync_region_t, 31) /* reduction */ \\\n" |
25605 | " \\\n" |
25606 | " macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */\n" |
25607 | "\n" |
25608 | "/*****************************************************************************\n" |
25609 | " * implementation specific types\n" |
25610 | " *****************************************************************************/\n" |
25611 | "\n" |
25612 | "typedef enum kmp_mutex_impl_t {\n" |
25613 | "#define kmp_mutex_impl_macro(impl, code) impl = code,\n" |
25614 | " FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro)\n" |
25615 | "#undef kmp_mutex_impl_macro\n" |
25616 | "} kmp_mutex_impl_t;\n" |
25617 | "\n" |
25618 | "/*****************************************************************************\n" |
25619 | " * definitions generated from spec\n" |
25620 | " *****************************************************************************/\n" |
25621 | "\n" |
25622 | "typedef enum ompt_callbacks_t {\n" |
25623 | " ompt_callback_thread_begin = 1,\n" |
25624 | " ompt_callback_thread_end = 2,\n" |
25625 | " ompt_callback_parallel_begin = 3,\n" |
25626 | " ompt_callback_parallel_end = 4,\n" |
25627 | " ompt_callback_task_create = 5,\n" |
25628 | " ompt_callback_task_schedule = 6,\n" |
25629 | " ompt_callback_implicit_task = 7,\n" |
25630 | " ompt_callback_target = 8,\n" |
25631 | " ompt_callback_target_data_op = 9,\n" |
25632 | " ompt_callback_target_submit = 10,\n" |
25633 | " ompt_callback_control_tool = 11,\n" |
25634 | " ompt_callback_device_initialize = 12,\n" |
25635 | " ompt_callback_device_finalize = 13,\n" |
25636 | " ompt_callback_device_load = 14,\n" |
25637 | " ompt_callback_device_unload = 15,\n" |
25638 | " ompt_callback_sync_region_wait = 16,\n" |
25639 | " ompt_callback_mutex_released = 17,\n" |
25640 | " ompt_callback_dependences = 18,\n" |
25641 | " ompt_callback_task_dependence = 19,\n" |
25642 | " ompt_callback_work = 20,\n" |
25643 | " ompt_callback_master = 21,\n" |
25644 | " ompt_callback_target_map = 22,\n" |
25645 | " ompt_callback_sync_region = 23,\n" |
25646 | " ompt_callback_lock_init = 24,\n" |
25647 | " ompt_callback_lock_destroy = 25,\n" |
25648 | " ompt_callback_mutex_acquire = 26,\n" |
25649 | " ompt_callback_mutex_acquired = 27,\n" |
25650 | " ompt_callback_nest_lock = 28,\n" |
25651 | " ompt_callback_flush = 29,\n" |
25652 | " ompt_callback_cancel = 30,\n" |
25653 | " ompt_callback_reduction = 31,\n" |
25654 | " ompt_callback_dispatch = 32\n" |
25655 | "} ompt_callbacks_t;\n" |
25656 | "\n" |
25657 | "typedef enum ompt_record_t {\n" |
25658 | " ompt_record_ompt = 1,\n" |
25659 | " ompt_record_native = 2,\n" |
25660 | " ompt_record_invalid = 3\n" |
25661 | "} ompt_record_t;\n" |
25662 | "\n" |
25663 | "typedef enum ompt_record_native_t {\n" |
25664 | " ompt_record_native_info = 1,\n" |
25665 | " ompt_record_native_event = 2\n" |
25666 | "} ompt_record_native_t;\n" |
25667 | "\n" |
25668 | "typedef enum ompt_set_result_t {\n" |
25669 | " ompt_set_error = 0,\n" |
25670 | " ompt_set_never = 1,\n" |
25671 | " ompt_set_impossible = 2,\n" |
25672 | " ompt_set_sometimes = 3,\n" |
25673 | " ompt_set_sometimes_paired = 4,\n" |
25674 | " ompt_set_always = 5\n" |
25675 | "} ompt_set_result_t;\n" |
25676 | "\n" |
25677 | "typedef uint64_t ompt_id_t;\n" |
25678 | "\n" |
25679 | "typedef uint64_t ompt_device_time_t;\n" |
25680 | "\n" |
25681 | "typedef uint64_t ompt_buffer_cursor_t;\n" |
25682 | "\n" |
25683 | "typedef enum ompt_thread_t {\n" |
25684 | " ompt_thread_initial = 1,\n" |
25685 | " ompt_thread_worker = 2,\n" |
25686 | " ompt_thread_other = 3,\n" |
25687 | " ompt_thread_unknown = 4\n" |
25688 | "} ompt_thread_t;\n" |
25689 | "\n" |
25690 | "typedef enum ompt_scope_endpoint_t {\n" |
25691 | " ompt_scope_begin = 1,\n" |
25692 | " ompt_scope_end = 2\n" |
25693 | "} ompt_scope_endpoint_t;\n" |
25694 | "\n" |
25695 | "typedef enum ompt_dispatch_t {\n" |
25696 | " ompt_dispatch_iteration = 1,\n" |
25697 | " ompt_dispatch_section = 2\n" |
25698 | "} ompt_dispatch_t;\n" |
25699 | "\n" |
25700 | "typedef enum ompt_sync_region_t {\n" |
25701 | " ompt_sync_region_barrier = 1,\n" |
25702 | " ompt_sync_region_barrier_implicit = 2,\n" |
25703 | " ompt_sync_region_barrier_explicit = 3,\n" |
25704 | " ompt_sync_region_barrier_implementation = 4,\n" |
25705 | " ompt_sync_region_taskwait = 5,\n" |
25706 | " ompt_sync_region_taskgroup = 6,\n" |
25707 | " ompt_sync_region_reduction = 7\n" |
25708 | "} ompt_sync_region_t;\n" |
25709 | "\n" |
25710 | "typedef enum ompt_target_data_op_t {\n" |
25711 | " ompt_target_data_alloc = 1,\n" |
25712 | " ompt_target_data_transfer_to_device = 2,\n" |
25713 | " ompt_target_data_transfer_from_device = 3,\n" |
25714 | " ompt_target_data_delete = 4,\n" |
25715 | " ompt_target_data_associate = 5,\n" |
25716 | " ompt_target_data_disassociate = 6\n" |
25717 | "} ompt_target_data_op_t;\n" |
25718 | "\n" |
25719 | "typedef enum ompt_work_t {\n" |
25720 | " ompt_work_loop = 1,\n" |
25721 | " ompt_work_sections = 2,\n" |
25722 | " ompt_work_single_executor = 3,\n" |
25723 | " ompt_work_single_other = 4,\n" |
25724 | " ompt_work_workshare = 5,\n" |
25725 | " ompt_work_distribute = 6,\n" |
25726 | " ompt_work_taskloop = 7\n" |
25727 | "} ompt_work_t;\n" |
25728 | "\n" |
25729 | "typedef enum ompt_mutex_t {\n" |
25730 | " ompt_mutex_lock = 1,\n" |
25731 | " ompt_mutex_test_lock = 2,\n" |
25732 | " ompt_mutex_nest_lock = 3,\n" |
25733 | " ompt_mutex_test_nest_lock = 4,\n" |
25734 | " ompt_mutex_critical = 5,\n" |
25735 | " ompt_mutex_atomic = 6,\n" |
25736 | " ompt_mutex_ordered = 7\n" |
25737 | "} ompt_mutex_t;\n" |
25738 | "\n" |
25739 | "typedef enum ompt_native_mon_flag_t {\n" |
25740 | " ompt_native_data_motion_explicit = 0x01,\n" |
25741 | " ompt_native_data_motion_implicit = 0x02,\n" |
25742 | " ompt_native_kernel_invocation = 0x04,\n" |
25743 | " ompt_native_kernel_execution = 0x08,\n" |
25744 | " ompt_native_driver = 0x10,\n" |
25745 | " ompt_native_runtime = 0x20,\n" |
25746 | " ompt_native_overhead = 0x40,\n" |
25747 | " ompt_native_idleness = 0x80\n" |
25748 | "} ompt_native_mon_flag_t;\n" |
25749 | "\n" |
25750 | "typedef enum ompt_task_flag_t {\n" |
25751 | " ompt_task_initial = 0x00000001,\n" |
25752 | " ompt_task_implicit = 0x00000002,\n" |
25753 | " ompt_task_explicit = 0x00000004,\n" |
25754 | " ompt_task_target = 0x00000008,\n" |
25755 | " ompt_task_undeferred = 0x08000000,\n" |
25756 | " ompt_task_untied = 0x10000000,\n" |
25757 | " ompt_task_final = 0x20000000,\n" |
25758 | " ompt_task_mergeable = 0x40000000,\n" |
25759 | " ompt_task_merged = 0x80000000\n" |
25760 | "} ompt_task_flag_t;\n" |
25761 | "\n" |
25762 | "typedef enum ompt_task_status_t {\n" |
25763 | " ompt_task_complete = 1,\n" |
25764 | " ompt_task_yield = 2,\n" |
25765 | " ompt_task_cancel = 3,\n" |
25766 | " ompt_task_detach = 4,\n" |
25767 | " ompt_task_early_fulfill = 5,\n" |
25768 | " ompt_task_late_fulfill = 6,\n" |
25769 | " ompt_task_switch = 7\n" |
25770 | "} ompt_task_status_t;\n" |
25771 | "\n" |
25772 | "typedef enum ompt_target_t {\n" |
25773 | " ompt_target = 1,\n" |
25774 | " ompt_target_enter_data = 2,\n" |
25775 | " ompt_target_exit_data = 3,\n" |
25776 | " ompt_target_update = 4\n" |
25777 | "} ompt_target_t;\n" |
25778 | "\n" |
25779 | "typedef enum ompt_parallel_flag_t {\n" |
25780 | " ompt_parallel_invoker_program = 0x00000001,\n" |
25781 | " ompt_parallel_invoker_runtime = 0x00000002,\n" |
25782 | " ompt_parallel_league = 0x40000000,\n" |
25783 | " ompt_parallel_team = 0x80000000\n" |
25784 | "} ompt_parallel_flag_t;\n" |
25785 | "\n" |
25786 | "typedef enum ompt_target_map_flag_t {\n" |
25787 | " ompt_target_map_flag_to = 0x01,\n" |
25788 | " ompt_target_map_flag_from = 0x02,\n" |
25789 | " ompt_target_map_flag_alloc = 0x04,\n" |
25790 | " ompt_target_map_flag_release = 0x08,\n" |
25791 | " ompt_target_map_flag_delete = 0x10,\n" |
25792 | " ompt_target_map_flag_implicit = 0x20\n" |
25793 | "} ompt_target_map_flag_t;\n" |
25794 | "\n" |
25795 | "typedef enum ompt_dependence_type_t {\n" |
25796 | " ompt_dependence_type_in = 1,\n" |
25797 | " ompt_dependence_type_out = 2,\n" |
25798 | " ompt_dependence_type_inout = 3,\n" |
25799 | " ompt_dependence_type_mutexinoutset = 4,\n" |
25800 | " ompt_dependence_type_source = 5,\n" |
25801 | " ompt_dependence_type_sink = 6\n" |
25802 | "} ompt_dependence_type_t;\n" |
25803 | "\n" |
25804 | "typedef enum ompt_cancel_flag_t {\n" |
25805 | " ompt_cancel_parallel = 0x01,\n" |
25806 | " ompt_cancel_sections = 0x02,\n" |
25807 | " ompt_cancel_loop = 0x04,\n" |
25808 | " ompt_cancel_taskgroup = 0x08,\n" |
25809 | " ompt_cancel_activated = 0x10,\n" |
25810 | " ompt_cancel_detected = 0x20,\n" |
25811 | " ompt_cancel_discarded_task = 0x40\n" |
25812 | "} ompt_cancel_flag_t;\n" |
25813 | "\n" |
25814 | "typedef uint64_t ompt_hwid_t;\n" |
25815 | "\n" |
25816 | "typedef uint64_t ompt_wait_id_t;\n" |
25817 | "\n" |
25818 | "typedef enum ompt_frame_flag_t {\n" |
25819 | " ompt_frame_runtime = 0x00,\n" |
25820 | " ompt_frame_application = 0x01,\n" |
25821 | " ompt_frame_cfa = 0x10,\n" |
25822 | " ompt_frame_framepointer = 0x20,\n" |
25823 | " ompt_frame_stackaddress = 0x30\n" |
25824 | "} ompt_frame_flag_t; \n" |
25825 | "\n" |
25826 | "typedef enum ompt_state_t {\n" |
25827 | " ompt_state_work_serial = 0x000,\n" |
25828 | " ompt_state_work_parallel = 0x001,\n" |
25829 | " ompt_state_work_reduction = 0x002,\n" |
25830 | "\n" |
25831 | " ompt_state_wait_barrier = 0x010,\n" |
25832 | " ompt_state_wait_barrier_implicit_parallel = 0x011,\n" |
25833 | " ompt_state_wait_barrier_implicit_workshare = 0x012,\n" |
25834 | " ompt_state_wait_barrier_implicit = 0x013,\n" |
25835 | " ompt_state_wait_barrier_explicit = 0x014,\n" |
25836 | "\n" |
25837 | " ompt_state_wait_taskwait = 0x020,\n" |
25838 | " ompt_state_wait_taskgroup = 0x021,\n" |
25839 | "\n" |
25840 | " ompt_state_wait_mutex = 0x040,\n" |
25841 | " ompt_state_wait_lock = 0x041,\n" |
25842 | " ompt_state_wait_critical = 0x042,\n" |
25843 | " ompt_state_wait_atomic = 0x043,\n" |
25844 | " ompt_state_wait_ordered = 0x044,\n" |
25845 | "\n" |
25846 | " ompt_state_wait_target = 0x080,\n" |
25847 | " ompt_state_wait_target_map = 0x081,\n" |
25848 | " ompt_state_wait_target_update = 0x082,\n" |
25849 | "\n" |
25850 | " ompt_state_idle = 0x100,\n" |
25851 | " ompt_state_overhead = 0x101,\n" |
25852 | " ompt_state_undefined = 0x102\n" |
25853 | "} ompt_state_t;\n" |
25854 | "\n" |
25855 | "typedef uint64_t (*ompt_get_unique_id_t) (void);\n" |
25856 | "\n" |
25857 | "typedef uint64_t ompd_size_t;\n" |
25858 | "\n" |
25859 | "typedef uint64_t ompd_wait_id_t;\n" |
25860 | "\n" |
25861 | "typedef uint64_t ompd_addr_t;\n" |
25862 | "typedef int64_t ompd_word_t;\n" |
25863 | "typedef uint64_t ompd_seg_t;\n" |
25864 | "\n" |
25865 | "typedef uint64_t ompd_device_t;\n" |
25866 | "\n" |
25867 | "typedef uint64_t ompd_thread_id_t;\n" |
25868 | "\n" |
25869 | "typedef enum ompd_scope_t {\n" |
25870 | " ompd_scope_global = 1,\n" |
25871 | " ompd_scope_address_space = 2,\n" |
25872 | " ompd_scope_thread = 3,\n" |
25873 | " ompd_scope_parallel = 4,\n" |
25874 | " ompd_scope_implicit_task = 5,\n" |
25875 | " ompd_scope_task = 6\n" |
25876 | "} ompd_scope_t;\n" |
25877 | "\n" |
25878 | "typedef uint64_t ompd_icv_id_t;\n" |
25879 | "\n" |
25880 | "typedef enum ompd_rc_t {\n" |
25881 | " ompd_rc_ok = 0,\n" |
25882 | " ompd_rc_unavailable = 1,\n" |
25883 | " ompd_rc_stale_handle = 2,\n" |
25884 | " ompd_rc_bad_input = 3,\n" |
25885 | " ompd_rc_error = 4,\n" |
25886 | " ompd_rc_unsupported = 5,\n" |
25887 | " ompd_rc_needs_state_tracking = 6,\n" |
25888 | " ompd_rc_incompatible = 7,\n" |
25889 | " ompd_rc_device_read_error = 8,\n" |
25890 | " ompd_rc_device_write_error = 9,\n" |
25891 | " ompd_rc_nomem = 10,\n" |
25892 | "} ompd_rc_t;\n" |
25893 | "\n" |
25894 | "typedef void (*ompt_interface_fn_t) (void);\n" |
25895 | "\n" |
25896 | "typedef ompt_interface_fn_t (*ompt_function_lookup_t) (\n" |
25897 | " const char *interface_function_name\n" |
25898 | ");\n" |
25899 | "\n" |
25900 | "typedef union ompt_data_t {\n" |
25901 | " uint64_t value;\n" |
25902 | " void *ptr;\n" |
25903 | "} ompt_data_t;\n" |
25904 | "\n" |
25905 | "typedef struct ompt_frame_t {\n" |
25906 | " ompt_data_t exit_frame;\n" |
25907 | " ompt_data_t enter_frame;\n" |
25908 | " int exit_frame_flags;\n" |
25909 | " int enter_frame_flags;\n" |
25910 | "} ompt_frame_t;\n" |
25911 | "\n" |
25912 | "typedef void (*ompt_callback_t) (void);\n" |
25913 | "\n" |
25914 | "typedef void ompt_device_t;\n" |
25915 | "\n" |
25916 | "typedef void ompt_buffer_t;\n" |
25917 | "\n" |
25918 | "typedef void (*ompt_callback_buffer_request_t) (\n" |
25919 | " int device_num,\n" |
25920 | " ompt_buffer_t **buffer,\n" |
25921 | " size_t *bytes\n" |
25922 | ");\n" |
25923 | "\n" |
25924 | "typedef void (*ompt_callback_buffer_complete_t) (\n" |
25925 | " int device_num,\n" |
25926 | " ompt_buffer_t *buffer,\n" |
25927 | " size_t bytes,\n" |
25928 | " ompt_buffer_cursor_t begin,\n" |
25929 | " int buffer_owned\n" |
25930 | ");\n" |
25931 | "\n" |
25932 | "typedef void (*ompt_finalize_t) (\n" |
25933 | " ompt_data_t *tool_data\n" |
25934 | ");\n" |
25935 | "\n" |
25936 | "typedef int (*ompt_initialize_t) (\n" |
25937 | " ompt_function_lookup_t lookup,\n" |
25938 | " int initial_device_num,\n" |
25939 | " ompt_data_t *tool_data\n" |
25940 | ");\n" |
25941 | "\n" |
25942 | "typedef struct ompt_start_tool_result_t {\n" |
25943 | " ompt_initialize_t initialize;\n" |
25944 | " ompt_finalize_t finalize;\n" |
25945 | " ompt_data_t tool_data;\n" |
25946 | "} ompt_start_tool_result_t;\n" |
25947 | "\n" |
25948 | "typedef struct ompt_record_abstract_t {\n" |
25949 | " ompt_record_native_t rclass;\n" |
25950 | " const char *type;\n" |
25951 | " ompt_device_time_t start_time;\n" |
25952 | " ompt_device_time_t end_time;\n" |
25953 | " ompt_hwid_t hwid;\n" |
25954 | "} ompt_record_abstract_t;\n" |
25955 | "\n" |
25956 | "typedef struct ompt_dependence_t {\n" |
25957 | " ompt_data_t variable;\n" |
25958 | " ompt_dependence_type_t dependence_type;\n" |
25959 | "} ompt_dependence_t;\n" |
25960 | "\n" |
25961 | "typedef int (*ompt_enumerate_states_t) (\n" |
25962 | " int current_state,\n" |
25963 | " int *next_state,\n" |
25964 | " const char **next_state_name\n" |
25965 | ");\n" |
25966 | "\n" |
25967 | "typedef int (*ompt_enumerate_mutex_impls_t) (\n" |
25968 | " int current_impl,\n" |
25969 | " int *next_impl,\n" |
25970 | " const char **next_impl_name\n" |
25971 | ");\n" |
25972 | "\n" |
25973 | "typedef ompt_set_result_t (*ompt_set_callback_t) (\n" |
25974 | " ompt_callbacks_t event,\n" |
25975 | " ompt_callback_t callback\n" |
25976 | ");\n" |
25977 | "\n" |
25978 | "typedef int (*ompt_get_callback_t) (\n" |
25979 | " ompt_callbacks_t event,\n" |
25980 | " ompt_callback_t *callback\n" |
25981 | ");\n" |
25982 | "\n" |
25983 | "typedef ompt_data_t *(*ompt_get_thread_data_t) (void);\n" |
25984 | "\n" |
25985 | "typedef int (*ompt_get_num_procs_t) (void);\n" |
25986 | "\n" |
25987 | "typedef int (*ompt_get_num_places_t) (void);\n" |
25988 | "\n" |
25989 | "typedef int (*ompt_get_place_proc_ids_t) (\n" |
25990 | " int place_num,\n" |
25991 | " int ids_size,\n" |
25992 | " int *ids\n" |
25993 | ");\n" |
25994 | "\n" |
25995 | "typedef int (*ompt_get_place_num_t) (void);\n" |
25996 | "\n" |
25997 | "typedef int (*ompt_get_partition_place_nums_t) (\n" |
25998 | " int place_nums_size,\n" |
25999 | " int *place_nums\n" |
26000 | ");\n" |
26001 | "\n" |
26002 | "typedef int (*ompt_get_proc_id_t) (void);\n" |
26003 | "\n" |
26004 | "typedef int (*ompt_get_state_t) (\n" |
26005 | " ompt_wait_id_t *wait_id\n" |
26006 | ");\n" |
26007 | "\n" |
26008 | "typedef int (*ompt_get_parallel_info_t) (\n" |
26009 | " int ancestor_level,\n" |
26010 | " ompt_data_t **parallel_data,\n" |
26011 | " int *team_size\n" |
26012 | ");\n" |
26013 | "\n" |
26014 | "typedef int (*ompt_get_task_info_t) (\n" |
26015 | " int ancestor_level,\n" |
26016 | " int *flags,\n" |
26017 | " ompt_data_t **task_data,\n" |
26018 | " ompt_frame_t **task_frame,\n" |
26019 | " ompt_data_t **parallel_data,\n" |
26020 | " int *thread_num\n" |
26021 | ");\n" |
26022 | "\n" |
26023 | "typedef int (*ompt_get_task_memory_t)(\n" |
26024 | " void **addr,\n" |
26025 | " size_t *size,\n" |
26026 | " int block\n" |
26027 | ");\n" |
26028 | "\n" |
26029 | "typedef int (*ompt_get_target_info_t) (\n" |
26030 | " uint64_t *device_num,\n" |
26031 | " ompt_id_t *target_id,\n" |
26032 | " ompt_id_t *host_op_id\n" |
26033 | ");\n" |
26034 | "\n" |
26035 | "typedef int (*ompt_get_num_devices_t) (void);\n" |
26036 | "\n" |
26037 | "typedef void (*ompt_finalize_tool_t) (void);\n" |
26038 | "\n" |
26039 | "typedef int (*ompt_get_device_num_procs_t) (\n" |
26040 | " ompt_device_t *device\n" |
26041 | ");\n" |
26042 | "\n" |
26043 | "typedef ompt_device_time_t (*ompt_get_device_time_t) (\n" |
26044 | " ompt_device_t *device\n" |
26045 | ");\n" |
26046 | "\n" |
26047 | "typedef double (*ompt_translate_time_t) (\n" |
26048 | " ompt_device_t *device,\n" |
26049 | " ompt_device_time_t time\n" |
26050 | ");\n" |
26051 | "\n" |
26052 | "typedef ompt_set_result_t (*ompt_set_trace_ompt_t) (\n" |
26053 | " ompt_device_t *device,\n" |
26054 | " unsigned int enable,\n" |
26055 | " unsigned int etype\n" |
26056 | ");\n" |
26057 | "\n" |
26058 | "typedef ompt_set_result_t (*ompt_set_trace_native_t) (\n" |
26059 | " ompt_device_t *device,\n" |
26060 | " int enable,\n" |
26061 | " int flags\n" |
26062 | ");\n" |
26063 | "\n" |
26064 | "typedef int (*ompt_start_trace_t) (\n" |
26065 | " ompt_device_t *device,\n" |
26066 | " ompt_callback_buffer_request_t request,\n" |
26067 | " ompt_callback_buffer_complete_t complete\n" |
26068 | ");\n" |
26069 | "\n" |
26070 | "typedef int (*ompt_pause_trace_t) (\n" |
26071 | " ompt_device_t *device,\n" |
26072 | " int begin_pause\n" |
26073 | ");\n" |
26074 | "\n" |
26075 | "typedef int (*ompt_flush_trace_t) (\n" |
26076 | " ompt_device_t *device\n" |
26077 | ");\n" |
26078 | "\n" |
26079 | "typedef int (*ompt_stop_trace_t) (\n" |
26080 | " ompt_device_t *device\n" |
26081 | ");\n" |
26082 | "\n" |
26083 | "typedef int (*ompt_advance_buffer_cursor_t) (\n" |
26084 | " ompt_device_t *device,\n" |
26085 | " ompt_buffer_t *buffer,\n" |
26086 | " size_t size,\n" |
26087 | " ompt_buffer_cursor_t current,\n" |
26088 | " ompt_buffer_cursor_t *next\n" |
26089 | ");\n" |
26090 | "\n" |
26091 | "typedef ompt_record_t (*ompt_get_record_type_t) (\n" |
26092 | " ompt_buffer_t *buffer,\n" |
26093 | " ompt_buffer_cursor_t current\n" |
26094 | ");\n" |
26095 | "\n" |
26096 | "typedef void *(*ompt_get_record_native_t) (\n" |
26097 | " ompt_buffer_t *buffer,\n" |
26098 | " ompt_buffer_cursor_t current,\n" |
26099 | " ompt_id_t *host_op_id\n" |
26100 | ");\n" |
26101 | "\n" |
26102 | "typedef ompt_record_abstract_t *\n" |
26103 | "(*ompt_get_record_abstract_t) (\n" |
26104 | " void *native_record\n" |
26105 | ");\n" |
26106 | "\n" |
26107 | "typedef void (*ompt_callback_thread_begin_t) (\n" |
26108 | " ompt_thread_t thread_type,\n" |
26109 | " ompt_data_t *thread_data\n" |
26110 | ");\n" |
26111 | "\n" |
26112 | "typedef struct ompt_record_thread_begin_t {\n" |
26113 | " ompt_thread_t thread_type;\n" |
26114 | "} ompt_record_thread_begin_t;\n" |
26115 | "\n" |
26116 | "typedef void (*ompt_callback_thread_end_t) (\n" |
26117 | " ompt_data_t *thread_data\n" |
26118 | ");\n" |
26119 | "\n" |
26120 | "typedef void (*ompt_callback_parallel_begin_t) (\n" |
26121 | " ompt_data_t *encountering_task_data,\n" |
26122 | " const ompt_frame_t *encountering_task_frame,\n" |
26123 | " ompt_data_t *parallel_data,\n" |
26124 | " unsigned int requested_parallelism,\n" |
26125 | " int flags,\n" |
26126 | " const void *codeptr_ra\n" |
26127 | ");\n" |
26128 | "\n" |
26129 | "typedef struct ompt_record_parallel_begin_t {\n" |
26130 | " ompt_id_t encountering_task_id;\n" |
26131 | " ompt_id_t parallel_id;\n" |
26132 | " unsigned int requested_parallelism;\n" |
26133 | " int flags;\n" |
26134 | " const void *codeptr_ra;\n" |
26135 | "} ompt_record_parallel_begin_t;\n" |
26136 | "\n" |
26137 | "typedef void (*ompt_callback_parallel_end_t) (\n" |
26138 | " ompt_data_t *parallel_data,\n" |
26139 | " ompt_data_t *encountering_task_data,\n" |
26140 | " int flags,\n" |
26141 | " const void *codeptr_ra\n" |
26142 | ");\n" |
26143 | "\n" |
26144 | "typedef struct ompt_record_parallel_end_t {\n" |
26145 | " ompt_id_t parallel_id;\n" |
26146 | " ompt_id_t encountering_task_id;\n" |
26147 | " int flags;\n" |
26148 | " const void *codeptr_ra;\n" |
26149 | "} ompt_record_parallel_end_t;\n" |
26150 | "\n" |
26151 | "typedef void (*ompt_callback_work_t) (\n" |
26152 | " ompt_work_t wstype,\n" |
26153 | " ompt_scope_endpoint_t endpoint,\n" |
26154 | " ompt_data_t *parallel_data,\n" |
26155 | " ompt_data_t *task_data,\n" |
26156 | " uint64_t count,\n" |
26157 | " const void *codeptr_ra\n" |
26158 | ");\n" |
26159 | "\n" |
26160 | "typedef struct ompt_record_work_t {\n" |
26161 | " ompt_work_t wstype;\n" |
26162 | " ompt_scope_endpoint_t endpoint;\n" |
26163 | " ompt_id_t parallel_id;\n" |
26164 | " ompt_id_t task_id;\n" |
26165 | " uint64_t count;\n" |
26166 | " const void *codeptr_ra;\n" |
26167 | "} ompt_record_work_t;\n" |
26168 | "\n" |
26169 | "typedef void (*ompt_callback_dispatch_t) (\n" |
26170 | " ompt_data_t *parallel_data,\n" |
26171 | " ompt_data_t *task_data,\n" |
26172 | " ompt_dispatch_t kind,\n" |
26173 | " ompt_data_t instance \n" |
26174 | ");\n" |
26175 | "\n" |
26176 | "typedef struct ompt_record_dispatch_t {\n" |
26177 | " ompt_id_t parallel_id;\n" |
26178 | " ompt_id_t task_id;\n" |
26179 | " ompt_dispatch_t kind;\n" |
26180 | " ompt_data_t instance; \n" |
26181 | "} ompt_record_dispatch_t;\n" |
26182 | "\n" |
26183 | "typedef void (*ompt_callback_task_create_t) (\n" |
26184 | " ompt_data_t *encountering_task_data,\n" |
26185 | " const ompt_frame_t *encountering_task_frame,\n" |
26186 | " ompt_data_t *new_task_data,\n" |
26187 | " int flags,\n" |
26188 | " int has_dependences,\n" |
26189 | " const void *codeptr_ra\n" |
26190 | ");\n" |
26191 | "\n" |
26192 | "typedef struct ompt_record_task_create_t {\n" |
26193 | " ompt_id_t encountering_task_id;\n" |
26194 | " ompt_id_t new_task_id;\n" |
26195 | " int flags;\n" |
26196 | " int has_dependences;\n" |
26197 | " const void *codeptr_ra;\n" |
26198 | "} ompt_record_task_create_t;\n" |
26199 | "\n" |
26200 | "typedef void (*ompt_callback_dependences_t) (\n" |
26201 | " ompt_data_t *task_data,\n" |
26202 | " const ompt_dependence_t *deps,\n" |
26203 | " int ndeps\n" |
26204 | ");\n" |
26205 | "\n" |
26206 | "typedef struct ompt_record_dependences_t {\n" |
26207 | " ompt_id_t task_id;\n" |
26208 | " ompt_dependence_t dep;\n" |
26209 | " int ndeps;\n" |
26210 | "} ompt_record_dependences_t;\n" |
26211 | "\n" |
26212 | "typedef void (*ompt_callback_task_dependence_t) (\n" |
26213 | " ompt_data_t *src_task_data,\n" |
26214 | " ompt_data_t *sink_task_data\n" |
26215 | ");\n" |
26216 | "\n" |
26217 | "typedef struct ompt_record_task_dependence_t {\n" |
26218 | " ompt_id_t src_task_id;\n" |
26219 | " ompt_id_t sink_task_id;\n" |
26220 | "} ompt_record_task_dependence_t;\n" |
26221 | "\n" |
26222 | "typedef void (*ompt_callback_task_schedule_t) (\n" |
26223 | " ompt_data_t *prior_task_data,\n" |
26224 | " ompt_task_status_t prior_task_status,\n" |
26225 | " ompt_data_t *next_task_data\n" |
26226 | ");\n" |
26227 | "\n" |
26228 | "typedef struct ompt_record_task_schedule_t {\n" |
26229 | " ompt_id_t prior_task_id;\n" |
26230 | " ompt_task_status_t prior_task_status;\n" |
26231 | " ompt_id_t next_task_id;\n" |
26232 | "} ompt_record_task_schedule_t;\n" |
26233 | "\n" |
26234 | "typedef void (*ompt_callback_implicit_task_t) (\n" |
26235 | " ompt_scope_endpoint_t endpoint,\n" |
26236 | " ompt_data_t *parallel_data,\n" |
26237 | " ompt_data_t *task_data,\n" |
26238 | " unsigned int actual_parallelism,\n" |
26239 | " unsigned int index,\n" |
26240 | " int flags\n" |
26241 | ");\n" |
26242 | "\n" |
26243 | "typedef struct ompt_record_implicit_task_t {\n" |
26244 | " ompt_scope_endpoint_t endpoint;\n" |
26245 | " ompt_id_t parallel_id;\n" |
26246 | " ompt_id_t task_id;\n" |
26247 | " unsigned int actual_parallelism;\n" |
26248 | " unsigned int index;\n" |
26249 | " int flags;\n" |
26250 | "} ompt_record_implicit_task_t;\n" |
26251 | "\n" |
26252 | "typedef void (*ompt_callback_master_t) (\n" |
26253 | " ompt_scope_endpoint_t endpoint,\n" |
26254 | " ompt_data_t *parallel_data,\n" |
26255 | " ompt_data_t *task_data,\n" |
26256 | " const void *codeptr_ra\n" |
26257 | ");\n" |
26258 | "\n" |
26259 | "typedef struct ompt_record_master_t {\n" |
26260 | " ompt_scope_endpoint_t endpoint;\n" |
26261 | " ompt_id_t parallel_id;\n" |
26262 | " ompt_id_t task_id;\n" |
26263 | " const void *codeptr_ra;\n" |
26264 | "} ompt_record_master_t;\n" |
26265 | "\n" |
26266 | "typedef void (*ompt_callback_sync_region_t) (\n" |
26267 | " ompt_sync_region_t kind,\n" |
26268 | " ompt_scope_endpoint_t endpoint,\n" |
26269 | " ompt_data_t *parallel_data,\n" |
26270 | " ompt_data_t *task_data,\n" |
26271 | " const void *codeptr_ra\n" |
26272 | ");\n" |
26273 | "\n" |
26274 | "typedef struct ompt_record_sync_region_t {\n" |
26275 | " ompt_sync_region_t kind;\n" |
26276 | " ompt_scope_endpoint_t endpoint;\n" |
26277 | " ompt_id_t parallel_id;\n" |
26278 | " ompt_id_t task_id;\n" |
26279 | " const void *codeptr_ra;\n" |
26280 | "} ompt_record_sync_region_t;\n" |
26281 | "\n" |
26282 | "typedef void (*ompt_callback_mutex_acquire_t) (\n" |
26283 | " ompt_mutex_t kind,\n" |
26284 | " unsigned int hint,\n" |
26285 | " unsigned int impl,\n" |
26286 | " ompt_wait_id_t wait_id,\n" |
26287 | " const void *codeptr_ra\n" |
26288 | ");\n" |
26289 | "\n" |
26290 | "typedef struct ompt_record_mutex_acquire_t {\n" |
26291 | " ompt_mutex_t kind;\n" |
26292 | " unsigned int hint;\n" |
26293 | " unsigned int impl;\n" |
26294 | " ompt_wait_id_t wait_id;\n" |
26295 | " const void *codeptr_ra;\n" |
26296 | "} ompt_record_mutex_acquire_t;\n" |
26297 | "\n" |
26298 | "typedef void (*ompt_callback_mutex_t) (\n" |
26299 | " ompt_mutex_t kind,\n" |
26300 | " ompt_wait_id_t wait_id,\n" |
26301 | " const void *codeptr_ra\n" |
26302 | ");\n" |
26303 | "\n" |
26304 | "typedef struct ompt_record_mutex_t {\n" |
26305 | " ompt_mutex_t kind;\n" |
26306 | " ompt_wait_id_t wait_id;\n" |
26307 | " const void *codeptr_ra;\n" |
26308 | "} ompt_record_mutex_t;\n" |
26309 | "\n" |
26310 | "typedef void (*ompt_callback_nest_lock_t) (\n" |
26311 | " ompt_scope_endpoint_t endpoint,\n" |
26312 | " ompt_wait_id_t wait_id,\n" |
26313 | " const void *codeptr_ra\n" |
26314 | ");\n" |
26315 | "\n" |
26316 | "typedef struct ompt_record_nest_lock_t {\n" |
26317 | " ompt_scope_endpoint_t endpoint;\n" |
26318 | " ompt_wait_id_t wait_id;\n" |
26319 | " const void *codeptr_ra;\n" |
26320 | "} ompt_record_nest_lock_t;\n" |
26321 | "\n" |
26322 | "typedef void (*ompt_callback_flush_t) (\n" |
26323 | " ompt_data_t *thread_data,\n" |
26324 | " const void *codeptr_ra\n" |
26325 | ");\n" |
26326 | "\n" |
26327 | "typedef struct ompt_record_flush_t {\n" |
26328 | " const void *codeptr_ra;\n" |
26329 | "} ompt_record_flush_t;\n" |
26330 | "\n" |
26331 | "typedef void (*ompt_callback_cancel_t) (\n" |
26332 | " ompt_data_t *task_data,\n" |
26333 | " int flags,\n" |
26334 | " const void *codeptr_ra\n" |
26335 | ");\n" |
26336 | "\n" |
26337 | "typedef struct ompt_record_cancel_t {\n" |
26338 | " ompt_id_t task_id;\n" |
26339 | " int flags;\n" |
26340 | " const void *codeptr_ra;\n" |
26341 | "} ompt_record_cancel_t;\n" |
26342 | "\n" |
26343 | "typedef void (*ompt_callback_device_initialize_t) (\n" |
26344 | " int device_num,\n" |
26345 | " const char *type,\n" |
26346 | " ompt_device_t *device,\n" |
26347 | " ompt_function_lookup_t lookup,\n" |
26348 | " const char *documentation\n" |
26349 | ");\n" |
26350 | "\n" |
26351 | "typedef void (*ompt_callback_device_finalize_t) (\n" |
26352 | " int device_num\n" |
26353 | ");\n" |
26354 | "\n" |
26355 | "typedef void (*ompt_callback_device_load_t) (\n" |
26356 | " int device_num,\n" |
26357 | " const char *filename,\n" |
26358 | " int64_t offset_in_file,\n" |
26359 | " void *vma_in_file,\n" |
26360 | " size_t bytes,\n" |
26361 | " void *host_addr,\n" |
26362 | " void *device_addr,\n" |
26363 | " uint64_t module_id\n" |
26364 | ");\n" |
26365 | "\n" |
26366 | "typedef void (*ompt_callback_device_unload_t) (\n" |
26367 | " int device_num,\n" |
26368 | " uint64_t module_id\n" |
26369 | ");\n" |
26370 | "\n" |
26371 | "typedef void (*ompt_callback_target_data_op_t) (\n" |
26372 | " ompt_id_t target_id,\n" |
26373 | " ompt_id_t host_op_id,\n" |
26374 | " ompt_target_data_op_t optype,\n" |
26375 | " void *src_addr,\n" |
26376 | " int src_device_num,\n" |
26377 | " void *dest_addr,\n" |
26378 | " int dest_device_num,\n" |
26379 | " size_t bytes,\n" |
26380 | " const void *codeptr_ra\n" |
26381 | ");\n" |
26382 | "\n" |
26383 | "typedef struct ompt_record_target_data_op_t {\n" |
26384 | " ompt_id_t host_op_id;\n" |
26385 | " ompt_target_data_op_t optype;\n" |
26386 | " void *src_addr;\n" |
26387 | " int src_device_num;\n" |
26388 | " void *dest_addr;\n" |
26389 | " int dest_device_num;\n" |
26390 | " size_t bytes;\n" |
26391 | " ompt_device_time_t end_time;\n" |
26392 | " const void *codeptr_ra;\n" |
26393 | "} ompt_record_target_data_op_t;\n" |
26394 | "\n" |
26395 | "typedef void (*ompt_callback_target_t) (\n" |
26396 | " ompt_target_t kind,\n" |
26397 | " ompt_scope_endpoint_t endpoint,\n" |
26398 | " int device_num,\n" |
26399 | " ompt_data_t *task_data,\n" |
26400 | " ompt_id_t target_id,\n" |
26401 | " const void *codeptr_ra\n" |
26402 | ");\n" |
26403 | "\n" |
26404 | "typedef struct ompt_record_target_t {\n" |
26405 | " ompt_target_t kind;\n" |
26406 | " ompt_scope_endpoint_t endpoint;\n" |
26407 | " int device_num;\n" |
26408 | " ompt_id_t task_id;\n" |
26409 | " ompt_id_t target_id;\n" |
26410 | " const void *codeptr_ra;\n" |
26411 | "} ompt_record_target_t;\n" |
26412 | "\n" |
26413 | "typedef void (*ompt_callback_target_map_t) (\n" |
26414 | " ompt_id_t target_id,\n" |
26415 | " unsigned int nitems,\n" |
26416 | " void **host_addr,\n" |
26417 | " void **device_addr,\n" |
26418 | " size_t *bytes,\n" |
26419 | " unsigned int *mapping_flags,\n" |
26420 | " const void *codeptr_ra\n" |
26421 | ");\n" |
26422 | "\n" |
26423 | "typedef struct ompt_record_target_map_t {\n" |
26424 | " ompt_id_t target_id;\n" |
26425 | " unsigned int nitems;\n" |
26426 | " void **host_addr;\n" |
26427 | " void **device_addr;\n" |
26428 | " size_t *bytes;\n" |
26429 | " unsigned int *mapping_flags;\n" |
26430 | " const void *codeptr_ra;\n" |
26431 | "} ompt_record_target_map_t;\n" |
26432 | "\n" |
26433 | "typedef void (*ompt_callback_target_submit_t) (\n" |
26434 | " ompt_id_t target_id,\n" |
26435 | " ompt_id_t host_op_id,\n" |
26436 | " unsigned int requested_num_teams\n" |
26437 | ");\n" |
26438 | "\n" |
26439 | "typedef struct ompt_record_target_kernel_t {\n" |
26440 | " ompt_id_t host_op_id;\n" |
26441 | " unsigned int requested_num_teams;\n" |
26442 | " unsigned int granted_num_teams;\n" |
26443 | " ompt_device_time_t end_time;\n" |
26444 | "} ompt_record_target_kernel_t;\n" |
26445 | "\n" |
26446 | "typedef int (*ompt_callback_control_tool_t) (\n" |
26447 | " uint64_t command,\n" |
26448 | " uint64_t modifier,\n" |
26449 | " void *arg,\n" |
26450 | " const void *codeptr_ra\n" |
26451 | ");\n" |
26452 | "\n" |
26453 | "typedef struct ompt_record_control_tool_t {\n" |
26454 | " uint64_t command;\n" |
26455 | " uint64_t modifier;\n" |
26456 | " const void *codeptr_ra;\n" |
26457 | "} ompt_record_control_tool_t;\n" |
26458 | "\n" |
26459 | "typedef struct ompd_address_t {\n" |
26460 | " ompd_seg_t segment;\n" |
26461 | " ompd_addr_t address;\n" |
26462 | "} ompd_address_t;\n" |
26463 | "\n" |
26464 | "typedef struct ompd_frame_info_t {\n" |
26465 | " ompd_address_t frame_address;\n" |
26466 | " ompd_word_t frame_flag;\n" |
26467 | "} ompd_frame_info_t;\n" |
26468 | "\n" |
26469 | "typedef struct _ompd_aspace_handle ompd_address_space_handle_t;\n" |
26470 | "typedef struct _ompd_thread_handle ompd_thread_handle_t;\n" |
26471 | "typedef struct _ompd_parallel_handle ompd_parallel_handle_t;\n" |
26472 | "typedef struct _ompd_task_handle ompd_task_handle_t;\n" |
26473 | "\n" |
26474 | "typedef struct _ompd_aspace_cont ompd_address_space_context_t;\n" |
26475 | "typedef struct _ompd_thread_cont ompd_thread_context_t;\n" |
26476 | "\n" |
26477 | "typedef struct ompd_device_type_sizes_t {\n" |
26478 | " uint8_t sizeof_char;\n" |
26479 | " uint8_t sizeof_short;\n" |
26480 | " uint8_t sizeof_int;\n" |
26481 | " uint8_t sizeof_long;\n" |
26482 | " uint8_t sizeof_long_long;\n" |
26483 | " uint8_t sizeof_pointer;\n" |
26484 | "} ompd_device_type_sizes_t;\n" |
26485 | "\n" |
26486 | "typedef struct ompt_record_ompt_t {\n" |
26487 | " ompt_callbacks_t type;\n" |
26488 | " ompt_device_time_t time;\n" |
26489 | " ompt_id_t thread_id;\n" |
26490 | " ompt_id_t target_id;\n" |
26491 | " union {\n" |
26492 | " ompt_record_thread_begin_t thread_begin;\n" |
26493 | " ompt_record_parallel_begin_t parallel_begin;\n" |
26494 | " ompt_record_parallel_end_t parallel_end;\n" |
26495 | " ompt_record_work_t work;\n" |
26496 | " ompt_record_dispatch_t dispatch;\n" |
26497 | " ompt_record_task_create_t task_create;\n" |
26498 | " ompt_record_dependences_t dependences;\n" |
26499 | " ompt_record_task_dependence_t task_dependence;\n" |
26500 | " ompt_record_task_schedule_t task_schedule;\n" |
26501 | " ompt_record_implicit_task_t implicit_task;\n" |
26502 | " ompt_record_master_t master;\n" |
26503 | " ompt_record_sync_region_t sync_region;\n" |
26504 | " ompt_record_mutex_acquire_t mutex_acquire;\n" |
26505 | " ompt_record_mutex_t mutex;\n" |
26506 | " ompt_record_nest_lock_t nest_lock;\n" |
26507 | " ompt_record_flush_t flush;\n" |
26508 | " ompt_record_cancel_t cancel;\n" |
26509 | " ompt_record_target_t target;\n" |
26510 | " ompt_record_target_data_op_t target_data_op;\n" |
26511 | " ompt_record_target_map_t target_map;\n" |
26512 | " ompt_record_target_kernel_t target_kernel;\n" |
26513 | " ompt_record_control_tool_t control_tool;\n" |
26514 | " } record;\n" |
26515 | "} ompt_record_ompt_t;\n" |
26516 | "\n" |
26517 | "typedef ompt_record_ompt_t *(*ompt_get_record_ompt_t) (\n" |
26518 | " ompt_buffer_t *buffer,\n" |
26519 | " ompt_buffer_cursor_t current\n" |
26520 | ");\n" |
26521 | "\n" |
26522 | "#define ompt_id_none 0\n" |
26523 | "#define ompt_data_none {0}\n" |
26524 | "#define ompt_time_none 0\n" |
26525 | "#define ompt_hwid_none 0\n" |
26526 | "#define ompt_addr_none ~0\n" |
26527 | "#define ompt_mutex_impl_none 0\n" |
26528 | "#define ompt_wait_id_none 0\n" |
26529 | "\n" |
26530 | "#define ompd_segment_none 0\n" |
26531 | "\n" |
26532 | "#endif /* __OMPT__ */\n" |
26533 | "" } , |
26534 | { "/builtins/omp.h" , "/*\n" |
26535 | " * include/50/omp.h.var\n" |
26536 | " */\n" |
26537 | "\n" |
26538 | "\n" |
26539 | "//===----------------------------------------------------------------------===//\n" |
26540 | "//\n" |
26541 | "// The LLVM Compiler Infrastructure\n" |
26542 | "//\n" |
26543 | "// This file is dual licensed under the MIT and the University of Illinois Open\n" |
26544 | "// Source Licenses. See LICENSE.txt for details.\n" |
26545 | "//\n" |
26546 | "//===----------------------------------------------------------------------===//\n" |
26547 | "\n" |
26548 | "\n" |
26549 | "#ifndef __OMP_H\n" |
26550 | "# define __OMP_H\n" |
26551 | "\n" |
26552 | "# define KMP_VERSION_MAJOR 5\n" |
26553 | "# define KMP_VERSION_MINOR 0\n" |
26554 | "# define KMP_VERSION_BUILD 20140926\n" |
26555 | "# define KMP_BUILD_DATE \"No_Timestamp\"\n" |
26556 | "\n" |
26557 | "# ifdef __cplusplus\n" |
26558 | " extern \"C\" {\n" |
26559 | "# endif\n" |
26560 | "\n" |
26561 | "# define omp_set_affinity_format ompc_set_affinity_format\n" |
26562 | "# define omp_get_affinity_format ompc_get_affinity_format\n" |
26563 | "# define omp_display_affinity ompc_display_affinity\n" |
26564 | "# define omp_capture_affinity ompc_capture_affinity\n" |
26565 | "\n" |
26566 | "# if defined(_WIN32)\n" |
26567 | "# define __KAI_KMPC_CONVENTION __cdecl\n" |
26568 | "# ifndef __KMP_IMP\n" |
26569 | "# define __KMP_IMP __declspec(dllimport)\n" |
26570 | "# endif\n" |
26571 | "# else\n" |
26572 | "# define __KAI_KMPC_CONVENTION\n" |
26573 | "# ifndef __KMP_IMP\n" |
26574 | "# define __KMP_IMP\n" |
26575 | "# endif\n" |
26576 | "# endif\n" |
26577 | "\n" |
26578 | " /* schedule kind constants */\n" |
26579 | " typedef enum omp_sched_t {\n" |
26580 | " omp_sched_static = 1,\n" |
26581 | " omp_sched_dynamic = 2,\n" |
26582 | " omp_sched_guided = 3,\n" |
26583 | " omp_sched_auto = 4\n" |
26584 | " } omp_sched_t;\n" |
26585 | "\n" |
26586 | " /* set API functions */\n" |
26587 | " extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int);\n" |
26588 | " extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int);\n" |
26589 | " extern void __KAI_KMPC_CONVENTION omp_set_nested (int);\n" |
26590 | " extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int);\n" |
26591 | " extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int);\n" |
26592 | "\n" |
26593 | " /* query API functions */\n" |
26594 | " extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void);\n" |
26595 | " extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void);\n" |
26596 | " extern int __KAI_KMPC_CONVENTION omp_get_nested (void);\n" |
26597 | " extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void);\n" |
26598 | " extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void);\n" |
26599 | " extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void);\n" |
26600 | " extern int __KAI_KMPC_CONVENTION omp_in_parallel (void);\n" |
26601 | " extern int __KAI_KMPC_CONVENTION omp_in_final (void);\n" |
26602 | " extern int __KAI_KMPC_CONVENTION omp_get_active_level (void);\n" |
26603 | " extern int __KAI_KMPC_CONVENTION omp_get_level (void);\n" |
26604 | " extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int);\n" |
26605 | " extern int __KAI_KMPC_CONVENTION omp_get_team_size (int);\n" |
26606 | " extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void);\n" |
26607 | " extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void);\n" |
26608 | " extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *);\n" |
26609 | " extern int __KAI_KMPC_CONVENTION omp_get_max_task_priority (void);\n" |
26610 | "\n" |
26611 | " /* lock API functions */\n" |
26612 | " typedef struct omp_lock_t {\n" |
26613 | " void * _lk;\n" |
26614 | " } omp_lock_t;\n" |
26615 | "\n" |
26616 | " extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *);\n" |
26617 | " extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *);\n" |
26618 | " extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *);\n" |
26619 | " extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *);\n" |
26620 | " extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *);\n" |
26621 | "\n" |
26622 | " /* nested lock API functions */\n" |
26623 | " typedef struct omp_nest_lock_t {\n" |
26624 | " void * _lk;\n" |
26625 | " } omp_nest_lock_t;\n" |
26626 | "\n" |
26627 | " extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *);\n" |
26628 | " extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *);\n" |
26629 | " extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *);\n" |
26630 | " extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *);\n" |
26631 | " extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *);\n" |
26632 | "\n" |
26633 | " /* OpenMP 5.0 Synchronization hints*/\n" |
26634 | " typedef enum omp_sync_hint_t {\n" |
26635 | " omp_sync_hint_none = 0,\n" |
26636 | " omp_lock_hint_none = omp_sync_hint_none,\n" |
26637 | " omp_sync_hint_uncontended = 1,\n" |
26638 | " omp_lock_hint_uncontended = omp_sync_hint_uncontended,\n" |
26639 | " omp_sync_hint_contended = (1<<1),\n" |
26640 | " omp_lock_hint_contended = omp_sync_hint_contended,\n" |
26641 | " omp_sync_hint_nonspeculative = (1<<2),\n" |
26642 | " omp_lock_hint_nonspeculative = omp_sync_hint_nonspeculative,\n" |
26643 | " omp_sync_hint_speculative = (1<<3),\n" |
26644 | " omp_lock_hint_speculative = omp_sync_hint_speculative,\n" |
26645 | " kmp_lock_hint_hle = (1<<16),\n" |
26646 | " kmp_lock_hint_rtm = (1<<17),\n" |
26647 | " kmp_lock_hint_adaptive = (1<<18)\n" |
26648 | " } omp_sync_hint_t;\n" |
26649 | "\n" |
26650 | " /* lock hint type for dynamic user lock */\n" |
26651 | " typedef omp_sync_hint_t omp_lock_hint_t;\n" |
26652 | "\n" |
26653 | " /* hinted lock initializers */\n" |
26654 | " extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t);\n" |
26655 | " extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t);\n" |
26656 | "\n" |
26657 | " /* time API functions */\n" |
26658 | " extern double __KAI_KMPC_CONVENTION omp_get_wtime (void);\n" |
26659 | " extern double __KAI_KMPC_CONVENTION omp_get_wtick (void);\n" |
26660 | "\n" |
26661 | " /* OpenMP 4.0 */\n" |
26662 | " extern int __KAI_KMPC_CONVENTION omp_get_default_device (void);\n" |
26663 | " extern void __KAI_KMPC_CONVENTION omp_set_default_device (int);\n" |
26664 | " extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void);\n" |
26665 | " extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void);\n" |
26666 | " extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void);\n" |
26667 | " extern int __KAI_KMPC_CONVENTION omp_get_team_num (void);\n" |
26668 | " extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void);\n" |
26669 | "\n" |
26670 | "# include <stdlib.h>\n" |
26671 | " /* OpenMP 4.5 */\n" |
26672 | " extern int __KAI_KMPC_CONVENTION omp_get_initial_device (void);\n" |
26673 | " extern void* __KAI_KMPC_CONVENTION omp_target_alloc(size_t, int);\n" |
26674 | " extern void __KAI_KMPC_CONVENTION omp_target_free(void *, int);\n" |
26675 | " extern int __KAI_KMPC_CONVENTION omp_target_is_present(void *, int);\n" |
26676 | " extern int __KAI_KMPC_CONVENTION omp_target_memcpy(void *, void *, size_t, size_t, size_t, int, int);\n" |
26677 | " extern int __KAI_KMPC_CONVENTION omp_target_memcpy_rect(void *, void *, size_t, int, const size_t *,\n" |
26678 | " const size_t *, const size_t *, const size_t *, const size_t *, int, int);\n" |
26679 | " extern int __KAI_KMPC_CONVENTION omp_target_associate_ptr(void *, void *, size_t, size_t, int);\n" |
26680 | " extern int __KAI_KMPC_CONVENTION omp_target_disassociate_ptr(void *, int);\n" |
26681 | "\n" |
26682 | " /* OpenMP 5.0 */\n" |
26683 | " extern int __KAI_KMPC_CONVENTION omp_get_device_num (void);\n" |
26684 | "\n" |
26685 | " /* kmp API functions */\n" |
26686 | " extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void);\n" |
26687 | " extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int);\n" |
26688 | " extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void);\n" |
26689 | " extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t);\n" |
26690 | " extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void);\n" |
26691 | " extern int __KAI_KMPC_CONVENTION kmp_get_library (void);\n" |
26692 | " extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int);\n" |
26693 | " extern void __KAI_KMPC_CONVENTION kmp_set_library (int);\n" |
26694 | " extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void);\n" |
26695 | " extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void);\n" |
26696 | " extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void);\n" |
26697 | " extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *);\n" |
26698 | " extern void __KAI_KMPC_CONVENTION kmp_set_disp_num_buffers (int);\n" |
26699 | "\n" |
26700 | " /* Intel affinity API */\n" |
26701 | " typedef void * kmp_affinity_mask_t;\n" |
26702 | "\n" |
26703 | " extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *);\n" |
26704 | " extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *);\n" |
26705 | " extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void);\n" |
26706 | " extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *);\n" |
26707 | " extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *);\n" |
26708 | " extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *);\n" |
26709 | " extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *);\n" |
26710 | " extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *);\n" |
26711 | "\n" |
26712 | " /* OpenMP 4.0 affinity API */\n" |
26713 | " typedef enum omp_proc_bind_t {\n" |
26714 | " omp_proc_bind_false = 0,\n" |
26715 | " omp_proc_bind_true = 1,\n" |
26716 | " omp_proc_bind_master = 2,\n" |
26717 | " omp_proc_bind_close = 3,\n" |
26718 | " omp_proc_bind_spread = 4\n" |
26719 | " } omp_proc_bind_t;\n" |
26720 | "\n" |
26721 | " extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void);\n" |
26722 | "\n" |
26723 | " /* OpenMP 4.5 affinity API */\n" |
26724 | " extern int __KAI_KMPC_CONVENTION omp_get_num_places (void);\n" |
26725 | " extern int __KAI_KMPC_CONVENTION omp_get_place_num_procs (int);\n" |
26726 | " extern void __KAI_KMPC_CONVENTION omp_get_place_proc_ids (int, int *);\n" |
26727 | " extern int __KAI_KMPC_CONVENTION omp_get_place_num (void);\n" |
26728 | " extern int __KAI_KMPC_CONVENTION omp_get_partition_num_places (void);\n" |
26729 | " extern void __KAI_KMPC_CONVENTION omp_get_partition_place_nums (int *);\n" |
26730 | "\n" |
26731 | " extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t);\n" |
26732 | " extern void * __KAI_KMPC_CONVENTION kmp_aligned_malloc (size_t, size_t);\n" |
26733 | " extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t);\n" |
26734 | " extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t);\n" |
26735 | " extern void __KAI_KMPC_CONVENTION kmp_free (void *);\n" |
26736 | "\n" |
26737 | " extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void);\n" |
26738 | " extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void);\n" |
26739 | "\n" |
26740 | " /* OpenMP 5.0 Tool Control */\n" |
26741 | " typedef enum omp_control_tool_result_t {\n" |
26742 | " omp_control_tool_notool = -2,\n" |
26743 | " omp_control_tool_nocallback = -1,\n" |
26744 | " omp_control_tool_success = 0,\n" |
26745 | " omp_control_tool_ignored = 1\n" |
26746 | " } omp_control_tool_result_t;\n" |
26747 | "\n" |
26748 | " typedef enum omp_control_tool_t {\n" |
26749 | " omp_control_tool_start = 1,\n" |
26750 | " omp_control_tool_pause = 2,\n" |
26751 | " omp_control_tool_flush = 3,\n" |
26752 | " omp_control_tool_end = 4\n" |
26753 | " } omp_control_tool_t;\n" |
26754 | " \n" |
26755 | " extern int __KAI_KMPC_CONVENTION omp_control_tool(int, int, void*);\n" |
26756 | "\n" |
26757 | " /* OpenMP 5.0 Memory Management */\n" |
26758 | " typedef void *omp_allocator_t;\n" |
26759 | " extern __KMP_IMP const omp_allocator_t *OMP_NULL_ALLOCATOR;\n" |
26760 | " extern __KMP_IMP const omp_allocator_t *omp_default_mem_alloc;\n" |
26761 | " extern __KMP_IMP const omp_allocator_t *omp_large_cap_mem_alloc;\n" |
26762 | " extern __KMP_IMP const omp_allocator_t *omp_const_mem_alloc;\n" |
26763 | " extern __KMP_IMP const omp_allocator_t *omp_high_bw_mem_alloc;\n" |
26764 | " extern __KMP_IMP const omp_allocator_t *omp_low_lat_mem_alloc;\n" |
26765 | " extern __KMP_IMP const omp_allocator_t *omp_cgroup_mem_alloc;\n" |
26766 | " extern __KMP_IMP const omp_allocator_t *omp_pteam_mem_alloc;\n" |
26767 | " extern __KMP_IMP const omp_allocator_t *omp_thread_mem_alloc;\n" |
26768 | "\n" |
26769 | " extern void __KAI_KMPC_CONVENTION omp_set_default_allocator(const omp_allocator_t *);\n" |
26770 | " extern const omp_allocator_t * __KAI_KMPC_CONVENTION omp_get_default_allocator(void);\n" |
26771 | "#ifdef __cplusplus\n" |
26772 | " extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, const omp_allocator_t *allocator = OMP_NULL_ALLOCATOR);\n" |
26773 | " extern void __KAI_KMPC_CONVENTION omp_free(void * ptr, const omp_allocator_t *allocator = OMP_NULL_ALLOCATOR);\n" |
26774 | "#else\n" |
26775 | " extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, const omp_allocator_t *allocator);\n" |
26776 | " extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, const omp_allocator_t *allocator);\n" |
26777 | "#endif\n" |
26778 | "\n" |
26779 | " /* OpenMP 5.0 Affinity Format */\n" |
26780 | " extern void __KAI_KMPC_CONVENTION omp_set_affinity_format(char const *);\n" |
26781 | " extern size_t __KAI_KMPC_CONVENTION omp_get_affinity_format(char *, size_t);\n" |
26782 | " extern void __KAI_KMPC_CONVENTION omp_display_affinity(char const *);\n" |
26783 | " extern size_t __KAI_KMPC_CONVENTION omp_capture_affinity(char *, size_t, char const *);\n" |
26784 | "\n" |
26785 | "# undef __KAI_KMPC_CONVENTION\n" |
26786 | "# undef __KMP_IMP\n" |
26787 | "\n" |
26788 | " /* Warning:\n" |
26789 | " The following typedefs are not standard, deprecated and will be removed in a future release.\n" |
26790 | " */\n" |
26791 | " typedef int omp_int_t;\n" |
26792 | " typedef double omp_wtime_t;\n" |
26793 | "\n" |
26794 | "# ifdef __cplusplus\n" |
26795 | " }\n" |
26796 | "# endif\n" |
26797 | "\n" |
26798 | "#endif /* __OMP_H */\n" |
26799 | "" } , |
26800 | { "/builtins/ompt.h" , "/*\n" |
26801 | " * include/50/omp-tools.h.var\n" |
26802 | " */\n" |
26803 | "\n" |
26804 | "//===----------------------------------------------------------------------===//\n" |
26805 | "//\n" |
26806 | "// The LLVM Compiler Infrastructure\n" |
26807 | "//\n" |
26808 | "// This file is dual licensed under the MIT and the University of Illinois Open\n" |
26809 | "// Source Licenses. See LICENSE.txt for details.\n" |
26810 | "//\n" |
26811 | "//===----------------------------------------------------------------------===//\n" |
26812 | "\n" |
26813 | "#ifndef __OMPT__\n" |
26814 | "#define __OMPT__\n" |
26815 | "\n" |
26816 | "/*****************************************************************************\n" |
26817 | " * system include files\n" |
26818 | " *****************************************************************************/\n" |
26819 | "\n" |
26820 | "#include <stdint.h>\n" |
26821 | "#include <stddef.h>\n" |
26822 | "\n" |
26823 | "/*****************************************************************************\n" |
26824 | " * iteration macros\n" |
26825 | " *****************************************************************************/\n" |
26826 | "\n" |
26827 | "#define FOREACH_OMPT_INQUIRY_FN(macro) \\\n" |
26828 | " macro (ompt_enumerate_states) \\\n" |
26829 | " macro (ompt_enumerate_mutex_impls) \\\n" |
26830 | " \\\n" |
26831 | " macro (ompt_set_callback) \\\n" |
26832 | " macro (ompt_get_callback) \\\n" |
26833 | " \\\n" |
26834 | " macro (ompt_get_state) \\\n" |
26835 | " \\\n" |
26836 | " macro (ompt_get_parallel_info) \\\n" |
26837 | " macro (ompt_get_task_info) \\\n" |
26838 | " macro (ompt_get_task_memory) \\\n" |
26839 | " macro (ompt_get_thread_data) \\\n" |
26840 | " macro (ompt_get_unique_id) \\\n" |
26841 | " macro (ompt_finalize_tool) \\\n" |
26842 | " \\\n" |
26843 | " macro(ompt_get_num_procs) \\\n" |
26844 | " macro(ompt_get_num_places) \\\n" |
26845 | " macro(ompt_get_place_proc_ids) \\\n" |
26846 | " macro(ompt_get_place_num) \\\n" |
26847 | " macro(ompt_get_partition_place_nums) \\\n" |
26848 | " macro(ompt_get_proc_id) \\\n" |
26849 | " \\\n" |
26850 | " macro(ompt_get_target_info) \\\n" |
26851 | " macro(ompt_get_num_devices)\n" |
26852 | "\n" |
26853 | "#define FOREACH_OMPT_STATE(macro) \\\n" |
26854 | " \\\n" |
26855 | " /* first available state */ \\\n" |
26856 | " macro (ompt_state_undefined, 0x102) /* undefined thread state */ \\\n" |
26857 | " \\\n" |
26858 | " /* work states (0..15) */ \\\n" |
26859 | " macro (ompt_state_work_serial, 0x000) /* working outside parallel */ \\\n" |
26860 | " macro (ompt_state_work_parallel, 0x001) /* working within parallel */ \\\n" |
26861 | " macro (ompt_state_work_reduction, 0x002) /* performing a reduction */ \\\n" |
26862 | " \\\n" |
26863 | " /* barrier wait states (16..31) */ \\\n" |
26864 | " macro (ompt_state_wait_barrier, 0x010) /* waiting at a barrier */ \\\n" |
26865 | " macro (ompt_state_wait_barrier_implicit_parallel, 0x011) \\\n" |
26866 | " /* implicit barrier at the end of parallel region */\\\n" |
26867 | " macro (ompt_state_wait_barrier_implicit_workshare, 0x012) \\\n" |
26868 | " /* implicit barrier at the end of worksharing */ \\\n" |
26869 | " macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \\\n" |
26870 | " macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \\\n" |
26871 | " \\\n" |
26872 | " /* task wait states (32..63) */ \\\n" |
26873 | " macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \\\n" |
26874 | " macro (ompt_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \\\n" |
26875 | " \\\n" |
26876 | " /* mutex wait states (64..127) */ \\\n" |
26877 | " macro (ompt_state_wait_mutex, 0x040) \\\n" |
26878 | " macro (ompt_state_wait_lock, 0x041) /* waiting for lock */ \\\n" |
26879 | " macro (ompt_state_wait_critical, 0x042) /* waiting for critical */ \\\n" |
26880 | " macro (ompt_state_wait_atomic, 0x043) /* waiting for atomic */ \\\n" |
26881 | " macro (ompt_state_wait_ordered, 0x044) /* waiting for ordered */ \\\n" |
26882 | " \\\n" |
26883 | " /* target wait states (128..255) */ \\\n" |
26884 | " macro (ompt_state_wait_target, 0x080) /* waiting for target region */ \\\n" |
26885 | " macro (ompt_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \\\n" |
26886 | " macro (ompt_state_wait_target_update, 0x082) /* waiting for target update operation */ \\\n" |
26887 | " \\\n" |
26888 | " /* misc (256..511) */ \\\n" |
26889 | " macro (ompt_state_idle, 0x100) /* waiting for work */ \\\n" |
26890 | " macro (ompt_state_overhead, 0x101) /* overhead excluding wait states */ \\\n" |
26891 | " \\\n" |
26892 | " /* implementation-specific states (512..) */\n" |
26893 | "\n" |
26894 | "\n" |
26895 | "#define FOREACH_KMP_MUTEX_IMPL(macro) \\\n" |
26896 | " macro (kmp_mutex_impl_none, 0) /* unknown implementation */ \\\n" |
26897 | " macro (kmp_mutex_impl_spin, 1) /* based on spin */ \\\n" |
26898 | " macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \\\n" |
26899 | " macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */\n" |
26900 | "\n" |
26901 | "#define FOREACH_OMPT_EVENT(macro) \\\n" |
26902 | " \\\n" |
26903 | " /*--- Mandatory Events ---*/ \\\n" |
26904 | " macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \\\n" |
26905 | " macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \\\n" |
26906 | " \\\n" |
26907 | " macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \\\n" |
26908 | " macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \\\n" |
26909 | " \\\n" |
26910 | " macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \\\n" |
26911 | " macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \\\n" |
26912 | " macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \\\n" |
26913 | " \\\n" |
26914 | " macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \\\n" |
26915 | " macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \\\n" |
26916 | " macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \\\n" |
26917 | " \\\n" |
26918 | " macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \\\n" |
26919 | " \\\n" |
26920 | " macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \\\n" |
26921 | " macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \\\n" |
26922 | " \\\n" |
26923 | " macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \\\n" |
26924 | " macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \\\n" |
26925 | " \\\n" |
26926 | " /* Optional Events */ \\\n" |
26927 | " macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \\\n" |
26928 | " \\\n" |
26929 | " macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 17) /* mutex released */ \\\n" |
26930 | " \\\n" |
26931 | " macro (ompt_callback_dependences, ompt_callback_dependences_t, 18) /* report task dependences */ \\\n" |
26932 | " macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19) /* report task dependence */ \\\n" |
26933 | " \\\n" |
26934 | " macro (ompt_callback_work, ompt_callback_work_t, 20) /* task at work begin or end */ \\\n" |
26935 | " \\\n" |
26936 | " macro (ompt_callback_master, ompt_callback_master_t, 21) /* task at master begin or end */ \\\n" |
26937 | " \\\n" |
26938 | " macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \\\n" |
26939 | " \\\n" |
26940 | " macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \\\n" |
26941 | " \\\n" |
26942 | " macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 24) /* lock init */ \\\n" |
26943 | " macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 25) /* lock destroy */ \\\n" |
26944 | " \\\n" |
26945 | " macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26) /* mutex acquire */ \\\n" |
26946 | " macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 27) /* mutex acquired */ \\\n" |
26947 | " \\\n" |
26948 | " macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 28) /* nest lock */ \\\n" |
26949 | " \\\n" |
26950 | " macro (ompt_callback_flush, ompt_callback_flush_t, 29) /* after executing flush */ \\\n" |
26951 | " \\\n" |
26952 | " macro (ompt_callback_cancel, ompt_callback_cancel_t, 30) /* cancel innermost binding region */ \\\n" |
26953 | " \\\n" |
26954 | " macro (ompt_callback_reduction, ompt_callback_sync_region_t, 31) /* reduction */ \\\n" |
26955 | " \\\n" |
26956 | " macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */\n" |
26957 | "\n" |
26958 | "/*****************************************************************************\n" |
26959 | " * implementation specific types\n" |
26960 | " *****************************************************************************/\n" |
26961 | "\n" |
26962 | "typedef enum kmp_mutex_impl_t {\n" |
26963 | "#define kmp_mutex_impl_macro(impl, code) impl = code,\n" |
26964 | " FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro)\n" |
26965 | "#undef kmp_mutex_impl_macro\n" |
26966 | "} kmp_mutex_impl_t;\n" |
26967 | "\n" |
26968 | "/*****************************************************************************\n" |
26969 | " * definitions generated from spec\n" |
26970 | " *****************************************************************************/\n" |
26971 | "\n" |
26972 | "typedef enum ompt_callbacks_t {\n" |
26973 | " ompt_callback_thread_begin = 1,\n" |
26974 | " ompt_callback_thread_end = 2,\n" |
26975 | " ompt_callback_parallel_begin = 3,\n" |
26976 | " ompt_callback_parallel_end = 4,\n" |
26977 | " ompt_callback_task_create = 5,\n" |
26978 | " ompt_callback_task_schedule = 6,\n" |
26979 | " ompt_callback_implicit_task = 7,\n" |
26980 | " ompt_callback_target = 8,\n" |
26981 | " ompt_callback_target_data_op = 9,\n" |
26982 | " ompt_callback_target_submit = 10,\n" |
26983 | " ompt_callback_control_tool = 11,\n" |
26984 | " ompt_callback_device_initialize = 12,\n" |
26985 | " ompt_callback_device_finalize = 13,\n" |
26986 | " ompt_callback_device_load = 14,\n" |
26987 | " ompt_callback_device_unload = 15,\n" |
26988 | " ompt_callback_sync_region_wait = 16,\n" |
26989 | " ompt_callback_mutex_released = 17,\n" |
26990 | " ompt_callback_dependences = 18,\n" |
26991 | " ompt_callback_task_dependence = 19,\n" |
26992 | " ompt_callback_work = 20,\n" |
26993 | " ompt_callback_master = 21,\n" |
26994 | " ompt_callback_target_map = 22,\n" |
26995 | " ompt_callback_sync_region = 23,\n" |
26996 | " ompt_callback_lock_init = 24,\n" |
26997 | " ompt_callback_lock_destroy = 25,\n" |
26998 | " ompt_callback_mutex_acquire = 26,\n" |
26999 | " ompt_callback_mutex_acquired = 27,\n" |
27000 | " ompt_callback_nest_lock = 28,\n" |
27001 | " ompt_callback_flush = 29,\n" |
27002 | " ompt_callback_cancel = 30,\n" |
27003 | " ompt_callback_reduction = 31,\n" |
27004 | " ompt_callback_dispatch = 32\n" |
27005 | "} ompt_callbacks_t;\n" |
27006 | "\n" |
27007 | "typedef enum ompt_record_t {\n" |
27008 | " ompt_record_ompt = 1,\n" |
27009 | " ompt_record_native = 2,\n" |
27010 | " ompt_record_invalid = 3\n" |
27011 | "} ompt_record_t;\n" |
27012 | "\n" |
27013 | "typedef enum ompt_record_native_t {\n" |
27014 | " ompt_record_native_info = 1,\n" |
27015 | " ompt_record_native_event = 2\n" |
27016 | "} ompt_record_native_t;\n" |
27017 | "\n" |
27018 | "typedef enum ompt_set_result_t {\n" |
27019 | " ompt_set_error = 0,\n" |
27020 | " ompt_set_never = 1,\n" |
27021 | " ompt_set_impossible = 2,\n" |
27022 | " ompt_set_sometimes = 3,\n" |
27023 | " ompt_set_sometimes_paired = 4,\n" |
27024 | " ompt_set_always = 5\n" |
27025 | "} ompt_set_result_t;\n" |
27026 | "\n" |
27027 | "typedef uint64_t ompt_id_t;\n" |
27028 | "\n" |
27029 | "typedef uint64_t ompt_device_time_t;\n" |
27030 | "\n" |
27031 | "typedef uint64_t ompt_buffer_cursor_t;\n" |
27032 | "\n" |
27033 | "typedef enum ompt_thread_t {\n" |
27034 | " ompt_thread_initial = 1,\n" |
27035 | " ompt_thread_worker = 2,\n" |
27036 | " ompt_thread_other = 3,\n" |
27037 | " ompt_thread_unknown = 4\n" |
27038 | "} ompt_thread_t;\n" |
27039 | "\n" |
27040 | "typedef enum ompt_scope_endpoint_t {\n" |
27041 | " ompt_scope_begin = 1,\n" |
27042 | " ompt_scope_end = 2\n" |
27043 | "} ompt_scope_endpoint_t;\n" |
27044 | "\n" |
27045 | "typedef enum ompt_dispatch_t {\n" |
27046 | " ompt_dispatch_iteration = 1,\n" |
27047 | " ompt_dispatch_section = 2\n" |
27048 | "} ompt_dispatch_t;\n" |
27049 | "\n" |
27050 | "typedef enum ompt_sync_region_t {\n" |
27051 | " ompt_sync_region_barrier = 1,\n" |
27052 | " ompt_sync_region_barrier_implicit = 2,\n" |
27053 | " ompt_sync_region_barrier_explicit = 3,\n" |
27054 | " ompt_sync_region_barrier_implementation = 4,\n" |
27055 | " ompt_sync_region_taskwait = 5,\n" |
27056 | " ompt_sync_region_taskgroup = 6,\n" |
27057 | " ompt_sync_region_reduction = 7\n" |
27058 | "} ompt_sync_region_t;\n" |
27059 | "\n" |
27060 | "typedef enum ompt_target_data_op_t {\n" |
27061 | " ompt_target_data_alloc = 1,\n" |
27062 | " ompt_target_data_transfer_to_device = 2,\n" |
27063 | " ompt_target_data_transfer_from_device = 3,\n" |
27064 | " ompt_target_data_delete = 4,\n" |
27065 | " ompt_target_data_associate = 5,\n" |
27066 | " ompt_target_data_disassociate = 6\n" |
27067 | "} ompt_target_data_op_t;\n" |
27068 | "\n" |
27069 | "typedef enum ompt_work_t {\n" |
27070 | " ompt_work_loop = 1,\n" |
27071 | " ompt_work_sections = 2,\n" |
27072 | " ompt_work_single_executor = 3,\n" |
27073 | " ompt_work_single_other = 4,\n" |
27074 | " ompt_work_workshare = 5,\n" |
27075 | " ompt_work_distribute = 6,\n" |
27076 | " ompt_work_taskloop = 7\n" |
27077 | "} ompt_work_t;\n" |
27078 | "\n" |
27079 | "typedef enum ompt_mutex_t {\n" |
27080 | " ompt_mutex_lock = 1,\n" |
27081 | " ompt_mutex_test_lock = 2,\n" |
27082 | " ompt_mutex_nest_lock = 3,\n" |
27083 | " ompt_mutex_test_nest_lock = 4,\n" |
27084 | " ompt_mutex_critical = 5,\n" |
27085 | " ompt_mutex_atomic = 6,\n" |
27086 | " ompt_mutex_ordered = 7\n" |
27087 | "} ompt_mutex_t;\n" |
27088 | "\n" |
27089 | "typedef enum ompt_native_mon_flag_t {\n" |
27090 | " ompt_native_data_motion_explicit = 0x01,\n" |
27091 | " ompt_native_data_motion_implicit = 0x02,\n" |
27092 | " ompt_native_kernel_invocation = 0x04,\n" |
27093 | " ompt_native_kernel_execution = 0x08,\n" |
27094 | " ompt_native_driver = 0x10,\n" |
27095 | " ompt_native_runtime = 0x20,\n" |
27096 | " ompt_native_overhead = 0x40,\n" |
27097 | " ompt_native_idleness = 0x80\n" |
27098 | "} ompt_native_mon_flag_t;\n" |
27099 | "\n" |
27100 | "typedef enum ompt_task_flag_t {\n" |
27101 | " ompt_task_initial = 0x00000001,\n" |
27102 | " ompt_task_implicit = 0x00000002,\n" |
27103 | " ompt_task_explicit = 0x00000004,\n" |
27104 | " ompt_task_target = 0x00000008,\n" |
27105 | " ompt_task_undeferred = 0x08000000,\n" |
27106 | " ompt_task_untied = 0x10000000,\n" |
27107 | " ompt_task_final = 0x20000000,\n" |
27108 | " ompt_task_mergeable = 0x40000000,\n" |
27109 | " ompt_task_merged = 0x80000000\n" |
27110 | "} ompt_task_flag_t;\n" |
27111 | "\n" |
27112 | "typedef enum ompt_task_status_t {\n" |
27113 | " ompt_task_complete = 1,\n" |
27114 | " ompt_task_yield = 2,\n" |
27115 | " ompt_task_cancel = 3,\n" |
27116 | " ompt_task_detach = 4,\n" |
27117 | " ompt_task_early_fulfill = 5,\n" |
27118 | " ompt_task_late_fulfill = 6,\n" |
27119 | " ompt_task_switch = 7\n" |
27120 | "} ompt_task_status_t;\n" |
27121 | "\n" |
27122 | "typedef enum ompt_target_t {\n" |
27123 | " ompt_target = 1,\n" |
27124 | " ompt_target_enter_data = 2,\n" |
27125 | " ompt_target_exit_data = 3,\n" |
27126 | " ompt_target_update = 4\n" |
27127 | "} ompt_target_t;\n" |
27128 | "\n" |
27129 | "typedef enum ompt_parallel_flag_t {\n" |
27130 | " ompt_parallel_invoker_program = 0x00000001,\n" |
27131 | " ompt_parallel_invoker_runtime = 0x00000002,\n" |
27132 | " ompt_parallel_league = 0x40000000,\n" |
27133 | " ompt_parallel_team = 0x80000000\n" |
27134 | "} ompt_parallel_flag_t;\n" |
27135 | "\n" |
27136 | "typedef enum ompt_target_map_flag_t {\n" |
27137 | " ompt_target_map_flag_to = 0x01,\n" |
27138 | " ompt_target_map_flag_from = 0x02,\n" |
27139 | " ompt_target_map_flag_alloc = 0x04,\n" |
27140 | " ompt_target_map_flag_release = 0x08,\n" |
27141 | " ompt_target_map_flag_delete = 0x10,\n" |
27142 | " ompt_target_map_flag_implicit = 0x20\n" |
27143 | "} ompt_target_map_flag_t;\n" |
27144 | "\n" |
27145 | "typedef enum ompt_dependence_type_t {\n" |
27146 | " ompt_dependence_type_in = 1,\n" |
27147 | " ompt_dependence_type_out = 2,\n" |
27148 | " ompt_dependence_type_inout = 3,\n" |
27149 | " ompt_dependence_type_mutexinoutset = 4,\n" |
27150 | " ompt_dependence_type_source = 5,\n" |
27151 | " ompt_dependence_type_sink = 6\n" |
27152 | "} ompt_dependence_type_t;\n" |
27153 | "\n" |
27154 | "typedef enum ompt_cancel_flag_t {\n" |
27155 | " ompt_cancel_parallel = 0x01,\n" |
27156 | " ompt_cancel_sections = 0x02,\n" |
27157 | " ompt_cancel_loop = 0x04,\n" |
27158 | " ompt_cancel_taskgroup = 0x08,\n" |
27159 | " ompt_cancel_activated = 0x10,\n" |
27160 | " ompt_cancel_detected = 0x20,\n" |
27161 | " ompt_cancel_discarded_task = 0x40\n" |
27162 | "} ompt_cancel_flag_t;\n" |
27163 | "\n" |
27164 | "typedef uint64_t ompt_hwid_t;\n" |
27165 | "\n" |
27166 | "typedef uint64_t ompt_wait_id_t;\n" |
27167 | "\n" |
27168 | "typedef enum ompt_frame_flag_t {\n" |
27169 | " ompt_frame_runtime = 0x00,\n" |
27170 | " ompt_frame_application = 0x01,\n" |
27171 | " ompt_frame_cfa = 0x10,\n" |
27172 | " ompt_frame_framepointer = 0x20,\n" |
27173 | " ompt_frame_stackaddress = 0x30\n" |
27174 | "} ompt_frame_flag_t; \n" |
27175 | "\n" |
27176 | "typedef enum ompt_state_t {\n" |
27177 | " ompt_state_work_serial = 0x000,\n" |
27178 | " ompt_state_work_parallel = 0x001,\n" |
27179 | " ompt_state_work_reduction = 0x002,\n" |
27180 | "\n" |
27181 | " ompt_state_wait_barrier = 0x010,\n" |
27182 | " ompt_state_wait_barrier_implicit_parallel = 0x011,\n" |
27183 | " ompt_state_wait_barrier_implicit_workshare = 0x012,\n" |
27184 | " ompt_state_wait_barrier_implicit = 0x013,\n" |
27185 | " ompt_state_wait_barrier_explicit = 0x014,\n" |
27186 | "\n" |
27187 | " ompt_state_wait_taskwait = 0x020,\n" |
27188 | " ompt_state_wait_taskgroup = 0x021,\n" |
27189 | "\n" |
27190 | " ompt_state_wait_mutex = 0x040,\n" |
27191 | " ompt_state_wait_lock = 0x041,\n" |
27192 | " ompt_state_wait_critical = 0x042,\n" |
27193 | " ompt_state_wait_atomic = 0x043,\n" |
27194 | " ompt_state_wait_ordered = 0x044,\n" |
27195 | "\n" |
27196 | " ompt_state_wait_target = 0x080,\n" |
27197 | " ompt_state_wait_target_map = 0x081,\n" |
27198 | " ompt_state_wait_target_update = 0x082,\n" |
27199 | "\n" |
27200 | " ompt_state_idle = 0x100,\n" |
27201 | " ompt_state_overhead = 0x101,\n" |
27202 | " ompt_state_undefined = 0x102\n" |
27203 | "} ompt_state_t;\n" |
27204 | "\n" |
27205 | "typedef uint64_t (*ompt_get_unique_id_t) (void);\n" |
27206 | "\n" |
27207 | "typedef uint64_t ompd_size_t;\n" |
27208 | "\n" |
27209 | "typedef uint64_t ompd_wait_id_t;\n" |
27210 | "\n" |
27211 | "typedef uint64_t ompd_addr_t;\n" |
27212 | "typedef int64_t ompd_word_t;\n" |
27213 | "typedef uint64_t ompd_seg_t;\n" |
27214 | "\n" |
27215 | "typedef uint64_t ompd_device_t;\n" |
27216 | "\n" |
27217 | "typedef uint64_t ompd_thread_id_t;\n" |
27218 | "\n" |
27219 | "typedef enum ompd_scope_t {\n" |
27220 | " ompd_scope_global = 1,\n" |
27221 | " ompd_scope_address_space = 2,\n" |
27222 | " ompd_scope_thread = 3,\n" |
27223 | " ompd_scope_parallel = 4,\n" |
27224 | " ompd_scope_implicit_task = 5,\n" |
27225 | " ompd_scope_task = 6\n" |
27226 | "} ompd_scope_t;\n" |
27227 | "\n" |
27228 | "typedef uint64_t ompd_icv_id_t;\n" |
27229 | "\n" |
27230 | "typedef enum ompd_rc_t {\n" |
27231 | " ompd_rc_ok = 0,\n" |
27232 | " ompd_rc_unavailable = 1,\n" |
27233 | " ompd_rc_stale_handle = 2,\n" |
27234 | " ompd_rc_bad_input = 3,\n" |
27235 | " ompd_rc_error = 4,\n" |
27236 | " ompd_rc_unsupported = 5,\n" |
27237 | " ompd_rc_needs_state_tracking = 6,\n" |
27238 | " ompd_rc_incompatible = 7,\n" |
27239 | " ompd_rc_device_read_error = 8,\n" |
27240 | " ompd_rc_device_write_error = 9,\n" |
27241 | " ompd_rc_nomem = 10,\n" |
27242 | "} ompd_rc_t;\n" |
27243 | "\n" |
27244 | "typedef void (*ompt_interface_fn_t) (void);\n" |
27245 | "\n" |
27246 | "typedef ompt_interface_fn_t (*ompt_function_lookup_t) (\n" |
27247 | " const char *interface_function_name\n" |
27248 | ");\n" |
27249 | "\n" |
27250 | "typedef union ompt_data_t {\n" |
27251 | " uint64_t value;\n" |
27252 | " void *ptr;\n" |
27253 | "} ompt_data_t;\n" |
27254 | "\n" |
27255 | "typedef struct ompt_frame_t {\n" |
27256 | " ompt_data_t exit_frame;\n" |
27257 | " ompt_data_t enter_frame;\n" |
27258 | " int exit_frame_flags;\n" |
27259 | " int enter_frame_flags;\n" |
27260 | "} ompt_frame_t;\n" |
27261 | "\n" |
27262 | "typedef void (*ompt_callback_t) (void);\n" |
27263 | "\n" |
27264 | "typedef void ompt_device_t;\n" |
27265 | "\n" |
27266 | "typedef void ompt_buffer_t;\n" |
27267 | "\n" |
27268 | "typedef void (*ompt_callback_buffer_request_t) (\n" |
27269 | " int device_num,\n" |
27270 | " ompt_buffer_t **buffer,\n" |
27271 | " size_t *bytes\n" |
27272 | ");\n" |
27273 | "\n" |
27274 | "typedef void (*ompt_callback_buffer_complete_t) (\n" |
27275 | " int device_num,\n" |
27276 | " ompt_buffer_t *buffer,\n" |
27277 | " size_t bytes,\n" |
27278 | " ompt_buffer_cursor_t begin,\n" |
27279 | " int buffer_owned\n" |
27280 | ");\n" |
27281 | "\n" |
27282 | "typedef void (*ompt_finalize_t) (\n" |
27283 | " ompt_data_t *tool_data\n" |
27284 | ");\n" |
27285 | "\n" |
27286 | "typedef int (*ompt_initialize_t) (\n" |
27287 | " ompt_function_lookup_t lookup,\n" |
27288 | " int initial_device_num,\n" |
27289 | " ompt_data_t *tool_data\n" |
27290 | ");\n" |
27291 | "\n" |
27292 | "typedef struct ompt_start_tool_result_t {\n" |
27293 | " ompt_initialize_t initialize;\n" |
27294 | " ompt_finalize_t finalize;\n" |
27295 | " ompt_data_t tool_data;\n" |
27296 | "} ompt_start_tool_result_t;\n" |
27297 | "\n" |
27298 | "typedef struct ompt_record_abstract_t {\n" |
27299 | " ompt_record_native_t rclass;\n" |
27300 | " const char *type;\n" |
27301 | " ompt_device_time_t start_time;\n" |
27302 | " ompt_device_time_t end_time;\n" |
27303 | " ompt_hwid_t hwid;\n" |
27304 | "} ompt_record_abstract_t;\n" |
27305 | "\n" |
27306 | "typedef struct ompt_dependence_t {\n" |
27307 | " ompt_data_t variable;\n" |
27308 | " ompt_dependence_type_t dependence_type;\n" |
27309 | "} ompt_dependence_t;\n" |
27310 | "\n" |
27311 | "typedef int (*ompt_enumerate_states_t) (\n" |
27312 | " int current_state,\n" |
27313 | " int *next_state,\n" |
27314 | " const char **next_state_name\n" |
27315 | ");\n" |
27316 | "\n" |
27317 | "typedef int (*ompt_enumerate_mutex_impls_t) (\n" |
27318 | " int current_impl,\n" |
27319 | " int *next_impl,\n" |
27320 | " const char **next_impl_name\n" |
27321 | ");\n" |
27322 | "\n" |
27323 | "typedef ompt_set_result_t (*ompt_set_callback_t) (\n" |
27324 | " ompt_callbacks_t event,\n" |
27325 | " ompt_callback_t callback\n" |
27326 | ");\n" |
27327 | "\n" |
27328 | "typedef int (*ompt_get_callback_t) (\n" |
27329 | " ompt_callbacks_t event,\n" |
27330 | " ompt_callback_t *callback\n" |
27331 | ");\n" |
27332 | "\n" |
27333 | "typedef ompt_data_t *(*ompt_get_thread_data_t) (void);\n" |
27334 | "\n" |
27335 | "typedef int (*ompt_get_num_procs_t) (void);\n" |
27336 | "\n" |
27337 | "typedef int (*ompt_get_num_places_t) (void);\n" |
27338 | "\n" |
27339 | "typedef int (*ompt_get_place_proc_ids_t) (\n" |
27340 | " int place_num,\n" |
27341 | " int ids_size,\n" |
27342 | " int *ids\n" |
27343 | ");\n" |
27344 | "\n" |
27345 | "typedef int (*ompt_get_place_num_t) (void);\n" |
27346 | "\n" |
27347 | "typedef int (*ompt_get_partition_place_nums_t) (\n" |
27348 | " int place_nums_size,\n" |
27349 | " int *place_nums\n" |
27350 | ");\n" |
27351 | "\n" |
27352 | "typedef int (*ompt_get_proc_id_t) (void);\n" |
27353 | "\n" |
27354 | "typedef int (*ompt_get_state_t) (\n" |
27355 | " ompt_wait_id_t *wait_id\n" |
27356 | ");\n" |
27357 | "\n" |
27358 | "typedef int (*ompt_get_parallel_info_t) (\n" |
27359 | " int ancestor_level,\n" |
27360 | " ompt_data_t **parallel_data,\n" |
27361 | " int *team_size\n" |
27362 | ");\n" |
27363 | "\n" |
27364 | "typedef int (*ompt_get_task_info_t) (\n" |
27365 | " int ancestor_level,\n" |
27366 | " int *flags,\n" |
27367 | " ompt_data_t **task_data,\n" |
27368 | " ompt_frame_t **task_frame,\n" |
27369 | " ompt_data_t **parallel_data,\n" |
27370 | " int *thread_num\n" |
27371 | ");\n" |
27372 | "\n" |
27373 | "typedef int (*ompt_get_task_memory_t)(\n" |
27374 | " void **addr,\n" |
27375 | " size_t *size,\n" |
27376 | " int block\n" |
27377 | ");\n" |
27378 | "\n" |
27379 | "typedef int (*ompt_get_target_info_t) (\n" |
27380 | " uint64_t *device_num,\n" |
27381 | " ompt_id_t *target_id,\n" |
27382 | " ompt_id_t *host_op_id\n" |
27383 | ");\n" |
27384 | "\n" |
27385 | "typedef int (*ompt_get_num_devices_t) (void);\n" |
27386 | "\n" |
27387 | "typedef void (*ompt_finalize_tool_t) (void);\n" |
27388 | "\n" |
27389 | "typedef int (*ompt_get_device_num_procs_t) (\n" |
27390 | " ompt_device_t *device\n" |
27391 | ");\n" |
27392 | "\n" |
27393 | "typedef ompt_device_time_t (*ompt_get_device_time_t) (\n" |
27394 | " ompt_device_t *device\n" |
27395 | ");\n" |
27396 | "\n" |
27397 | "typedef double (*ompt_translate_time_t) (\n" |
27398 | " ompt_device_t *device,\n" |
27399 | " ompt_device_time_t time\n" |
27400 | ");\n" |
27401 | "\n" |
27402 | "typedef ompt_set_result_t (*ompt_set_trace_ompt_t) (\n" |
27403 | " ompt_device_t *device,\n" |
27404 | " unsigned int enable,\n" |
27405 | " unsigned int etype\n" |
27406 | ");\n" |
27407 | "\n" |
27408 | "typedef ompt_set_result_t (*ompt_set_trace_native_t) (\n" |
27409 | " ompt_device_t *device,\n" |
27410 | " int enable,\n" |
27411 | " int flags\n" |
27412 | ");\n" |
27413 | "\n" |
27414 | "typedef int (*ompt_start_trace_t) (\n" |
27415 | " ompt_device_t *device,\n" |
27416 | " ompt_callback_buffer_request_t request,\n" |
27417 | " ompt_callback_buffer_complete_t complete\n" |
27418 | ");\n" |
27419 | "\n" |
27420 | "typedef int (*ompt_pause_trace_t) (\n" |
27421 | " ompt_device_t *device,\n" |
27422 | " int begin_pause\n" |
27423 | ");\n" |
27424 | "\n" |
27425 | "typedef int (*ompt_flush_trace_t) (\n" |
27426 | " ompt_device_t *device\n" |
27427 | ");\n" |
27428 | "\n" |
27429 | "typedef int (*ompt_stop_trace_t) (\n" |
27430 | " ompt_device_t *device\n" |
27431 | ");\n" |
27432 | "\n" |
27433 | "typedef int (*ompt_advance_buffer_cursor_t) (\n" |
27434 | " ompt_device_t *device,\n" |
27435 | " ompt_buffer_t *buffer,\n" |
27436 | " size_t size,\n" |
27437 | " ompt_buffer_cursor_t current,\n" |
27438 | " ompt_buffer_cursor_t *next\n" |
27439 | ");\n" |
27440 | "\n" |
27441 | "typedef ompt_record_t (*ompt_get_record_type_t) (\n" |
27442 | " ompt_buffer_t *buffer,\n" |
27443 | " ompt_buffer_cursor_t current\n" |
27444 | ");\n" |
27445 | "\n" |
27446 | "typedef void *(*ompt_get_record_native_t) (\n" |
27447 | " ompt_buffer_t *buffer,\n" |
27448 | " ompt_buffer_cursor_t current,\n" |
27449 | " ompt_id_t *host_op_id\n" |
27450 | ");\n" |
27451 | "\n" |
27452 | "typedef ompt_record_abstract_t *\n" |
27453 | "(*ompt_get_record_abstract_t) (\n" |
27454 | " void *native_record\n" |
27455 | ");\n" |
27456 | "\n" |
27457 | "typedef void (*ompt_callback_thread_begin_t) (\n" |
27458 | " ompt_thread_t thread_type,\n" |
27459 | " ompt_data_t *thread_data\n" |
27460 | ");\n" |
27461 | "\n" |
27462 | "typedef struct ompt_record_thread_begin_t {\n" |
27463 | " ompt_thread_t thread_type;\n" |
27464 | "} ompt_record_thread_begin_t;\n" |
27465 | "\n" |
27466 | "typedef void (*ompt_callback_thread_end_t) (\n" |
27467 | " ompt_data_t *thread_data\n" |
27468 | ");\n" |
27469 | "\n" |
27470 | "typedef void (*ompt_callback_parallel_begin_t) (\n" |
27471 | " ompt_data_t *encountering_task_data,\n" |
27472 | " const ompt_frame_t *encountering_task_frame,\n" |
27473 | " ompt_data_t *parallel_data,\n" |
27474 | " unsigned int requested_parallelism,\n" |
27475 | " int flags,\n" |
27476 | " const void *codeptr_ra\n" |
27477 | ");\n" |
27478 | "\n" |
27479 | "typedef struct ompt_record_parallel_begin_t {\n" |
27480 | " ompt_id_t encountering_task_id;\n" |
27481 | " ompt_id_t parallel_id;\n" |
27482 | " unsigned int requested_parallelism;\n" |
27483 | " int flags;\n" |
27484 | " const void *codeptr_ra;\n" |
27485 | "} ompt_record_parallel_begin_t;\n" |
27486 | "\n" |
27487 | "typedef void (*ompt_callback_parallel_end_t) (\n" |
27488 | " ompt_data_t *parallel_data,\n" |
27489 | " ompt_data_t *encountering_task_data,\n" |
27490 | " int flags,\n" |
27491 | " const void *codeptr_ra\n" |
27492 | ");\n" |
27493 | "\n" |
27494 | "typedef struct ompt_record_parallel_end_t {\n" |
27495 | " ompt_id_t parallel_id;\n" |
27496 | " ompt_id_t encountering_task_id;\n" |
27497 | " int flags;\n" |
27498 | " const void *codeptr_ra;\n" |
27499 | "} ompt_record_parallel_end_t;\n" |
27500 | "\n" |
27501 | "typedef void (*ompt_callback_work_t) (\n" |
27502 | " ompt_work_t wstype,\n" |
27503 | " ompt_scope_endpoint_t endpoint,\n" |
27504 | " ompt_data_t *parallel_data,\n" |
27505 | " ompt_data_t *task_data,\n" |
27506 | " uint64_t count,\n" |
27507 | " const void *codeptr_ra\n" |
27508 | ");\n" |
27509 | "\n" |
27510 | "typedef struct ompt_record_work_t {\n" |
27511 | " ompt_work_t wstype;\n" |
27512 | " ompt_scope_endpoint_t endpoint;\n" |
27513 | " ompt_id_t parallel_id;\n" |
27514 | " ompt_id_t task_id;\n" |
27515 | " uint64_t count;\n" |
27516 | " const void *codeptr_ra;\n" |
27517 | "} ompt_record_work_t;\n" |
27518 | "\n" |
27519 | "typedef void (*ompt_callback_dispatch_t) (\n" |
27520 | " ompt_data_t *parallel_data,\n" |
27521 | " ompt_data_t *task_data,\n" |
27522 | " ompt_dispatch_t kind,\n" |
27523 | " ompt_data_t instance \n" |
27524 | ");\n" |
27525 | "\n" |
27526 | "typedef struct ompt_record_dispatch_t {\n" |
27527 | " ompt_id_t parallel_id;\n" |
27528 | " ompt_id_t task_id;\n" |
27529 | " ompt_dispatch_t kind;\n" |
27530 | " ompt_data_t instance; \n" |
27531 | "} ompt_record_dispatch_t;\n" |
27532 | "\n" |
27533 | "typedef void (*ompt_callback_task_create_t) (\n" |
27534 | " ompt_data_t *encountering_task_data,\n" |
27535 | " const ompt_frame_t *encountering_task_frame,\n" |
27536 | " ompt_data_t *new_task_data,\n" |
27537 | " int flags,\n" |
27538 | " int has_dependences,\n" |
27539 | " const void *codeptr_ra\n" |
27540 | ");\n" |
27541 | "\n" |
27542 | "typedef struct ompt_record_task_create_t {\n" |
27543 | " ompt_id_t encountering_task_id;\n" |
27544 | " ompt_id_t new_task_id;\n" |
27545 | " int flags;\n" |
27546 | " int has_dependences;\n" |
27547 | " const void *codeptr_ra;\n" |
27548 | "} ompt_record_task_create_t;\n" |
27549 | "\n" |
27550 | "typedef void (*ompt_callback_dependences_t) (\n" |
27551 | " ompt_data_t *task_data,\n" |
27552 | " const ompt_dependence_t *deps,\n" |
27553 | " int ndeps\n" |
27554 | ");\n" |
27555 | "\n" |
27556 | "typedef struct ompt_record_dependences_t {\n" |
27557 | " ompt_id_t task_id;\n" |
27558 | " ompt_dependence_t dep;\n" |
27559 | " int ndeps;\n" |
27560 | "} ompt_record_dependences_t;\n" |
27561 | "\n" |
27562 | "typedef void (*ompt_callback_task_dependence_t) (\n" |
27563 | " ompt_data_t *src_task_data,\n" |
27564 | " ompt_data_t *sink_task_data\n" |
27565 | ");\n" |
27566 | "\n" |
27567 | "typedef struct ompt_record_task_dependence_t {\n" |
27568 | " ompt_id_t src_task_id;\n" |
27569 | " ompt_id_t sink_task_id;\n" |
27570 | "} ompt_record_task_dependence_t;\n" |
27571 | "\n" |
27572 | "typedef void (*ompt_callback_task_schedule_t) (\n" |
27573 | " ompt_data_t *prior_task_data,\n" |
27574 | " ompt_task_status_t prior_task_status,\n" |
27575 | " ompt_data_t *next_task_data\n" |
27576 | ");\n" |
27577 | "\n" |
27578 | "typedef struct ompt_record_task_schedule_t {\n" |
27579 | " ompt_id_t prior_task_id;\n" |
27580 | " ompt_task_status_t prior_task_status;\n" |
27581 | " ompt_id_t next_task_id;\n" |
27582 | "} ompt_record_task_schedule_t;\n" |
27583 | "\n" |
27584 | "typedef void (*ompt_callback_implicit_task_t) (\n" |
27585 | " ompt_scope_endpoint_t endpoint,\n" |
27586 | " ompt_data_t *parallel_data,\n" |
27587 | " ompt_data_t *task_data,\n" |
27588 | " unsigned int actual_parallelism,\n" |
27589 | " unsigned int index,\n" |
27590 | " int flags\n" |
27591 | ");\n" |
27592 | "\n" |
27593 | "typedef struct ompt_record_implicit_task_t {\n" |
27594 | " ompt_scope_endpoint_t endpoint;\n" |
27595 | " ompt_id_t parallel_id;\n" |
27596 | " ompt_id_t task_id;\n" |
27597 | " unsigned int actual_parallelism;\n" |
27598 | " unsigned int index;\n" |
27599 | " int flags;\n" |
27600 | "} ompt_record_implicit_task_t;\n" |
27601 | "\n" |
27602 | "typedef void (*ompt_callback_master_t) (\n" |
27603 | " ompt_scope_endpoint_t endpoint,\n" |
27604 | " ompt_data_t *parallel_data,\n" |
27605 | " ompt_data_t *task_data,\n" |
27606 | " const void *codeptr_ra\n" |
27607 | ");\n" |
27608 | "\n" |
27609 | "typedef struct ompt_record_master_t {\n" |
27610 | " ompt_scope_endpoint_t endpoint;\n" |
27611 | " ompt_id_t parallel_id;\n" |
27612 | " ompt_id_t task_id;\n" |
27613 | " const void *codeptr_ra;\n" |
27614 | "} ompt_record_master_t;\n" |
27615 | "\n" |
27616 | "typedef void (*ompt_callback_sync_region_t) (\n" |
27617 | " ompt_sync_region_t kind,\n" |
27618 | " ompt_scope_endpoint_t endpoint,\n" |
27619 | " ompt_data_t *parallel_data,\n" |
27620 | " ompt_data_t *task_data,\n" |
27621 | " const void *codeptr_ra\n" |
27622 | ");\n" |
27623 | "\n" |
27624 | "typedef struct ompt_record_sync_region_t {\n" |
27625 | " ompt_sync_region_t kind;\n" |
27626 | " ompt_scope_endpoint_t endpoint;\n" |
27627 | " ompt_id_t parallel_id;\n" |
27628 | " ompt_id_t task_id;\n" |
27629 | " const void *codeptr_ra;\n" |
27630 | "} ompt_record_sync_region_t;\n" |
27631 | "\n" |
27632 | "typedef void (*ompt_callback_mutex_acquire_t) (\n" |
27633 | " ompt_mutex_t kind,\n" |
27634 | " unsigned int hint,\n" |
27635 | " unsigned int impl,\n" |
27636 | " ompt_wait_id_t wait_id,\n" |
27637 | " const void *codeptr_ra\n" |
27638 | ");\n" |
27639 | "\n" |
27640 | "typedef struct ompt_record_mutex_acquire_t {\n" |
27641 | " ompt_mutex_t kind;\n" |
27642 | " unsigned int hint;\n" |
27643 | " unsigned int impl;\n" |
27644 | " ompt_wait_id_t wait_id;\n" |
27645 | " const void *codeptr_ra;\n" |
27646 | "} ompt_record_mutex_acquire_t;\n" |
27647 | "\n" |
27648 | "typedef void (*ompt_callback_mutex_t) (\n" |
27649 | " ompt_mutex_t kind,\n" |
27650 | " ompt_wait_id_t wait_id,\n" |
27651 | " const void *codeptr_ra\n" |
27652 | ");\n" |
27653 | "\n" |
27654 | "typedef struct ompt_record_mutex_t {\n" |
27655 | " ompt_mutex_t kind;\n" |
27656 | " ompt_wait_id_t wait_id;\n" |
27657 | " const void *codeptr_ra;\n" |
27658 | "} ompt_record_mutex_t;\n" |
27659 | "\n" |
27660 | "typedef void (*ompt_callback_nest_lock_t) (\n" |
27661 | " ompt_scope_endpoint_t endpoint,\n" |
27662 | " ompt_wait_id_t wait_id,\n" |
27663 | " const void *codeptr_ra\n" |
27664 | ");\n" |
27665 | "\n" |
27666 | "typedef struct ompt_record_nest_lock_t {\n" |
27667 | " ompt_scope_endpoint_t endpoint;\n" |
27668 | " ompt_wait_id_t wait_id;\n" |
27669 | " const void *codeptr_ra;\n" |
27670 | "} ompt_record_nest_lock_t;\n" |
27671 | "\n" |
27672 | "typedef void (*ompt_callback_flush_t) (\n" |
27673 | " ompt_data_t *thread_data,\n" |
27674 | " const void *codeptr_ra\n" |
27675 | ");\n" |
27676 | "\n" |
27677 | "typedef struct ompt_record_flush_t {\n" |
27678 | " const void *codeptr_ra;\n" |
27679 | "} ompt_record_flush_t;\n" |
27680 | "\n" |
27681 | "typedef void (*ompt_callback_cancel_t) (\n" |
27682 | " ompt_data_t *task_data,\n" |
27683 | " int flags,\n" |
27684 | " const void *codeptr_ra\n" |
27685 | ");\n" |
27686 | "\n" |
27687 | "typedef struct ompt_record_cancel_t {\n" |
27688 | " ompt_id_t task_id;\n" |
27689 | " int flags;\n" |
27690 | " const void *codeptr_ra;\n" |
27691 | "} ompt_record_cancel_t;\n" |
27692 | "\n" |
27693 | "typedef void (*ompt_callback_device_initialize_t) (\n" |
27694 | " int device_num,\n" |
27695 | " const char *type,\n" |
27696 | " ompt_device_t *device,\n" |
27697 | " ompt_function_lookup_t lookup,\n" |
27698 | " const char *documentation\n" |
27699 | ");\n" |
27700 | "\n" |
27701 | "typedef void (*ompt_callback_device_finalize_t) (\n" |
27702 | " int device_num\n" |
27703 | ");\n" |
27704 | "\n" |
27705 | "typedef void (*ompt_callback_device_load_t) (\n" |
27706 | " int device_num,\n" |
27707 | " const char *filename,\n" |
27708 | " int64_t offset_in_file,\n" |
27709 | " void *vma_in_file,\n" |
27710 | " size_t bytes,\n" |
27711 | " void *host_addr,\n" |
27712 | " void *device_addr,\n" |
27713 | " uint64_t module_id\n" |
27714 | ");\n" |
27715 | "\n" |
27716 | "typedef void (*ompt_callback_device_unload_t) (\n" |
27717 | " int device_num,\n" |
27718 | " uint64_t module_id\n" |
27719 | ");\n" |
27720 | "\n" |
27721 | "typedef void (*ompt_callback_target_data_op_t) (\n" |
27722 | " ompt_id_t target_id,\n" |
27723 | " ompt_id_t host_op_id,\n" |
27724 | " ompt_target_data_op_t optype,\n" |
27725 | " void *src_addr,\n" |
27726 | " int src_device_num,\n" |
27727 | " void *dest_addr,\n" |
27728 | " int dest_device_num,\n" |
27729 | " size_t bytes,\n" |
27730 | " const void *codeptr_ra\n" |
27731 | ");\n" |
27732 | "\n" |
27733 | "typedef struct ompt_record_target_data_op_t {\n" |
27734 | " ompt_id_t host_op_id;\n" |
27735 | " ompt_target_data_op_t optype;\n" |
27736 | " void *src_addr;\n" |
27737 | " int src_device_num;\n" |
27738 | " void *dest_addr;\n" |
27739 | " int dest_device_num;\n" |
27740 | " size_t bytes;\n" |
27741 | " ompt_device_time_t end_time;\n" |
27742 | " const void *codeptr_ra;\n" |
27743 | "} ompt_record_target_data_op_t;\n" |
27744 | "\n" |
27745 | "typedef void (*ompt_callback_target_t) (\n" |
27746 | " ompt_target_t kind,\n" |
27747 | " ompt_scope_endpoint_t endpoint,\n" |
27748 | " int device_num,\n" |
27749 | " ompt_data_t *task_data,\n" |
27750 | " ompt_id_t target_id,\n" |
27751 | " const void *codeptr_ra\n" |
27752 | ");\n" |
27753 | "\n" |
27754 | "typedef struct ompt_record_target_t {\n" |
27755 | " ompt_target_t kind;\n" |
27756 | " ompt_scope_endpoint_t endpoint;\n" |
27757 | " int device_num;\n" |
27758 | " ompt_id_t task_id;\n" |
27759 | " ompt_id_t target_id;\n" |
27760 | " const void *codeptr_ra;\n" |
27761 | "} ompt_record_target_t;\n" |
27762 | "\n" |
27763 | "typedef void (*ompt_callback_target_map_t) (\n" |
27764 | " ompt_id_t target_id,\n" |
27765 | " unsigned int nitems,\n" |
27766 | " void **host_addr,\n" |
27767 | " void **device_addr,\n" |
27768 | " size_t *bytes,\n" |
27769 | " unsigned int *mapping_flags,\n" |
27770 | " const void *codeptr_ra\n" |
27771 | ");\n" |
27772 | "\n" |
27773 | "typedef struct ompt_record_target_map_t {\n" |
27774 | " ompt_id_t target_id;\n" |
27775 | " unsigned int nitems;\n" |
27776 | " void **host_addr;\n" |
27777 | " void **device_addr;\n" |
27778 | " size_t *bytes;\n" |
27779 | " unsigned int *mapping_flags;\n" |
27780 | " const void *codeptr_ra;\n" |
27781 | "} ompt_record_target_map_t;\n" |
27782 | "\n" |
27783 | "typedef void (*ompt_callback_target_submit_t) (\n" |
27784 | " ompt_id_t target_id,\n" |
27785 | " ompt_id_t host_op_id,\n" |
27786 | " unsigned int requested_num_teams\n" |
27787 | ");\n" |
27788 | "\n" |
27789 | "typedef struct ompt_record_target_kernel_t {\n" |
27790 | " ompt_id_t host_op_id;\n" |
27791 | " unsigned int requested_num_teams;\n" |
27792 | " unsigned int granted_num_teams;\n" |
27793 | " ompt_device_time_t end_time;\n" |
27794 | "} ompt_record_target_kernel_t;\n" |
27795 | "\n" |
27796 | "typedef int (*ompt_callback_control_tool_t) (\n" |
27797 | " uint64_t command,\n" |
27798 | " uint64_t modifier,\n" |
27799 | " void *arg,\n" |
27800 | " const void *codeptr_ra\n" |
27801 | ");\n" |
27802 | "\n" |
27803 | "typedef struct ompt_record_control_tool_t {\n" |
27804 | " uint64_t command;\n" |
27805 | " uint64_t modifier;\n" |
27806 | " const void *codeptr_ra;\n" |
27807 | "} ompt_record_control_tool_t;\n" |
27808 | "\n" |
27809 | "typedef struct ompd_address_t {\n" |
27810 | " ompd_seg_t segment;\n" |
27811 | " ompd_addr_t address;\n" |
27812 | "} ompd_address_t;\n" |
27813 | "\n" |
27814 | "typedef struct ompd_frame_info_t {\n" |
27815 | " ompd_address_t frame_address;\n" |
27816 | " ompd_word_t frame_flag;\n" |
27817 | "} ompd_frame_info_t;\n" |
27818 | "\n" |
27819 | "typedef struct _ompd_aspace_handle ompd_address_space_handle_t;\n" |
27820 | "typedef struct _ompd_thread_handle ompd_thread_handle_t;\n" |
27821 | "typedef struct _ompd_parallel_handle ompd_parallel_handle_t;\n" |
27822 | "typedef struct _ompd_task_handle ompd_task_handle_t;\n" |
27823 | "\n" |
27824 | "typedef struct _ompd_aspace_cont ompd_address_space_context_t;\n" |
27825 | "typedef struct _ompd_thread_cont ompd_thread_context_t;\n" |
27826 | "\n" |
27827 | "typedef struct ompd_device_type_sizes_t {\n" |
27828 | " uint8_t sizeof_char;\n" |
27829 | " uint8_t sizeof_short;\n" |
27830 | " uint8_t sizeof_int;\n" |
27831 | " uint8_t sizeof_long;\n" |
27832 | " uint8_t sizeof_long_long;\n" |
27833 | " uint8_t sizeof_pointer;\n" |
27834 | "} ompd_device_type_sizes_t;\n" |
27835 | "\n" |
27836 | "typedef struct ompt_record_ompt_t {\n" |
27837 | " ompt_callbacks_t type;\n" |
27838 | " ompt_device_time_t time;\n" |
27839 | " ompt_id_t thread_id;\n" |
27840 | " ompt_id_t target_id;\n" |
27841 | " union {\n" |
27842 | " ompt_record_thread_begin_t thread_begin;\n" |
27843 | " ompt_record_parallel_begin_t parallel_begin;\n" |
27844 | " ompt_record_parallel_end_t parallel_end;\n" |
27845 | " ompt_record_work_t work;\n" |
27846 | " ompt_record_dispatch_t dispatch;\n" |
27847 | " ompt_record_task_create_t task_create;\n" |
27848 | " ompt_record_dependences_t dependences;\n" |
27849 | " ompt_record_task_dependence_t task_dependence;\n" |
27850 | " ompt_record_task_schedule_t task_schedule;\n" |
27851 | " ompt_record_implicit_task_t implicit_task;\n" |
27852 | " ompt_record_master_t master;\n" |
27853 | " ompt_record_sync_region_t sync_region;\n" |
27854 | " ompt_record_mutex_acquire_t mutex_acquire;\n" |
27855 | " ompt_record_mutex_t mutex;\n" |
27856 | " ompt_record_nest_lock_t nest_lock;\n" |
27857 | " ompt_record_flush_t flush;\n" |
27858 | " ompt_record_cancel_t cancel;\n" |
27859 | " ompt_record_target_t target;\n" |
27860 | " ompt_record_target_data_op_t target_data_op;\n" |
27861 | " ompt_record_target_map_t target_map;\n" |
27862 | " ompt_record_target_kernel_t target_kernel;\n" |
27863 | " ompt_record_control_tool_t control_tool;\n" |
27864 | " } record;\n" |
27865 | "} ompt_record_ompt_t;\n" |
27866 | "\n" |
27867 | "typedef ompt_record_ompt_t *(*ompt_get_record_ompt_t) (\n" |
27868 | " ompt_buffer_t *buffer,\n" |
27869 | " ompt_buffer_cursor_t current\n" |
27870 | ");\n" |
27871 | "\n" |
27872 | "#define ompt_id_none 0\n" |
27873 | "#define ompt_data_none {0}\n" |
27874 | "#define ompt_time_none 0\n" |
27875 | "#define ompt_hwid_none 0\n" |
27876 | "#define ompt_addr_none ~0\n" |
27877 | "#define ompt_mutex_impl_none 0\n" |
27878 | "#define ompt_wait_id_none 0\n" |
27879 | "\n" |
27880 | "#define ompd_segment_none 0\n" |
27881 | "\n" |
27882 | "#endif /* __OMPT__ */\n" |
27883 | "" } , |
27884 | { "/builtins/opencl-c.h" , "//===--- opencl-c.h - OpenCL C language builtin function header -----------===//\n" |
27885 | "//\n" |
27886 | "// The LLVM Compiler Infrastructure\n" |
27887 | "//\n" |
27888 | "// This file is distributed under the University of Illinois Open Source\n" |
27889 | "// License. See LICENSE.TXT for details.\n" |
27890 | "//\n" |
27891 | "//===----------------------------------------------------------------------===//\n" |
27892 | "\n" |
27893 | "#ifndef _OPENCL_H_\n" |
27894 | "#define _OPENCL_H_\n" |
27895 | "\n" |
27896 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
27897 | "#ifndef cl_khr_depth_images\n" |
27898 | "#define cl_khr_depth_images\n" |
27899 | "#endif //cl_khr_depth_images\n" |
27900 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
27901 | "\n" |
27902 | "#if __OPENCL_C_VERSION__ < CL_VERSION_2_0\n" |
27903 | "#ifdef cl_khr_3d_image_writes\n" |
27904 | "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n" |
27905 | "#endif //cl_khr_3d_image_writes\n" |
27906 | "#endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0\n" |
27907 | "\n" |
27908 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n" |
27909 | "#ifndef cl_intel_planar_yuv\n" |
27910 | "#define cl_intel_planar_yuv\n" |
27911 | "#endif // cl_intel_planar_yuv\n" |
27912 | "#pragma OPENCL EXTENSION cl_intel_planar_yuv : begin\n" |
27913 | "#pragma OPENCL EXTENSION cl_intel_planar_yuv : end\n" |
27914 | "#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n" |
27915 | "\n" |
27916 | "#define __ovld __attribute__((overloadable))\n" |
27917 | "#define __conv __attribute__((convergent))\n" |
27918 | "\n" |
27919 | "// Optimizations\n" |
27920 | "#define __purefn __attribute__((pure))\n" |
27921 | "#define __cnfn __attribute__((const))\n" |
27922 | "\n" |
27923 | "// built-in scalar data types:\n" |
27924 | "\n" |
27925 | "/**\n" |
27926 | " * An unsigned 8-bit integer.\n" |
27927 | " */\n" |
27928 | "typedef unsigned char uchar;\n" |
27929 | "\n" |
27930 | "/**\n" |
27931 | " * An unsigned 16-bit integer.\n" |
27932 | " */\n" |
27933 | "typedef unsigned short ushort;\n" |
27934 | "\n" |
27935 | "/**\n" |
27936 | " * An unsigned 32-bit integer.\n" |
27937 | " */\n" |
27938 | "typedef unsigned int uint;\n" |
27939 | "\n" |
27940 | "/**\n" |
27941 | " * An unsigned 64-bit integer.\n" |
27942 | " */\n" |
27943 | "typedef unsigned long ulong;\n" |
27944 | "\n" |
27945 | "/**\n" |
27946 | " * The unsigned integer type of the result of the sizeof operator. This\n" |
27947 | " * is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS\n" |
27948 | " * defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if\n" |
27949 | " * CL_DEVICE_ADDRESS_BITS is 64-bits.\n" |
27950 | " */\n" |
27951 | "typedef __SIZE_TYPE__ size_t;\n" |
27952 | "\n" |
27953 | "/**\n" |
27954 | " * A signed integer type that is the result of subtracting two pointers.\n" |
27955 | " * This is a 32-bit signed integer if CL_DEVICE_ADDRESS_BITS\n" |
27956 | " * defined in table 4.3 is 32-bits and is a 64-bit signed integer if\n" |
27957 | " * CL_DEVICE_ADDRESS_BITS is 64-bits.\n" |
27958 | " */\n" |
27959 | "typedef __PTRDIFF_TYPE__ ptrdiff_t;\n" |
27960 | "\n" |
27961 | "/**\n" |
27962 | "* A signed integer type with the property that any valid pointer to\n" |
27963 | "* void can be converted to this type, then converted back to pointer\n" |
27964 | "* to void, and the result will compare equal to the original pointer.\n" |
27965 | "*/\n" |
27966 | "typedef __INTPTR_TYPE__ intptr_t;\n" |
27967 | "\n" |
27968 | "/**\n" |
27969 | "* An unsigned integer type with the property that any valid pointer to\n" |
27970 | "* void can be converted to this type, then converted back to pointer\n" |
27971 | "* to void, and the result will compare equal to the original pointer.\n" |
27972 | "*/\n" |
27973 | "typedef __UINTPTR_TYPE__ uintptr_t;\n" |
27974 | "\n" |
27975 | "// built-in vector data types:\n" |
27976 | "typedef char char2 __attribute__((ext_vector_type(2)));\n" |
27977 | "typedef char char3 __attribute__((ext_vector_type(3)));\n" |
27978 | "typedef char char4 __attribute__((ext_vector_type(4)));\n" |
27979 | "typedef char char8 __attribute__((ext_vector_type(8)));\n" |
27980 | "typedef char char16 __attribute__((ext_vector_type(16)));\n" |
27981 | "typedef uchar uchar2 __attribute__((ext_vector_type(2)));\n" |
27982 | "typedef uchar uchar3 __attribute__((ext_vector_type(3)));\n" |
27983 | "typedef uchar uchar4 __attribute__((ext_vector_type(4)));\n" |
27984 | "typedef uchar uchar8 __attribute__((ext_vector_type(8)));\n" |
27985 | "typedef uchar uchar16 __attribute__((ext_vector_type(16)));\n" |
27986 | "typedef short short2 __attribute__((ext_vector_type(2)));\n" |
27987 | "typedef short short3 __attribute__((ext_vector_type(3)));\n" |
27988 | "typedef short short4 __attribute__((ext_vector_type(4)));\n" |
27989 | "typedef short short8 __attribute__((ext_vector_type(8)));\n" |
27990 | "typedef short short16 __attribute__((ext_vector_type(16)));\n" |
27991 | "typedef ushort ushort2 __attribute__((ext_vector_type(2)));\n" |
27992 | "typedef ushort ushort3 __attribute__((ext_vector_type(3)));\n" |
27993 | "typedef ushort ushort4 __attribute__((ext_vector_type(4)));\n" |
27994 | "typedef ushort ushort8 __attribute__((ext_vector_type(8)));\n" |
27995 | "typedef ushort ushort16 __attribute__((ext_vector_type(16)));\n" |
27996 | "typedef int int2 __attribute__((ext_vector_type(2)));\n" |
27997 | "typedef int int3 __attribute__((ext_vector_type(3)));\n" |
27998 | "typedef int int4 __attribute__((ext_vector_type(4)));\n" |
27999 | "typedef int int8 __attribute__((ext_vector_type(8)));\n" |
28000 | "typedef int int16 __attribute__((ext_vector_type(16)));\n" |
28001 | "typedef uint uint2 __attribute__((ext_vector_type(2)));\n" |
28002 | "typedef uint uint3 __attribute__((ext_vector_type(3)));\n" |
28003 | "typedef uint uint4 __attribute__((ext_vector_type(4)));\n" |
28004 | "typedef uint uint8 __attribute__((ext_vector_type(8)));\n" |
28005 | "typedef uint uint16 __attribute__((ext_vector_type(16)));\n" |
28006 | "typedef long long2 __attribute__((ext_vector_type(2)));\n" |
28007 | "typedef long long3 __attribute__((ext_vector_type(3)));\n" |
28008 | "typedef long long4 __attribute__((ext_vector_type(4)));\n" |
28009 | "typedef long long8 __attribute__((ext_vector_type(8)));\n" |
28010 | "typedef long long16 __attribute__((ext_vector_type(16)));\n" |
28011 | "typedef ulong ulong2 __attribute__((ext_vector_type(2)));\n" |
28012 | "typedef ulong ulong3 __attribute__((ext_vector_type(3)));\n" |
28013 | "typedef ulong ulong4 __attribute__((ext_vector_type(4)));\n" |
28014 | "typedef ulong ulong8 __attribute__((ext_vector_type(8)));\n" |
28015 | "typedef ulong ulong16 __attribute__((ext_vector_type(16)));\n" |
28016 | "typedef float float2 __attribute__((ext_vector_type(2)));\n" |
28017 | "typedef float float3 __attribute__((ext_vector_type(3)));\n" |
28018 | "typedef float float4 __attribute__((ext_vector_type(4)));\n" |
28019 | "typedef float float8 __attribute__((ext_vector_type(8)));\n" |
28020 | "typedef float float16 __attribute__((ext_vector_type(16)));\n" |
28021 | "#ifdef cl_khr_fp16\n" |
28022 | "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" |
28023 | "typedef half half2 __attribute__((ext_vector_type(2)));\n" |
28024 | "typedef half half3 __attribute__((ext_vector_type(3)));\n" |
28025 | "typedef half half4 __attribute__((ext_vector_type(4)));\n" |
28026 | "typedef half half8 __attribute__((ext_vector_type(8)));\n" |
28027 | "typedef half half16 __attribute__((ext_vector_type(16)));\n" |
28028 | "#endif\n" |
28029 | "#ifdef cl_khr_fp64\n" |
28030 | "#if __OPENCL_C_VERSION__ < CL_VERSION_1_2\n" |
28031 | "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" |
28032 | "#endif\n" |
28033 | "typedef double double2 __attribute__((ext_vector_type(2)));\n" |
28034 | "typedef double double3 __attribute__((ext_vector_type(3)));\n" |
28035 | "typedef double double4 __attribute__((ext_vector_type(4)));\n" |
28036 | "typedef double double8 __attribute__((ext_vector_type(8)));\n" |
28037 | "typedef double double16 __attribute__((ext_vector_type(16)));\n" |
28038 | "#endif\n" |
28039 | "\n" |
28040 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
28041 | "#define NULL ((void*)0)\n" |
28042 | "#endif\n" |
28043 | "\n" |
28044 | "/**\n" |
28045 | " * Value of maximum non-infinite single-precision floating-point\n" |
28046 | " * number.\n" |
28047 | " */\n" |
28048 | "#define MAXFLOAT 0x1.fffffep127f\n" |
28049 | "\n" |
28050 | "/**\n" |
28051 | " * A positive float constant expression. HUGE_VALF evaluates\n" |
28052 | " * to +infinity. Used as an error value returned by the built-in\n" |
28053 | " * math functions.\n" |
28054 | " */\n" |
28055 | "#define HUGE_VALF (__builtin_huge_valf())\n" |
28056 | "\n" |
28057 | "/**\n" |
28058 | " * A positive double constant expression. HUGE_VAL evaluates\n" |
28059 | " * to +infinity. Used as an error value returned by the built-in\n" |
28060 | " * math functions.\n" |
28061 | " */\n" |
28062 | "#define HUGE_VAL (__builtin_huge_val())\n" |
28063 | "\n" |
28064 | "/**\n" |
28065 | " * A constant expression of type float representing positive or\n" |
28066 | " * unsigned infinity.\n" |
28067 | " */\n" |
28068 | "#define INFINITY (__builtin_inff())\n" |
28069 | "\n" |
28070 | "/**\n" |
28071 | " * A constant expression of type float representing a quiet NaN.\n" |
28072 | " */\n" |
28073 | "#define NAN as_float(INT_MAX)\n" |
28074 | "\n" |
28075 | "#define FP_ILOGB0 INT_MIN\n" |
28076 | "#define FP_ILOGBNAN INT_MAX\n" |
28077 | "\n" |
28078 | "#define FLT_DIG 6\n" |
28079 | "#define FLT_MANT_DIG 24\n" |
28080 | "#define FLT_MAX_10_EXP +38\n" |
28081 | "#define FLT_MAX_EXP +128\n" |
28082 | "#define FLT_MIN_10_EXP -37\n" |
28083 | "#define FLT_MIN_EXP -125\n" |
28084 | "#define FLT_RADIX 2\n" |
28085 | "#define FLT_MAX 0x1.fffffep127f\n" |
28086 | "#define FLT_MIN 0x1.0p-126f\n" |
28087 | "#define FLT_EPSILON 0x1.0p-23f\n" |
28088 | "\n" |
28089 | "#define M_E_F 2.71828182845904523536028747135266250f\n" |
28090 | "#define M_LOG2E_F 1.44269504088896340735992468100189214f\n" |
28091 | "#define M_LOG10E_F 0.434294481903251827651128918916605082f\n" |
28092 | "#define M_LN2_F 0.693147180559945309417232121458176568f\n" |
28093 | "#define M_LN10_F 2.30258509299404568401799145468436421f\n" |
28094 | "#define M_PI_F 3.14159265358979323846264338327950288f\n" |
28095 | "#define M_PI_2_F 1.57079632679489661923132169163975144f\n" |
28096 | "#define M_PI_4_F 0.785398163397448309615660845819875721f\n" |
28097 | "#define M_1_PI_F 0.318309886183790671537767526745028724f\n" |
28098 | "#define M_2_PI_F 0.636619772367581343075535053490057448f\n" |
28099 | "#define M_2_SQRTPI_F 1.12837916709551257389615890312154517f\n" |
28100 | "#define M_SQRT2_F 1.41421356237309504880168872420969808f\n" |
28101 | "#define M_SQRT1_2_F 0.707106781186547524400844362104849039f\n" |
28102 | "\n" |
28103 | "#define DBL_DIG 15\n" |
28104 | "#define DBL_MANT_DIG 53\n" |
28105 | "#define DBL_MAX_10_EXP +308\n" |
28106 | "#define DBL_MAX_EXP +1024\n" |
28107 | "#define DBL_MIN_10_EXP -307\n" |
28108 | "#define DBL_MIN_EXP -1021\n" |
28109 | "#define DBL_RADIX 2\n" |
28110 | "#define DBL_MAX 0x1.fffffffffffffp1023\n" |
28111 | "#define DBL_MIN 0x1.0p-1022\n" |
28112 | "#define DBL_EPSILON 0x1.0p-52\n" |
28113 | "\n" |
28114 | "#define M_E 0x1.5bf0a8b145769p+1\n" |
28115 | "#define M_LOG2E 0x1.71547652b82fep+0\n" |
28116 | "#define M_LOG10E 0x1.bcb7b1526e50ep-2\n" |
28117 | "#define M_LN2 0x1.62e42fefa39efp-1\n" |
28118 | "#define M_LN10 0x1.26bb1bbb55516p+1\n" |
28119 | "#define M_PI 0x1.921fb54442d18p+1\n" |
28120 | "#define M_PI_2 0x1.921fb54442d18p+0\n" |
28121 | "#define M_PI_4 0x1.921fb54442d18p-1\n" |
28122 | "#define M_1_PI 0x1.45f306dc9c883p-2\n" |
28123 | "#define M_2_PI 0x1.45f306dc9c883p-1\n" |
28124 | "#define M_2_SQRTPI 0x1.20dd750429b6dp+0\n" |
28125 | "#define M_SQRT2 0x1.6a09e667f3bcdp+0\n" |
28126 | "#define M_SQRT1_2 0x1.6a09e667f3bcdp-1\n" |
28127 | "\n" |
28128 | "#ifdef cl_khr_fp16\n" |
28129 | "\n" |
28130 | "#define HALF_DIG 3\n" |
28131 | "#define HALF_MANT_DIG 11\n" |
28132 | "#define HALF_MAX_10_EXP +4\n" |
28133 | "#define HALF_MAX_EXP +16\n" |
28134 | "#define HALF_MIN_10_EXP -4\n" |
28135 | "#define HALF_MIN_EXP -13\n" |
28136 | "#define HALF_RADIX 2\n" |
28137 | "#define HALF_MAX ((0x1.ffcp15h))\n" |
28138 | "#define HALF_MIN ((0x1.0p-14h))\n" |
28139 | "#define HALF_EPSILON ((0x1.0p-10h))\n" |
28140 | "\n" |
28141 | "#define M_E_H 2.71828182845904523536028747135266250h\n" |
28142 | "#define M_LOG2E_H 1.44269504088896340735992468100189214h\n" |
28143 | "#define M_LOG10E_H 0.434294481903251827651128918916605082h\n" |
28144 | "#define M_LN2_H 0.693147180559945309417232121458176568h\n" |
28145 | "#define M_LN10_H 2.30258509299404568401799145468436421h\n" |
28146 | "#define M_PI_H 3.14159265358979323846264338327950288h\n" |
28147 | "#define M_PI_2_H 1.57079632679489661923132169163975144h\n" |
28148 | "#define M_PI_4_H 0.785398163397448309615660845819875721h\n" |
28149 | "#define M_1_PI_H 0.318309886183790671537767526745028724h\n" |
28150 | "#define M_2_PI_H 0.636619772367581343075535053490057448h\n" |
28151 | "#define M_2_SQRTPI_H 1.12837916709551257389615890312154517h\n" |
28152 | "#define M_SQRT2_H 1.41421356237309504880168872420969808h\n" |
28153 | "#define M_SQRT1_2_H 0.707106781186547524400844362104849039h\n" |
28154 | "\n" |
28155 | "#endif //cl_khr_fp16\n" |
28156 | "\n" |
28157 | "#define CHAR_BIT 8\n" |
28158 | "#define SCHAR_MAX 127\n" |
28159 | "#define SCHAR_MIN (-128)\n" |
28160 | "#define UCHAR_MAX 255\n" |
28161 | "#define CHAR_MAX SCHAR_MAX\n" |
28162 | "#define CHAR_MIN SCHAR_MIN\n" |
28163 | "#define USHRT_MAX 65535\n" |
28164 | "#define SHRT_MAX 32767\n" |
28165 | "#define SHRT_MIN (-32768)\n" |
28166 | "#define UINT_MAX 0xffffffff\n" |
28167 | "#define INT_MAX 2147483647\n" |
28168 | "#define INT_MIN (-2147483647-1)\n" |
28169 | "#define ULONG_MAX 0xffffffffffffffffUL\n" |
28170 | "#define LONG_MAX 0x7fffffffffffffffL\n" |
28171 | "#define LONG_MIN (-0x7fffffffffffffffL-1)\n" |
28172 | "\n" |
28173 | "// OpenCL v1.1/1.2/2.0 s6.2.3 - Explicit conversions\n" |
28174 | "\n" |
28175 | "char __ovld __cnfn convert_char_rte(char);\n" |
28176 | "char __ovld __cnfn convert_char_sat_rte(char);\n" |
28177 | "char __ovld __cnfn convert_char_rtz(char);\n" |
28178 | "char __ovld __cnfn convert_char_sat_rtz(char);\n" |
28179 | "char __ovld __cnfn convert_char_rtp(char);\n" |
28180 | "char __ovld __cnfn convert_char_sat_rtp(char);\n" |
28181 | "char __ovld __cnfn convert_char_rtn(char);\n" |
28182 | "char __ovld __cnfn convert_char_sat_rtn(char);\n" |
28183 | "char __ovld __cnfn convert_char(char);\n" |
28184 | "char __ovld __cnfn convert_char_sat(char);\n" |
28185 | "char __ovld __cnfn convert_char_rte(uchar);\n" |
28186 | "char __ovld __cnfn convert_char_sat_rte(uchar);\n" |
28187 | "char __ovld __cnfn convert_char_rtz(uchar);\n" |
28188 | "char __ovld __cnfn convert_char_sat_rtz(uchar);\n" |
28189 | "char __ovld __cnfn convert_char_rtp(uchar);\n" |
28190 | "char __ovld __cnfn convert_char_sat_rtp(uchar);\n" |
28191 | "char __ovld __cnfn convert_char_rtn(uchar);\n" |
28192 | "char __ovld __cnfn convert_char_sat_rtn(uchar);\n" |
28193 | "char __ovld __cnfn convert_char(uchar);\n" |
28194 | "char __ovld __cnfn convert_char_sat(uchar);\n" |
28195 | "char __ovld __cnfn convert_char_rte(short);\n" |
28196 | "char __ovld __cnfn convert_char_sat_rte(short);\n" |
28197 | "char __ovld __cnfn convert_char_rtz(short);\n" |
28198 | "char __ovld __cnfn convert_char_sat_rtz(short);\n" |
28199 | "char __ovld __cnfn convert_char_rtp(short);\n" |
28200 | "char __ovld __cnfn convert_char_sat_rtp(short);\n" |
28201 | "char __ovld __cnfn convert_char_rtn(short);\n" |
28202 | "char __ovld __cnfn convert_char_sat_rtn(short);\n" |
28203 | "char __ovld __cnfn convert_char(short);\n" |
28204 | "char __ovld __cnfn convert_char_sat(short);\n" |
28205 | "char __ovld __cnfn convert_char_rte(ushort);\n" |
28206 | "char __ovld __cnfn convert_char_sat_rte(ushort);\n" |
28207 | "char __ovld __cnfn convert_char_rtz(ushort);\n" |
28208 | "char __ovld __cnfn convert_char_sat_rtz(ushort);\n" |
28209 | "char __ovld __cnfn convert_char_rtp(ushort);\n" |
28210 | "char __ovld __cnfn convert_char_sat_rtp(ushort);\n" |
28211 | "char __ovld __cnfn convert_char_rtn(ushort);\n" |
28212 | "char __ovld __cnfn convert_char_sat_rtn(ushort);\n" |
28213 | "char __ovld __cnfn convert_char(ushort);\n" |
28214 | "char __ovld __cnfn convert_char_sat(ushort);\n" |
28215 | "char __ovld __cnfn convert_char_rte(int);\n" |
28216 | "char __ovld __cnfn convert_char_sat_rte(int);\n" |
28217 | "char __ovld __cnfn convert_char_rtz(int);\n" |
28218 | "char __ovld __cnfn convert_char_sat_rtz(int);\n" |
28219 | "char __ovld __cnfn convert_char_rtp(int);\n" |
28220 | "char __ovld __cnfn convert_char_sat_rtp(int);\n" |
28221 | "char __ovld __cnfn convert_char_rtn(int);\n" |
28222 | "char __ovld __cnfn convert_char_sat_rtn(int);\n" |
28223 | "char __ovld __cnfn convert_char(int);\n" |
28224 | "char __ovld __cnfn convert_char_sat(int);\n" |
28225 | "char __ovld __cnfn convert_char_rte(uint);\n" |
28226 | "char __ovld __cnfn convert_char_sat_rte(uint);\n" |
28227 | "char __ovld __cnfn convert_char_rtz(uint);\n" |
28228 | "char __ovld __cnfn convert_char_sat_rtz(uint);\n" |
28229 | "char __ovld __cnfn convert_char_rtp(uint);\n" |
28230 | "char __ovld __cnfn convert_char_sat_rtp(uint);\n" |
28231 | "char __ovld __cnfn convert_char_rtn(uint);\n" |
28232 | "char __ovld __cnfn convert_char_sat_rtn(uint);\n" |
28233 | "char __ovld __cnfn convert_char(uint);\n" |
28234 | "char __ovld __cnfn convert_char_sat(uint);\n" |
28235 | "char __ovld __cnfn convert_char_rte(long);\n" |
28236 | "char __ovld __cnfn convert_char_sat_rte(long);\n" |
28237 | "char __ovld __cnfn convert_char_rtz(long);\n" |
28238 | "char __ovld __cnfn convert_char_sat_rtz(long);\n" |
28239 | "char __ovld __cnfn convert_char_rtp(long);\n" |
28240 | "char __ovld __cnfn convert_char_sat_rtp(long);\n" |
28241 | "char __ovld __cnfn convert_char_rtn(long);\n" |
28242 | "char __ovld __cnfn convert_char_sat_rtn(long);\n" |
28243 | "char __ovld __cnfn convert_char(long);\n" |
28244 | "char __ovld __cnfn convert_char_sat(long);\n" |
28245 | "char __ovld __cnfn convert_char_rte(ulong);\n" |
28246 | "char __ovld __cnfn convert_char_sat_rte(ulong);\n" |
28247 | "char __ovld __cnfn convert_char_rtz(ulong);\n" |
28248 | "char __ovld __cnfn convert_char_sat_rtz(ulong);\n" |
28249 | "char __ovld __cnfn convert_char_rtp(ulong);\n" |
28250 | "char __ovld __cnfn convert_char_sat_rtp(ulong);\n" |
28251 | "char __ovld __cnfn convert_char_rtn(ulong);\n" |
28252 | "char __ovld __cnfn convert_char_sat_rtn(ulong);\n" |
28253 | "char __ovld __cnfn convert_char(ulong);\n" |
28254 | "char __ovld __cnfn convert_char_sat(ulong);\n" |
28255 | "char __ovld __cnfn convert_char_rte(float);\n" |
28256 | "char __ovld __cnfn convert_char_sat_rte(float);\n" |
28257 | "char __ovld __cnfn convert_char_rtz(float);\n" |
28258 | "char __ovld __cnfn convert_char_sat_rtz(float);\n" |
28259 | "char __ovld __cnfn convert_char_rtp(float);\n" |
28260 | "char __ovld __cnfn convert_char_sat_rtp(float);\n" |
28261 | "char __ovld __cnfn convert_char_rtn(float);\n" |
28262 | "char __ovld __cnfn convert_char_sat_rtn(float);\n" |
28263 | "char __ovld __cnfn convert_char(float);\n" |
28264 | "char __ovld __cnfn convert_char_sat(float);\n" |
28265 | "uchar __ovld __cnfn convert_uchar_rte(char);\n" |
28266 | "uchar __ovld __cnfn convert_uchar_sat_rte(char);\n" |
28267 | "uchar __ovld __cnfn convert_uchar_rtz(char);\n" |
28268 | "uchar __ovld __cnfn convert_uchar_sat_rtz(char);\n" |
28269 | "uchar __ovld __cnfn convert_uchar_rtp(char);\n" |
28270 | "uchar __ovld __cnfn convert_uchar_sat_rtp(char);\n" |
28271 | "uchar __ovld __cnfn convert_uchar_rtn(char);\n" |
28272 | "uchar __ovld __cnfn convert_uchar_sat_rtn(char);\n" |
28273 | "uchar __ovld __cnfn convert_uchar(char);\n" |
28274 | "uchar __ovld __cnfn convert_uchar_sat(char);\n" |
28275 | "uchar __ovld __cnfn convert_uchar_rte(uchar);\n" |
28276 | "uchar __ovld __cnfn convert_uchar_sat_rte(uchar);\n" |
28277 | "uchar __ovld __cnfn convert_uchar_rtz(uchar);\n" |
28278 | "uchar __ovld __cnfn convert_uchar_sat_rtz(uchar);\n" |
28279 | "uchar __ovld __cnfn convert_uchar_rtp(uchar);\n" |
28280 | "uchar __ovld __cnfn convert_uchar_sat_rtp(uchar);\n" |
28281 | "uchar __ovld __cnfn convert_uchar_rtn(uchar);\n" |
28282 | "uchar __ovld __cnfn convert_uchar_sat_rtn(uchar);\n" |
28283 | "uchar __ovld __cnfn convert_uchar(uchar);\n" |
28284 | "uchar __ovld __cnfn convert_uchar_sat(uchar);\n" |
28285 | "uchar __ovld __cnfn convert_uchar_rte(short);\n" |
28286 | "uchar __ovld __cnfn convert_uchar_sat_rte(short);\n" |
28287 | "uchar __ovld __cnfn convert_uchar_rtz(short);\n" |
28288 | "uchar __ovld __cnfn convert_uchar_sat_rtz(short);\n" |
28289 | "uchar __ovld __cnfn convert_uchar_rtp(short);\n" |
28290 | "uchar __ovld __cnfn convert_uchar_sat_rtp(short);\n" |
28291 | "uchar __ovld __cnfn convert_uchar_rtn(short);\n" |
28292 | "uchar __ovld __cnfn convert_uchar_sat_rtn(short);\n" |
28293 | "uchar __ovld __cnfn convert_uchar(short);\n" |
28294 | "uchar __ovld __cnfn convert_uchar_sat(short);\n" |
28295 | "uchar __ovld __cnfn convert_uchar_rte(ushort);\n" |
28296 | "uchar __ovld __cnfn convert_uchar_sat_rte(ushort);\n" |
28297 | "uchar __ovld __cnfn convert_uchar_rtz(ushort);\n" |
28298 | "uchar __ovld __cnfn convert_uchar_sat_rtz(ushort);\n" |
28299 | "uchar __ovld __cnfn convert_uchar_rtp(ushort);\n" |
28300 | "uchar __ovld __cnfn convert_uchar_sat_rtp(ushort);\n" |
28301 | "uchar __ovld __cnfn convert_uchar_rtn(ushort);\n" |
28302 | "uchar __ovld __cnfn convert_uchar_sat_rtn(ushort);\n" |
28303 | "uchar __ovld __cnfn convert_uchar(ushort);\n" |
28304 | "uchar __ovld __cnfn convert_uchar_sat(ushort);\n" |
28305 | "uchar __ovld __cnfn convert_uchar_rte(int);\n" |
28306 | "uchar __ovld __cnfn convert_uchar_sat_rte(int);\n" |
28307 | "uchar __ovld __cnfn convert_uchar_rtz(int);\n" |
28308 | "uchar __ovld __cnfn convert_uchar_sat_rtz(int);\n" |
28309 | "uchar __ovld __cnfn convert_uchar_rtp(int);\n" |
28310 | "uchar __ovld __cnfn convert_uchar_sat_rtp(int);\n" |
28311 | "uchar __ovld __cnfn convert_uchar_rtn(int);\n" |
28312 | "uchar __ovld __cnfn convert_uchar_sat_rtn(int);\n" |
28313 | "uchar __ovld __cnfn convert_uchar(int);\n" |
28314 | "uchar __ovld __cnfn convert_uchar_sat(int);\n" |
28315 | "uchar __ovld __cnfn convert_uchar_rte(uint);\n" |
28316 | "uchar __ovld __cnfn convert_uchar_sat_rte(uint);\n" |
28317 | "uchar __ovld __cnfn convert_uchar_rtz(uint);\n" |
28318 | "uchar __ovld __cnfn convert_uchar_sat_rtz(uint);\n" |
28319 | "uchar __ovld __cnfn convert_uchar_rtp(uint);\n" |
28320 | "uchar __ovld __cnfn convert_uchar_sat_rtp(uint);\n" |
28321 | "uchar __ovld __cnfn convert_uchar_rtn(uint);\n" |
28322 | "uchar __ovld __cnfn convert_uchar_sat_rtn(uint);\n" |
28323 | "uchar __ovld __cnfn convert_uchar(uint);\n" |
28324 | "uchar __ovld __cnfn convert_uchar_sat(uint);\n" |
28325 | "uchar __ovld __cnfn convert_uchar_rte(long);\n" |
28326 | "uchar __ovld __cnfn convert_uchar_sat_rte(long);\n" |
28327 | "uchar __ovld __cnfn convert_uchar_rtz(long);\n" |
28328 | "uchar __ovld __cnfn convert_uchar_sat_rtz(long);\n" |
28329 | "uchar __ovld __cnfn convert_uchar_rtp(long);\n" |
28330 | "uchar __ovld __cnfn convert_uchar_sat_rtp(long);\n" |
28331 | "uchar __ovld __cnfn convert_uchar_rtn(long);\n" |
28332 | "uchar __ovld __cnfn convert_uchar_sat_rtn(long);\n" |
28333 | "uchar __ovld __cnfn convert_uchar(long);\n" |
28334 | "uchar __ovld __cnfn convert_uchar_sat(long);\n" |
28335 | "uchar __ovld __cnfn convert_uchar_rte(ulong);\n" |
28336 | "uchar __ovld __cnfn convert_uchar_sat_rte(ulong);\n" |
28337 | "uchar __ovld __cnfn convert_uchar_rtz(ulong);\n" |
28338 | "uchar __ovld __cnfn convert_uchar_sat_rtz(ulong);\n" |
28339 | "uchar __ovld __cnfn convert_uchar_rtp(ulong);\n" |
28340 | "uchar __ovld __cnfn convert_uchar_sat_rtp(ulong);\n" |
28341 | "uchar __ovld __cnfn convert_uchar_rtn(ulong);\n" |
28342 | "uchar __ovld __cnfn convert_uchar_sat_rtn(ulong);\n" |
28343 | "uchar __ovld __cnfn convert_uchar(ulong);\n" |
28344 | "uchar __ovld __cnfn convert_uchar_sat(ulong);\n" |
28345 | "uchar __ovld __cnfn convert_uchar_rte(float);\n" |
28346 | "uchar __ovld __cnfn convert_uchar_sat_rte(float);\n" |
28347 | "uchar __ovld __cnfn convert_uchar_rtz(float);\n" |
28348 | "uchar __ovld __cnfn convert_uchar_sat_rtz(float);\n" |
28349 | "uchar __ovld __cnfn convert_uchar_rtp(float);\n" |
28350 | "uchar __ovld __cnfn convert_uchar_sat_rtp(float);\n" |
28351 | "uchar __ovld __cnfn convert_uchar_rtn(float);\n" |
28352 | "uchar __ovld __cnfn convert_uchar_sat_rtn(float);\n" |
28353 | "uchar __ovld __cnfn convert_uchar(float);\n" |
28354 | "uchar __ovld __cnfn convert_uchar_sat(float);\n" |
28355 | "\n" |
28356 | "short __ovld __cnfn convert_short_rte(char);\n" |
28357 | "short __ovld __cnfn convert_short_sat_rte(char);\n" |
28358 | "short __ovld __cnfn convert_short_rtz(char);\n" |
28359 | "short __ovld __cnfn convert_short_sat_rtz(char);\n" |
28360 | "short __ovld __cnfn convert_short_rtp(char);\n" |
28361 | "short __ovld __cnfn convert_short_sat_rtp(char);\n" |
28362 | "short __ovld __cnfn convert_short_rtn(char);\n" |
28363 | "short __ovld __cnfn convert_short_sat_rtn(char);\n" |
28364 | "short __ovld __cnfn convert_short(char);\n" |
28365 | "short __ovld __cnfn convert_short_sat(char);\n" |
28366 | "short __ovld __cnfn convert_short_rte(uchar);\n" |
28367 | "short __ovld __cnfn convert_short_sat_rte(uchar);\n" |
28368 | "short __ovld __cnfn convert_short_rtz(uchar);\n" |
28369 | "short __ovld __cnfn convert_short_sat_rtz(uchar);\n" |
28370 | "short __ovld __cnfn convert_short_rtp(uchar);\n" |
28371 | "short __ovld __cnfn convert_short_sat_rtp(uchar);\n" |
28372 | "short __ovld __cnfn convert_short_rtn(uchar);\n" |
28373 | "short __ovld __cnfn convert_short_sat_rtn(uchar);\n" |
28374 | "short __ovld __cnfn convert_short(uchar);\n" |
28375 | "short __ovld __cnfn convert_short_sat(uchar);\n" |
28376 | "short __ovld __cnfn convert_short_rte(short);\n" |
28377 | "short __ovld __cnfn convert_short_sat_rte(short);\n" |
28378 | "short __ovld __cnfn convert_short_rtz(short);\n" |
28379 | "short __ovld __cnfn convert_short_sat_rtz(short);\n" |
28380 | "short __ovld __cnfn convert_short_rtp(short);\n" |
28381 | "short __ovld __cnfn convert_short_sat_rtp(short);\n" |
28382 | "short __ovld __cnfn convert_short_rtn(short);\n" |
28383 | "short __ovld __cnfn convert_short_sat_rtn(short);\n" |
28384 | "short __ovld __cnfn convert_short(short);\n" |
28385 | "short __ovld __cnfn convert_short_sat(short);\n" |
28386 | "short __ovld __cnfn convert_short_rte(ushort);\n" |
28387 | "short __ovld __cnfn convert_short_sat_rte(ushort);\n" |
28388 | "short __ovld __cnfn convert_short_rtz(ushort);\n" |
28389 | "short __ovld __cnfn convert_short_sat_rtz(ushort);\n" |
28390 | "short __ovld __cnfn convert_short_rtp(ushort);\n" |
28391 | "short __ovld __cnfn convert_short_sat_rtp(ushort);\n" |
28392 | "short __ovld __cnfn convert_short_rtn(ushort);\n" |
28393 | "short __ovld __cnfn convert_short_sat_rtn(ushort);\n" |
28394 | "short __ovld __cnfn convert_short(ushort);\n" |
28395 | "short __ovld __cnfn convert_short_sat(ushort);\n" |
28396 | "short __ovld __cnfn convert_short_rte(int);\n" |
28397 | "short __ovld __cnfn convert_short_sat_rte(int);\n" |
28398 | "short __ovld __cnfn convert_short_rtz(int);\n" |
28399 | "short __ovld __cnfn convert_short_sat_rtz(int);\n" |
28400 | "short __ovld __cnfn convert_short_rtp(int);\n" |
28401 | "short __ovld __cnfn convert_short_sat_rtp(int);\n" |
28402 | "short __ovld __cnfn convert_short_rtn(int);\n" |
28403 | "short __ovld __cnfn convert_short_sat_rtn(int);\n" |
28404 | "short __ovld __cnfn convert_short(int);\n" |
28405 | "short __ovld __cnfn convert_short_sat(int);\n" |
28406 | "short __ovld __cnfn convert_short_rte(uint);\n" |
28407 | "short __ovld __cnfn convert_short_sat_rte(uint);\n" |
28408 | "short __ovld __cnfn convert_short_rtz(uint);\n" |
28409 | "short __ovld __cnfn convert_short_sat_rtz(uint);\n" |
28410 | "short __ovld __cnfn convert_short_rtp(uint);\n" |
28411 | "short __ovld __cnfn convert_short_sat_rtp(uint);\n" |
28412 | "short __ovld __cnfn convert_short_rtn(uint);\n" |
28413 | "short __ovld __cnfn convert_short_sat_rtn(uint);\n" |
28414 | "short __ovld __cnfn convert_short(uint);\n" |
28415 | "short __ovld __cnfn convert_short_sat(uint);\n" |
28416 | "short __ovld __cnfn convert_short_rte(long);\n" |
28417 | "short __ovld __cnfn convert_short_sat_rte(long);\n" |
28418 | "short __ovld __cnfn convert_short_rtz(long);\n" |
28419 | "short __ovld __cnfn convert_short_sat_rtz(long);\n" |
28420 | "short __ovld __cnfn convert_short_rtp(long);\n" |
28421 | "short __ovld __cnfn convert_short_sat_rtp(long);\n" |
28422 | "short __ovld __cnfn convert_short_rtn(long);\n" |
28423 | "short __ovld __cnfn convert_short_sat_rtn(long);\n" |
28424 | "short __ovld __cnfn convert_short(long);\n" |
28425 | "short __ovld __cnfn convert_short_sat(long);\n" |
28426 | "short __ovld __cnfn convert_short_rte(ulong);\n" |
28427 | "short __ovld __cnfn convert_short_sat_rte(ulong);\n" |
28428 | "short __ovld __cnfn convert_short_rtz(ulong);\n" |
28429 | "short __ovld __cnfn convert_short_sat_rtz(ulong);\n" |
28430 | "short __ovld __cnfn convert_short_rtp(ulong);\n" |
28431 | "short __ovld __cnfn convert_short_sat_rtp(ulong);\n" |
28432 | "short __ovld __cnfn convert_short_rtn(ulong);\n" |
28433 | "short __ovld __cnfn convert_short_sat_rtn(ulong);\n" |
28434 | "short __ovld __cnfn convert_short(ulong);\n" |
28435 | "short __ovld __cnfn convert_short_sat(ulong);\n" |
28436 | "short __ovld __cnfn convert_short_rte(float);\n" |
28437 | "short __ovld __cnfn convert_short_sat_rte(float);\n" |
28438 | "short __ovld __cnfn convert_short_rtz(float);\n" |
28439 | "short __ovld __cnfn convert_short_sat_rtz(float);\n" |
28440 | "short __ovld __cnfn convert_short_rtp(float);\n" |
28441 | "short __ovld __cnfn convert_short_sat_rtp(float);\n" |
28442 | "short __ovld __cnfn convert_short_rtn(float);\n" |
28443 | "short __ovld __cnfn convert_short_sat_rtn(float);\n" |
28444 | "short __ovld __cnfn convert_short(float);\n" |
28445 | "short __ovld __cnfn convert_short_sat(float);\n" |
28446 | "ushort __ovld __cnfn convert_ushort_rte(char);\n" |
28447 | "ushort __ovld __cnfn convert_ushort_sat_rte(char);\n" |
28448 | "ushort __ovld __cnfn convert_ushort_rtz(char);\n" |
28449 | "ushort __ovld __cnfn convert_ushort_sat_rtz(char);\n" |
28450 | "ushort __ovld __cnfn convert_ushort_rtp(char);\n" |
28451 | "ushort __ovld __cnfn convert_ushort_sat_rtp(char);\n" |
28452 | "ushort __ovld __cnfn convert_ushort_rtn(char);\n" |
28453 | "ushort __ovld __cnfn convert_ushort_sat_rtn(char);\n" |
28454 | "ushort __ovld __cnfn convert_ushort(char);\n" |
28455 | "ushort __ovld __cnfn convert_ushort_sat(char);\n" |
28456 | "ushort __ovld __cnfn convert_ushort_rte(uchar);\n" |
28457 | "ushort __ovld __cnfn convert_ushort_sat_rte(uchar);\n" |
28458 | "ushort __ovld __cnfn convert_ushort_rtz(uchar);\n" |
28459 | "ushort __ovld __cnfn convert_ushort_sat_rtz(uchar);\n" |
28460 | "ushort __ovld __cnfn convert_ushort_rtp(uchar);\n" |
28461 | "ushort __ovld __cnfn convert_ushort_sat_rtp(uchar);\n" |
28462 | "ushort __ovld __cnfn convert_ushort_rtn(uchar);\n" |
28463 | "ushort __ovld __cnfn convert_ushort_sat_rtn(uchar);\n" |
28464 | "ushort __ovld __cnfn convert_ushort(uchar);\n" |
28465 | "ushort __ovld __cnfn convert_ushort_sat(uchar);\n" |
28466 | "ushort __ovld __cnfn convert_ushort_rte(short);\n" |
28467 | "ushort __ovld __cnfn convert_ushort_sat_rte(short);\n" |
28468 | "ushort __ovld __cnfn convert_ushort_rtz(short);\n" |
28469 | "ushort __ovld __cnfn convert_ushort_sat_rtz(short);\n" |
28470 | "ushort __ovld __cnfn convert_ushort_rtp(short);\n" |
28471 | "ushort __ovld __cnfn convert_ushort_sat_rtp(short);\n" |
28472 | "ushort __ovld __cnfn convert_ushort_rtn(short);\n" |
28473 | "ushort __ovld __cnfn convert_ushort_sat_rtn(short);\n" |
28474 | "ushort __ovld __cnfn convert_ushort(short);\n" |
28475 | "ushort __ovld __cnfn convert_ushort_sat(short);\n" |
28476 | "ushort __ovld __cnfn convert_ushort_rte(ushort);\n" |
28477 | "ushort __ovld __cnfn convert_ushort_sat_rte(ushort);\n" |
28478 | "ushort __ovld __cnfn convert_ushort_rtz(ushort);\n" |
28479 | "ushort __ovld __cnfn convert_ushort_sat_rtz(ushort);\n" |
28480 | "ushort __ovld __cnfn convert_ushort_rtp(ushort);\n" |
28481 | "ushort __ovld __cnfn convert_ushort_sat_rtp(ushort);\n" |
28482 | "ushort __ovld __cnfn convert_ushort_rtn(ushort);\n" |
28483 | "ushort __ovld __cnfn convert_ushort_sat_rtn(ushort);\n" |
28484 | "ushort __ovld __cnfn convert_ushort(ushort);\n" |
28485 | "ushort __ovld __cnfn convert_ushort_sat(ushort);\n" |
28486 | "ushort __ovld __cnfn convert_ushort_rte(int);\n" |
28487 | "ushort __ovld __cnfn convert_ushort_sat_rte(int);\n" |
28488 | "ushort __ovld __cnfn convert_ushort_rtz(int);\n" |
28489 | "ushort __ovld __cnfn convert_ushort_sat_rtz(int);\n" |
28490 | "ushort __ovld __cnfn convert_ushort_rtp(int);\n" |
28491 | "ushort __ovld __cnfn convert_ushort_sat_rtp(int);\n" |
28492 | "ushort __ovld __cnfn convert_ushort_rtn(int);\n" |
28493 | "ushort __ovld __cnfn convert_ushort_sat_rtn(int);\n" |
28494 | "ushort __ovld __cnfn convert_ushort(int);\n" |
28495 | "ushort __ovld __cnfn convert_ushort_sat(int);\n" |
28496 | "ushort __ovld __cnfn convert_ushort_rte(uint);\n" |
28497 | "ushort __ovld __cnfn convert_ushort_sat_rte(uint);\n" |
28498 | "ushort __ovld __cnfn convert_ushort_rtz(uint);\n" |
28499 | "ushort __ovld __cnfn convert_ushort_sat_rtz(uint);\n" |
28500 | "ushort __ovld __cnfn convert_ushort_rtp(uint);\n" |
28501 | "ushort __ovld __cnfn convert_ushort_sat_rtp(uint);\n" |
28502 | "ushort __ovld __cnfn convert_ushort_rtn(uint);\n" |
28503 | "ushort __ovld __cnfn convert_ushort_sat_rtn(uint);\n" |
28504 | "ushort __ovld __cnfn convert_ushort(uint);\n" |
28505 | "ushort __ovld __cnfn convert_ushort_sat(uint);\n" |
28506 | "ushort __ovld __cnfn convert_ushort_rte(long);\n" |
28507 | "ushort __ovld __cnfn convert_ushort_sat_rte(long);\n" |
28508 | "ushort __ovld __cnfn convert_ushort_rtz(long);\n" |
28509 | "ushort __ovld __cnfn convert_ushort_sat_rtz(long);\n" |
28510 | "ushort __ovld __cnfn convert_ushort_rtp(long);\n" |
28511 | "ushort __ovld __cnfn convert_ushort_sat_rtp(long);\n" |
28512 | "ushort __ovld __cnfn convert_ushort_rtn(long);\n" |
28513 | "ushort __ovld __cnfn convert_ushort_sat_rtn(long);\n" |
28514 | "ushort __ovld __cnfn convert_ushort(long);\n" |
28515 | "ushort __ovld __cnfn convert_ushort_sat(long);\n" |
28516 | "ushort __ovld __cnfn convert_ushort_rte(ulong);\n" |
28517 | "ushort __ovld __cnfn convert_ushort_sat_rte(ulong);\n" |
28518 | "ushort __ovld __cnfn convert_ushort_rtz(ulong);\n" |
28519 | "ushort __ovld __cnfn convert_ushort_sat_rtz(ulong);\n" |
28520 | "ushort __ovld __cnfn convert_ushort_rtp(ulong);\n" |
28521 | "ushort __ovld __cnfn convert_ushort_sat_rtp(ulong);\n" |
28522 | "ushort __ovld __cnfn convert_ushort_rtn(ulong);\n" |
28523 | "ushort __ovld __cnfn convert_ushort_sat_rtn(ulong);\n" |
28524 | "ushort __ovld __cnfn convert_ushort(ulong);\n" |
28525 | "ushort __ovld __cnfn convert_ushort_sat(ulong);\n" |
28526 | "ushort __ovld __cnfn convert_ushort_rte(float);\n" |
28527 | "ushort __ovld __cnfn convert_ushort_sat_rte(float);\n" |
28528 | "ushort __ovld __cnfn convert_ushort_rtz(float);\n" |
28529 | "ushort __ovld __cnfn convert_ushort_sat_rtz(float);\n" |
28530 | "ushort __ovld __cnfn convert_ushort_rtp(float);\n" |
28531 | "ushort __ovld __cnfn convert_ushort_sat_rtp(float);\n" |
28532 | "ushort __ovld __cnfn convert_ushort_rtn(float);\n" |
28533 | "ushort __ovld __cnfn convert_ushort_sat_rtn(float);\n" |
28534 | "ushort __ovld __cnfn convert_ushort(float);\n" |
28535 | "ushort __ovld __cnfn convert_ushort_sat(float);\n" |
28536 | "int __ovld __cnfn convert_int_rte(char);\n" |
28537 | "int __ovld __cnfn convert_int_sat_rte(char);\n" |
28538 | "int __ovld __cnfn convert_int_rtz(char);\n" |
28539 | "int __ovld __cnfn convert_int_sat_rtz(char);\n" |
28540 | "int __ovld __cnfn convert_int_rtp(char);\n" |
28541 | "int __ovld __cnfn convert_int_sat_rtp(char);\n" |
28542 | "int __ovld __cnfn convert_int_rtn(char);\n" |
28543 | "int __ovld __cnfn convert_int_sat_rtn(char);\n" |
28544 | "int __ovld __cnfn convert_int(char);\n" |
28545 | "int __ovld __cnfn convert_int_sat(char);\n" |
28546 | "int __ovld __cnfn convert_int_rte(uchar);\n" |
28547 | "int __ovld __cnfn convert_int_sat_rte(uchar);\n" |
28548 | "int __ovld __cnfn convert_int_rtz(uchar);\n" |
28549 | "int __ovld __cnfn convert_int_sat_rtz(uchar);\n" |
28550 | "int __ovld __cnfn convert_int_rtp(uchar);\n" |
28551 | "int __ovld __cnfn convert_int_sat_rtp(uchar);\n" |
28552 | "int __ovld __cnfn convert_int_rtn(uchar);\n" |
28553 | "int __ovld __cnfn convert_int_sat_rtn(uchar);\n" |
28554 | "int __ovld __cnfn convert_int(uchar);\n" |
28555 | "int __ovld __cnfn convert_int_sat(uchar);\n" |
28556 | "int __ovld __cnfn convert_int_rte(short);\n" |
28557 | "int __ovld __cnfn convert_int_sat_rte(short);\n" |
28558 | "int __ovld __cnfn convert_int_rtz(short);\n" |
28559 | "int __ovld __cnfn convert_int_sat_rtz(short);\n" |
28560 | "int __ovld __cnfn convert_int_rtp(short);\n" |
28561 | "int __ovld __cnfn convert_int_sat_rtp(short);\n" |
28562 | "int __ovld __cnfn convert_int_rtn(short);\n" |
28563 | "int __ovld __cnfn convert_int_sat_rtn(short);\n" |
28564 | "int __ovld __cnfn convert_int(short);\n" |
28565 | "int __ovld __cnfn convert_int_sat(short);\n" |
28566 | "int __ovld __cnfn convert_int_rte(ushort);\n" |
28567 | "int __ovld __cnfn convert_int_sat_rte(ushort);\n" |
28568 | "int __ovld __cnfn convert_int_rtz(ushort);\n" |
28569 | "int __ovld __cnfn convert_int_sat_rtz(ushort);\n" |
28570 | "int __ovld __cnfn convert_int_rtp(ushort);\n" |
28571 | "int __ovld __cnfn convert_int_sat_rtp(ushort);\n" |
28572 | "int __ovld __cnfn convert_int_rtn(ushort);\n" |
28573 | "int __ovld __cnfn convert_int_sat_rtn(ushort);\n" |
28574 | "int __ovld __cnfn convert_int(ushort);\n" |
28575 | "int __ovld __cnfn convert_int_sat(ushort);\n" |
28576 | "int __ovld __cnfn convert_int_rte(int);\n" |
28577 | "int __ovld __cnfn convert_int_sat_rte(int);\n" |
28578 | "int __ovld __cnfn convert_int_rtz(int);\n" |
28579 | "int __ovld __cnfn convert_int_sat_rtz(int);\n" |
28580 | "int __ovld __cnfn convert_int_rtp(int);\n" |
28581 | "int __ovld __cnfn convert_int_sat_rtp(int);\n" |
28582 | "int __ovld __cnfn convert_int_rtn(int);\n" |
28583 | "int __ovld __cnfn convert_int_sat_rtn(int);\n" |
28584 | "int __ovld __cnfn convert_int(int);\n" |
28585 | "int __ovld __cnfn convert_int_sat(int);\n" |
28586 | "int __ovld __cnfn convert_int_rte(uint);\n" |
28587 | "int __ovld __cnfn convert_int_sat_rte(uint);\n" |
28588 | "int __ovld __cnfn convert_int_rtz(uint);\n" |
28589 | "int __ovld __cnfn convert_int_sat_rtz(uint);\n" |
28590 | "int __ovld __cnfn convert_int_rtp(uint);\n" |
28591 | "int __ovld __cnfn convert_int_sat_rtp(uint);\n" |
28592 | "int __ovld __cnfn convert_int_rtn(uint);\n" |
28593 | "int __ovld __cnfn convert_int_sat_rtn(uint);\n" |
28594 | "int __ovld __cnfn convert_int(uint);\n" |
28595 | "int __ovld __cnfn convert_int_sat(uint);\n" |
28596 | "int __ovld __cnfn convert_int_rte(long);\n" |
28597 | "int __ovld __cnfn convert_int_sat_rte(long);\n" |
28598 | "int __ovld __cnfn convert_int_rtz(long);\n" |
28599 | "int __ovld __cnfn convert_int_sat_rtz(long);\n" |
28600 | "int __ovld __cnfn convert_int_rtp(long);\n" |
28601 | "int __ovld __cnfn convert_int_sat_rtp(long);\n" |
28602 | "int __ovld __cnfn convert_int_rtn(long);\n" |
28603 | "int __ovld __cnfn convert_int_sat_rtn(long);\n" |
28604 | "int __ovld __cnfn convert_int(long);\n" |
28605 | "int __ovld __cnfn convert_int_sat(long);\n" |
28606 | "int __ovld __cnfn convert_int_rte(ulong);\n" |
28607 | "int __ovld __cnfn convert_int_sat_rte(ulong);\n" |
28608 | "int __ovld __cnfn convert_int_rtz(ulong);\n" |
28609 | "int __ovld __cnfn convert_int_sat_rtz(ulong);\n" |
28610 | "int __ovld __cnfn convert_int_rtp(ulong);\n" |
28611 | "int __ovld __cnfn convert_int_sat_rtp(ulong);\n" |
28612 | "int __ovld __cnfn convert_int_rtn(ulong);\n" |
28613 | "int __ovld __cnfn convert_int_sat_rtn(ulong);\n" |
28614 | "int __ovld __cnfn convert_int(ulong);\n" |
28615 | "int __ovld __cnfn convert_int_sat(ulong);\n" |
28616 | "int __ovld __cnfn convert_int_rte(float);\n" |
28617 | "int __ovld __cnfn convert_int_sat_rte(float);\n" |
28618 | "int __ovld __cnfn convert_int_rtz(float);\n" |
28619 | "int __ovld __cnfn convert_int_sat_rtz(float);\n" |
28620 | "int __ovld __cnfn convert_int_rtp(float);\n" |
28621 | "int __ovld __cnfn convert_int_sat_rtp(float);\n" |
28622 | "int __ovld __cnfn convert_int_rtn(float);\n" |
28623 | "int __ovld __cnfn convert_int_sat_rtn(float);\n" |
28624 | "int __ovld __cnfn convert_int(float);\n" |
28625 | "int __ovld __cnfn convert_int_sat(float);\n" |
28626 | "uint __ovld __cnfn convert_uint_rte(char);\n" |
28627 | "uint __ovld __cnfn convert_uint_sat_rte(char);\n" |
28628 | "uint __ovld __cnfn convert_uint_rtz(char);\n" |
28629 | "uint __ovld __cnfn convert_uint_sat_rtz(char);\n" |
28630 | "uint __ovld __cnfn convert_uint_rtp(char);\n" |
28631 | "uint __ovld __cnfn convert_uint_sat_rtp(char);\n" |
28632 | "uint __ovld __cnfn convert_uint_rtn(char);\n" |
28633 | "uint __ovld __cnfn convert_uint_sat_rtn(char);\n" |
28634 | "uint __ovld __cnfn convert_uint(char);\n" |
28635 | "uint __ovld __cnfn convert_uint_sat(char);\n" |
28636 | "uint __ovld __cnfn convert_uint_rte(uchar);\n" |
28637 | "uint __ovld __cnfn convert_uint_sat_rte(uchar);\n" |
28638 | "uint __ovld __cnfn convert_uint_rtz(uchar);\n" |
28639 | "uint __ovld __cnfn convert_uint_sat_rtz(uchar);\n" |
28640 | "uint __ovld __cnfn convert_uint_rtp(uchar);\n" |
28641 | "uint __ovld __cnfn convert_uint_sat_rtp(uchar);\n" |
28642 | "uint __ovld __cnfn convert_uint_rtn(uchar);\n" |
28643 | "uint __ovld __cnfn convert_uint_sat_rtn(uchar);\n" |
28644 | "uint __ovld __cnfn convert_uint(uchar);\n" |
28645 | "uint __ovld __cnfn convert_uint_sat(uchar);\n" |
28646 | "uint __ovld __cnfn convert_uint_rte(short);\n" |
28647 | "uint __ovld __cnfn convert_uint_sat_rte(short);\n" |
28648 | "uint __ovld __cnfn convert_uint_rtz(short);\n" |
28649 | "uint __ovld __cnfn convert_uint_sat_rtz(short);\n" |
28650 | "uint __ovld __cnfn convert_uint_rtp(short);\n" |
28651 | "uint __ovld __cnfn convert_uint_sat_rtp(short);\n" |
28652 | "uint __ovld __cnfn convert_uint_rtn(short);\n" |
28653 | "uint __ovld __cnfn convert_uint_sat_rtn(short);\n" |
28654 | "uint __ovld __cnfn convert_uint(short);\n" |
28655 | "uint __ovld __cnfn convert_uint_sat(short);\n" |
28656 | "uint __ovld __cnfn convert_uint_rte(ushort);\n" |
28657 | "uint __ovld __cnfn convert_uint_sat_rte(ushort);\n" |
28658 | "uint __ovld __cnfn convert_uint_rtz(ushort);\n" |
28659 | "uint __ovld __cnfn convert_uint_sat_rtz(ushort);\n" |
28660 | "uint __ovld __cnfn convert_uint_rtp(ushort);\n" |
28661 | "uint __ovld __cnfn convert_uint_sat_rtp(ushort);\n" |
28662 | "uint __ovld __cnfn convert_uint_rtn(ushort);\n" |
28663 | "uint __ovld __cnfn convert_uint_sat_rtn(ushort);\n" |
28664 | "uint __ovld __cnfn convert_uint(ushort);\n" |
28665 | "uint __ovld __cnfn convert_uint_sat(ushort);\n" |
28666 | "uint __ovld __cnfn convert_uint_rte(int);\n" |
28667 | "uint __ovld __cnfn convert_uint_sat_rte(int);\n" |
28668 | "uint __ovld __cnfn convert_uint_rtz(int);\n" |
28669 | "uint __ovld __cnfn convert_uint_sat_rtz(int);\n" |
28670 | "uint __ovld __cnfn convert_uint_rtp(int);\n" |
28671 | "uint __ovld __cnfn convert_uint_sat_rtp(int);\n" |
28672 | "uint __ovld __cnfn convert_uint_rtn(int);\n" |
28673 | "uint __ovld __cnfn convert_uint_sat_rtn(int);\n" |
28674 | "uint __ovld __cnfn convert_uint(int);\n" |
28675 | "uint __ovld __cnfn convert_uint_sat(int);\n" |
28676 | "uint __ovld __cnfn convert_uint_rte(uint);\n" |
28677 | "uint __ovld __cnfn convert_uint_sat_rte(uint);\n" |
28678 | "uint __ovld __cnfn convert_uint_rtz(uint);\n" |
28679 | "uint __ovld __cnfn convert_uint_sat_rtz(uint);\n" |
28680 | "uint __ovld __cnfn convert_uint_rtp(uint);\n" |
28681 | "uint __ovld __cnfn convert_uint_sat_rtp(uint);\n" |
28682 | "uint __ovld __cnfn convert_uint_rtn(uint);\n" |
28683 | "uint __ovld __cnfn convert_uint_sat_rtn(uint);\n" |
28684 | "uint __ovld __cnfn convert_uint(uint);\n" |
28685 | "uint __ovld __cnfn convert_uint_sat(uint);\n" |
28686 | "uint __ovld __cnfn convert_uint_rte(long);\n" |
28687 | "uint __ovld __cnfn convert_uint_sat_rte(long);\n" |
28688 | "uint __ovld __cnfn convert_uint_rtz(long);\n" |
28689 | "uint __ovld __cnfn convert_uint_sat_rtz(long);\n" |
28690 | "uint __ovld __cnfn convert_uint_rtp(long);\n" |
28691 | "uint __ovld __cnfn convert_uint_sat_rtp(long);\n" |
28692 | "uint __ovld __cnfn convert_uint_rtn(long);\n" |
28693 | "uint __ovld __cnfn convert_uint_sat_rtn(long);\n" |
28694 | "uint __ovld __cnfn convert_uint(long);\n" |
28695 | "uint __ovld __cnfn convert_uint_sat(long);\n" |
28696 | "uint __ovld __cnfn convert_uint_rte(ulong);\n" |
28697 | "uint __ovld __cnfn convert_uint_sat_rte(ulong);\n" |
28698 | "uint __ovld __cnfn convert_uint_rtz(ulong);\n" |
28699 | "uint __ovld __cnfn convert_uint_sat_rtz(ulong);\n" |
28700 | "uint __ovld __cnfn convert_uint_rtp(ulong);\n" |
28701 | "uint __ovld __cnfn convert_uint_sat_rtp(ulong);\n" |
28702 | "uint __ovld __cnfn convert_uint_rtn(ulong);\n" |
28703 | "uint __ovld __cnfn convert_uint_sat_rtn(ulong);\n" |
28704 | "uint __ovld __cnfn convert_uint(ulong);\n" |
28705 | "uint __ovld __cnfn convert_uint_sat(ulong);\n" |
28706 | "uint __ovld __cnfn convert_uint_rte(float);\n" |
28707 | "uint __ovld __cnfn convert_uint_sat_rte(float);\n" |
28708 | "uint __ovld __cnfn convert_uint_rtz(float);\n" |
28709 | "uint __ovld __cnfn convert_uint_sat_rtz(float);\n" |
28710 | "uint __ovld __cnfn convert_uint_rtp(float);\n" |
28711 | "uint __ovld __cnfn convert_uint_sat_rtp(float);\n" |
28712 | "uint __ovld __cnfn convert_uint_rtn(float);\n" |
28713 | "uint __ovld __cnfn convert_uint_sat_rtn(float);\n" |
28714 | "uint __ovld __cnfn convert_uint(float);\n" |
28715 | "uint __ovld __cnfn convert_uint_sat(float);\n" |
28716 | "long __ovld __cnfn convert_long_rte(char);\n" |
28717 | "long __ovld __cnfn convert_long_sat_rte(char);\n" |
28718 | "long __ovld __cnfn convert_long_rtz(char);\n" |
28719 | "long __ovld __cnfn convert_long_sat_rtz(char);\n" |
28720 | "long __ovld __cnfn convert_long_rtp(char);\n" |
28721 | "long __ovld __cnfn convert_long_sat_rtp(char);\n" |
28722 | "long __ovld __cnfn convert_long_rtn(char);\n" |
28723 | "long __ovld __cnfn convert_long_sat_rtn(char);\n" |
28724 | "long __ovld __cnfn convert_long(char);\n" |
28725 | "long __ovld __cnfn convert_long_sat(char);\n" |
28726 | "long __ovld __cnfn convert_long_rte(uchar);\n" |
28727 | "long __ovld __cnfn convert_long_sat_rte(uchar);\n" |
28728 | "long __ovld __cnfn convert_long_rtz(uchar);\n" |
28729 | "long __ovld __cnfn convert_long_sat_rtz(uchar);\n" |
28730 | "long __ovld __cnfn convert_long_rtp(uchar);\n" |
28731 | "long __ovld __cnfn convert_long_sat_rtp(uchar);\n" |
28732 | "long __ovld __cnfn convert_long_rtn(uchar);\n" |
28733 | "long __ovld __cnfn convert_long_sat_rtn(uchar);\n" |
28734 | "long __ovld __cnfn convert_long(uchar);\n" |
28735 | "long __ovld __cnfn convert_long_sat(uchar);\n" |
28736 | "long __ovld __cnfn convert_long_rte(short);\n" |
28737 | "long __ovld __cnfn convert_long_sat_rte(short);\n" |
28738 | "long __ovld __cnfn convert_long_rtz(short);\n" |
28739 | "long __ovld __cnfn convert_long_sat_rtz(short);\n" |
28740 | "long __ovld __cnfn convert_long_rtp(short);\n" |
28741 | "long __ovld __cnfn convert_long_sat_rtp(short);\n" |
28742 | "long __ovld __cnfn convert_long_rtn(short);\n" |
28743 | "long __ovld __cnfn convert_long_sat_rtn(short);\n" |
28744 | "long __ovld __cnfn convert_long(short);\n" |
28745 | "long __ovld __cnfn convert_long_sat(short);\n" |
28746 | "long __ovld __cnfn convert_long_rte(ushort);\n" |
28747 | "long __ovld __cnfn convert_long_sat_rte(ushort);\n" |
28748 | "long __ovld __cnfn convert_long_rtz(ushort);\n" |
28749 | "long __ovld __cnfn convert_long_sat_rtz(ushort);\n" |
28750 | "long __ovld __cnfn convert_long_rtp(ushort);\n" |
28751 | "long __ovld __cnfn convert_long_sat_rtp(ushort);\n" |
28752 | "long __ovld __cnfn convert_long_rtn(ushort);\n" |
28753 | "long __ovld __cnfn convert_long_sat_rtn(ushort);\n" |
28754 | "long __ovld __cnfn convert_long(ushort);\n" |
28755 | "long __ovld __cnfn convert_long_sat(ushort);\n" |
28756 | "long __ovld __cnfn convert_long_rte(int);\n" |
28757 | "long __ovld __cnfn convert_long_sat_rte(int);\n" |
28758 | "long __ovld __cnfn convert_long_rtz(int);\n" |
28759 | "long __ovld __cnfn convert_long_sat_rtz(int);\n" |
28760 | "long __ovld __cnfn convert_long_rtp(int);\n" |
28761 | "long __ovld __cnfn convert_long_sat_rtp(int);\n" |
28762 | "long __ovld __cnfn convert_long_rtn(int);\n" |
28763 | "long __ovld __cnfn convert_long_sat_rtn(int);\n" |
28764 | "long __ovld __cnfn convert_long(int);\n" |
28765 | "long __ovld __cnfn convert_long_sat(int);\n" |
28766 | "long __ovld __cnfn convert_long_rte(uint);\n" |
28767 | "long __ovld __cnfn convert_long_sat_rte(uint);\n" |
28768 | "long __ovld __cnfn convert_long_rtz(uint);\n" |
28769 | "long __ovld __cnfn convert_long_sat_rtz(uint);\n" |
28770 | "long __ovld __cnfn convert_long_rtp(uint);\n" |
28771 | "long __ovld __cnfn convert_long_sat_rtp(uint);\n" |
28772 | "long __ovld __cnfn convert_long_rtn(uint);\n" |
28773 | "long __ovld __cnfn convert_long_sat_rtn(uint);\n" |
28774 | "long __ovld __cnfn convert_long(uint);\n" |
28775 | "long __ovld __cnfn convert_long_sat(uint);\n" |
28776 | "long __ovld __cnfn convert_long_rte(long);\n" |
28777 | "long __ovld __cnfn convert_long_sat_rte(long);\n" |
28778 | "long __ovld __cnfn convert_long_rtz(long);\n" |
28779 | "long __ovld __cnfn convert_long_sat_rtz(long);\n" |
28780 | "long __ovld __cnfn convert_long_rtp(long);\n" |
28781 | "long __ovld __cnfn convert_long_sat_rtp(long);\n" |
28782 | "long __ovld __cnfn convert_long_rtn(long);\n" |
28783 | "long __ovld __cnfn convert_long_sat_rtn(long);\n" |
28784 | "long __ovld __cnfn convert_long(long);\n" |
28785 | "long __ovld __cnfn convert_long_sat(long);\n" |
28786 | "long __ovld __cnfn convert_long_rte(ulong);\n" |
28787 | "long __ovld __cnfn convert_long_sat_rte(ulong);\n" |
28788 | "long __ovld __cnfn convert_long_rtz(ulong);\n" |
28789 | "long __ovld __cnfn convert_long_sat_rtz(ulong);\n" |
28790 | "long __ovld __cnfn convert_long_rtp(ulong);\n" |
28791 | "long __ovld __cnfn convert_long_sat_rtp(ulong);\n" |
28792 | "long __ovld __cnfn convert_long_rtn(ulong);\n" |
28793 | "long __ovld __cnfn convert_long_sat_rtn(ulong);\n" |
28794 | "long __ovld __cnfn convert_long(ulong);\n" |
28795 | "long __ovld __cnfn convert_long_sat(ulong);\n" |
28796 | "long __ovld __cnfn convert_long_rte(float);\n" |
28797 | "long __ovld __cnfn convert_long_sat_rte(float);\n" |
28798 | "long __ovld __cnfn convert_long_rtz(float);\n" |
28799 | "long __ovld __cnfn convert_long_sat_rtz(float);\n" |
28800 | "long __ovld __cnfn convert_long_rtp(float);\n" |
28801 | "long __ovld __cnfn convert_long_sat_rtp(float);\n" |
28802 | "long __ovld __cnfn convert_long_rtn(float);\n" |
28803 | "long __ovld __cnfn convert_long_sat_rtn(float);\n" |
28804 | "long __ovld __cnfn convert_long(float);\n" |
28805 | "long __ovld __cnfn convert_long_sat(float);\n" |
28806 | "ulong __ovld __cnfn convert_ulong_rte(char);\n" |
28807 | "ulong __ovld __cnfn convert_ulong_sat_rte(char);\n" |
28808 | "ulong __ovld __cnfn convert_ulong_rtz(char);\n" |
28809 | "ulong __ovld __cnfn convert_ulong_sat_rtz(char);\n" |
28810 | "ulong __ovld __cnfn convert_ulong_rtp(char);\n" |
28811 | "ulong __ovld __cnfn convert_ulong_sat_rtp(char);\n" |
28812 | "ulong __ovld __cnfn convert_ulong_rtn(char);\n" |
28813 | "ulong __ovld __cnfn convert_ulong_sat_rtn(char);\n" |
28814 | "ulong __ovld __cnfn convert_ulong(char);\n" |
28815 | "ulong __ovld __cnfn convert_ulong_sat(char);\n" |
28816 | "ulong __ovld __cnfn convert_ulong_rte(uchar);\n" |
28817 | "ulong __ovld __cnfn convert_ulong_sat_rte(uchar);\n" |
28818 | "ulong __ovld __cnfn convert_ulong_rtz(uchar);\n" |
28819 | "ulong __ovld __cnfn convert_ulong_sat_rtz(uchar);\n" |
28820 | "ulong __ovld __cnfn convert_ulong_rtp(uchar);\n" |
28821 | "ulong __ovld __cnfn convert_ulong_sat_rtp(uchar);\n" |
28822 | "ulong __ovld __cnfn convert_ulong_rtn(uchar);\n" |
28823 | "ulong __ovld __cnfn convert_ulong_sat_rtn(uchar);\n" |
28824 | "ulong __ovld __cnfn convert_ulong(uchar);\n" |
28825 | "ulong __ovld __cnfn convert_ulong_sat(uchar);\n" |
28826 | "ulong __ovld __cnfn convert_ulong_rte(short);\n" |
28827 | "ulong __ovld __cnfn convert_ulong_sat_rte(short);\n" |
28828 | "ulong __ovld __cnfn convert_ulong_rtz(short);\n" |
28829 | "ulong __ovld __cnfn convert_ulong_sat_rtz(short);\n" |
28830 | "ulong __ovld __cnfn convert_ulong_rtp(short);\n" |
28831 | "ulong __ovld __cnfn convert_ulong_sat_rtp(short);\n" |
28832 | "ulong __ovld __cnfn convert_ulong_rtn(short);\n" |
28833 | "ulong __ovld __cnfn convert_ulong_sat_rtn(short);\n" |
28834 | "ulong __ovld __cnfn convert_ulong(short);\n" |
28835 | "ulong __ovld __cnfn convert_ulong_sat(short);\n" |
28836 | "ulong __ovld __cnfn convert_ulong_rte(ushort);\n" |
28837 | "ulong __ovld __cnfn convert_ulong_sat_rte(ushort);\n" |
28838 | "ulong __ovld __cnfn convert_ulong_rtz(ushort);\n" |
28839 | "ulong __ovld __cnfn convert_ulong_sat_rtz(ushort);\n" |
28840 | "ulong __ovld __cnfn convert_ulong_rtp(ushort);\n" |
28841 | "ulong __ovld __cnfn convert_ulong_sat_rtp(ushort);\n" |
28842 | "ulong __ovld __cnfn convert_ulong_rtn(ushort);\n" |
28843 | "ulong __ovld __cnfn convert_ulong_sat_rtn(ushort);\n" |
28844 | "ulong __ovld __cnfn convert_ulong(ushort);\n" |
28845 | "ulong __ovld __cnfn convert_ulong_sat(ushort);\n" |
28846 | "ulong __ovld __cnfn convert_ulong_rte(int);\n" |
28847 | "ulong __ovld __cnfn convert_ulong_sat_rte(int);\n" |
28848 | "ulong __ovld __cnfn convert_ulong_rtz(int);\n" |
28849 | "ulong __ovld __cnfn convert_ulong_sat_rtz(int);\n" |
28850 | "ulong __ovld __cnfn convert_ulong_rtp(int);\n" |
28851 | "ulong __ovld __cnfn convert_ulong_sat_rtp(int);\n" |
28852 | "ulong __ovld __cnfn convert_ulong_rtn(int);\n" |
28853 | "ulong __ovld __cnfn convert_ulong_sat_rtn(int);\n" |
28854 | "ulong __ovld __cnfn convert_ulong(int);\n" |
28855 | "ulong __ovld __cnfn convert_ulong_sat(int);\n" |
28856 | "ulong __ovld __cnfn convert_ulong_rte(uint);\n" |
28857 | "ulong __ovld __cnfn convert_ulong_sat_rte(uint);\n" |
28858 | "ulong __ovld __cnfn convert_ulong_rtz(uint);\n" |
28859 | "ulong __ovld __cnfn convert_ulong_sat_rtz(uint);\n" |
28860 | "ulong __ovld __cnfn convert_ulong_rtp(uint);\n" |
28861 | "ulong __ovld __cnfn convert_ulong_sat_rtp(uint);\n" |
28862 | "ulong __ovld __cnfn convert_ulong_rtn(uint);\n" |
28863 | "ulong __ovld __cnfn convert_ulong_sat_rtn(uint);\n" |
28864 | "ulong __ovld __cnfn convert_ulong(uint);\n" |
28865 | "ulong __ovld __cnfn convert_ulong_sat(uint);\n" |
28866 | "ulong __ovld __cnfn convert_ulong_rte(long);\n" |
28867 | "ulong __ovld __cnfn convert_ulong_sat_rte(long);\n" |
28868 | "ulong __ovld __cnfn convert_ulong_rtz(long);\n" |
28869 | "ulong __ovld __cnfn convert_ulong_sat_rtz(long);\n" |
28870 | "ulong __ovld __cnfn convert_ulong_rtp(long);\n" |
28871 | "ulong __ovld __cnfn convert_ulong_sat_rtp(long);\n" |
28872 | "ulong __ovld __cnfn convert_ulong_rtn(long);\n" |
28873 | "ulong __ovld __cnfn convert_ulong_sat_rtn(long);\n" |
28874 | "ulong __ovld __cnfn convert_ulong(long);\n" |
28875 | "ulong __ovld __cnfn convert_ulong_sat(long);\n" |
28876 | "ulong __ovld __cnfn convert_ulong_rte(ulong);\n" |
28877 | "ulong __ovld __cnfn convert_ulong_sat_rte(ulong);\n" |
28878 | "ulong __ovld __cnfn convert_ulong_rtz(ulong);\n" |
28879 | "ulong __ovld __cnfn convert_ulong_sat_rtz(ulong);\n" |
28880 | "ulong __ovld __cnfn convert_ulong_rtp(ulong);\n" |
28881 | "ulong __ovld __cnfn convert_ulong_sat_rtp(ulong);\n" |
28882 | "ulong __ovld __cnfn convert_ulong_rtn(ulong);\n" |
28883 | "ulong __ovld __cnfn convert_ulong_sat_rtn(ulong);\n" |
28884 | "ulong __ovld __cnfn convert_ulong(ulong);\n" |
28885 | "ulong __ovld __cnfn convert_ulong_sat(ulong);\n" |
28886 | "ulong __ovld __cnfn convert_ulong_rte(float);\n" |
28887 | "ulong __ovld __cnfn convert_ulong_sat_rte(float);\n" |
28888 | "ulong __ovld __cnfn convert_ulong_rtz(float);\n" |
28889 | "ulong __ovld __cnfn convert_ulong_sat_rtz(float);\n" |
28890 | "ulong __ovld __cnfn convert_ulong_rtp(float);\n" |
28891 | "ulong __ovld __cnfn convert_ulong_sat_rtp(float);\n" |
28892 | "ulong __ovld __cnfn convert_ulong_rtn(float);\n" |
28893 | "ulong __ovld __cnfn convert_ulong_sat_rtn(float);\n" |
28894 | "ulong __ovld __cnfn convert_ulong(float);\n" |
28895 | "ulong __ovld __cnfn convert_ulong_sat(float);\n" |
28896 | "float __ovld __cnfn convert_float_rte(char);\n" |
28897 | "float __ovld __cnfn convert_float_rtz(char);\n" |
28898 | "float __ovld __cnfn convert_float_rtp(char);\n" |
28899 | "float __ovld __cnfn convert_float_rtn(char);\n" |
28900 | "float __ovld __cnfn convert_float(char);\n" |
28901 | "float __ovld __cnfn convert_float_rte(uchar);\n" |
28902 | "float __ovld __cnfn convert_float_rtz(uchar);\n" |
28903 | "float __ovld __cnfn convert_float_rtp(uchar);\n" |
28904 | "float __ovld __cnfn convert_float_rtn(uchar);\n" |
28905 | "float __ovld __cnfn convert_float(uchar);\n" |
28906 | "float __ovld __cnfn convert_float_rte(short);\n" |
28907 | "float __ovld __cnfn convert_float_rtz(short);\n" |
28908 | "float __ovld __cnfn convert_float_rtp(short);\n" |
28909 | "float __ovld __cnfn convert_float_rtn(short);\n" |
28910 | "float __ovld __cnfn convert_float(short);\n" |
28911 | "float __ovld __cnfn convert_float_rte(ushort);\n" |
28912 | "float __ovld __cnfn convert_float_rtz(ushort);\n" |
28913 | "float __ovld __cnfn convert_float_rtp(ushort);\n" |
28914 | "float __ovld __cnfn convert_float_rtn(ushort);\n" |
28915 | "float __ovld __cnfn convert_float(ushort);\n" |
28916 | "float __ovld __cnfn convert_float_rte(int);\n" |
28917 | "float __ovld __cnfn convert_float_rtz(int);\n" |
28918 | "float __ovld __cnfn convert_float_rtp(int);\n" |
28919 | "float __ovld __cnfn convert_float_rtn(int);\n" |
28920 | "float __ovld __cnfn convert_float(int);\n" |
28921 | "float __ovld __cnfn convert_float_rte(uint);\n" |
28922 | "float __ovld __cnfn convert_float_rtz(uint);\n" |
28923 | "float __ovld __cnfn convert_float_rtp(uint);\n" |
28924 | "float __ovld __cnfn convert_float_rtn(uint);\n" |
28925 | "float __ovld __cnfn convert_float(uint);\n" |
28926 | "float __ovld __cnfn convert_float_rte(long);\n" |
28927 | "float __ovld __cnfn convert_float_rtz(long);\n" |
28928 | "float __ovld __cnfn convert_float_rtp(long);\n" |
28929 | "float __ovld __cnfn convert_float_rtn(long);\n" |
28930 | "float __ovld __cnfn convert_float(long);\n" |
28931 | "float __ovld __cnfn convert_float_rte(ulong);\n" |
28932 | "float __ovld __cnfn convert_float_rtz(ulong);\n" |
28933 | "float __ovld __cnfn convert_float_rtp(ulong);\n" |
28934 | "float __ovld __cnfn convert_float_rtn(ulong);\n" |
28935 | "float __ovld __cnfn convert_float(ulong);\n" |
28936 | "float __ovld __cnfn convert_float_rte(float);\n" |
28937 | "float __ovld __cnfn convert_float_rtz(float);\n" |
28938 | "float __ovld __cnfn convert_float_rtp(float);\n" |
28939 | "float __ovld __cnfn convert_float_rtn(float);\n" |
28940 | "float __ovld __cnfn convert_float(float);\n" |
28941 | "char2 __ovld __cnfn convert_char2_rte(char2);\n" |
28942 | "char2 __ovld __cnfn convert_char2_sat_rte(char2);\n" |
28943 | "char2 __ovld __cnfn convert_char2_rtz(char2);\n" |
28944 | "char2 __ovld __cnfn convert_char2_sat_rtz(char2);\n" |
28945 | "char2 __ovld __cnfn convert_char2_rtp(char2);\n" |
28946 | "char2 __ovld __cnfn convert_char2_sat_rtp(char2);\n" |
28947 | "char2 __ovld __cnfn convert_char2_rtn(char2);\n" |
28948 | "char2 __ovld __cnfn convert_char2_sat_rtn(char2);\n" |
28949 | "char2 __ovld __cnfn convert_char2(char2);\n" |
28950 | "char2 __ovld __cnfn convert_char2_sat(char2);\n" |
28951 | "char2 __ovld __cnfn convert_char2_rte(uchar2);\n" |
28952 | "char2 __ovld __cnfn convert_char2_sat_rte(uchar2);\n" |
28953 | "char2 __ovld __cnfn convert_char2_rtz(uchar2);\n" |
28954 | "char2 __ovld __cnfn convert_char2_sat_rtz(uchar2);\n" |
28955 | "char2 __ovld __cnfn convert_char2_rtp(uchar2);\n" |
28956 | "char2 __ovld __cnfn convert_char2_sat_rtp(uchar2);\n" |
28957 | "char2 __ovld __cnfn convert_char2_rtn(uchar2);\n" |
28958 | "char2 __ovld __cnfn convert_char2_sat_rtn(uchar2);\n" |
28959 | "char2 __ovld __cnfn convert_char2(uchar2);\n" |
28960 | "char2 __ovld __cnfn convert_char2_sat(uchar2);\n" |
28961 | "char2 __ovld __cnfn convert_char2_rte(short2);\n" |
28962 | "char2 __ovld __cnfn convert_char2_sat_rte(short2);\n" |
28963 | "char2 __ovld __cnfn convert_char2_rtz(short2);\n" |
28964 | "char2 __ovld __cnfn convert_char2_sat_rtz(short2);\n" |
28965 | "char2 __ovld __cnfn convert_char2_rtp(short2);\n" |
28966 | "char2 __ovld __cnfn convert_char2_sat_rtp(short2);\n" |
28967 | "char2 __ovld __cnfn convert_char2_rtn(short2);\n" |
28968 | "char2 __ovld __cnfn convert_char2_sat_rtn(short2);\n" |
28969 | "char2 __ovld __cnfn convert_char2(short2);\n" |
28970 | "char2 __ovld __cnfn convert_char2_sat(short2);\n" |
28971 | "char2 __ovld __cnfn convert_char2_rte(ushort2);\n" |
28972 | "char2 __ovld __cnfn convert_char2_sat_rte(ushort2);\n" |
28973 | "char2 __ovld __cnfn convert_char2_rtz(ushort2);\n" |
28974 | "char2 __ovld __cnfn convert_char2_sat_rtz(ushort2);\n" |
28975 | "char2 __ovld __cnfn convert_char2_rtp(ushort2);\n" |
28976 | "char2 __ovld __cnfn convert_char2_sat_rtp(ushort2);\n" |
28977 | "char2 __ovld __cnfn convert_char2_rtn(ushort2);\n" |
28978 | "char2 __ovld __cnfn convert_char2_sat_rtn(ushort2);\n" |
28979 | "char2 __ovld __cnfn convert_char2(ushort2);\n" |
28980 | "char2 __ovld __cnfn convert_char2_sat(ushort2);\n" |
28981 | "char2 __ovld __cnfn convert_char2_rte(int2);\n" |
28982 | "char2 __ovld __cnfn convert_char2_sat_rte(int2);\n" |
28983 | "char2 __ovld __cnfn convert_char2_rtz(int2);\n" |
28984 | "char2 __ovld __cnfn convert_char2_sat_rtz(int2);\n" |
28985 | "char2 __ovld __cnfn convert_char2_rtp(int2);\n" |
28986 | "char2 __ovld __cnfn convert_char2_sat_rtp(int2);\n" |
28987 | "char2 __ovld __cnfn convert_char2_rtn(int2);\n" |
28988 | "char2 __ovld __cnfn convert_char2_sat_rtn(int2);\n" |
28989 | "char2 __ovld __cnfn convert_char2(int2);\n" |
28990 | "char2 __ovld __cnfn convert_char2_sat(int2);\n" |
28991 | "char2 __ovld __cnfn convert_char2_rte(uint2);\n" |
28992 | "char2 __ovld __cnfn convert_char2_sat_rte(uint2);\n" |
28993 | "char2 __ovld __cnfn convert_char2_rtz(uint2);\n" |
28994 | "char2 __ovld __cnfn convert_char2_sat_rtz(uint2);\n" |
28995 | "char2 __ovld __cnfn convert_char2_rtp(uint2);\n" |
28996 | "char2 __ovld __cnfn convert_char2_sat_rtp(uint2);\n" |
28997 | "char2 __ovld __cnfn convert_char2_rtn(uint2);\n" |
28998 | "char2 __ovld __cnfn convert_char2_sat_rtn(uint2);\n" |
28999 | "char2 __ovld __cnfn convert_char2(uint2);\n" |
29000 | "char2 __ovld __cnfn convert_char2_sat(uint2);\n" |
29001 | "char2 __ovld __cnfn convert_char2_rte(long2);\n" |
29002 | "char2 __ovld __cnfn convert_char2_sat_rte(long2);\n" |
29003 | "char2 __ovld __cnfn convert_char2_rtz(long2);\n" |
29004 | "char2 __ovld __cnfn convert_char2_sat_rtz(long2);\n" |
29005 | "char2 __ovld __cnfn convert_char2_rtp(long2);\n" |
29006 | "char2 __ovld __cnfn convert_char2_sat_rtp(long2);\n" |
29007 | "char2 __ovld __cnfn convert_char2_rtn(long2);\n" |
29008 | "char2 __ovld __cnfn convert_char2_sat_rtn(long2);\n" |
29009 | "char2 __ovld __cnfn convert_char2(long2);\n" |
29010 | "char2 __ovld __cnfn convert_char2_sat(long2);\n" |
29011 | "char2 __ovld __cnfn convert_char2_rte(ulong2);\n" |
29012 | "char2 __ovld __cnfn convert_char2_sat_rte(ulong2);\n" |
29013 | "char2 __ovld __cnfn convert_char2_rtz(ulong2);\n" |
29014 | "char2 __ovld __cnfn convert_char2_sat_rtz(ulong2);\n" |
29015 | "char2 __ovld __cnfn convert_char2_rtp(ulong2);\n" |
29016 | "char2 __ovld __cnfn convert_char2_sat_rtp(ulong2);\n" |
29017 | "char2 __ovld __cnfn convert_char2_rtn(ulong2);\n" |
29018 | "char2 __ovld __cnfn convert_char2_sat_rtn(ulong2);\n" |
29019 | "char2 __ovld __cnfn convert_char2(ulong2);\n" |
29020 | "char2 __ovld __cnfn convert_char2_sat(ulong2);\n" |
29021 | "char2 __ovld __cnfn convert_char2_rte(float2);\n" |
29022 | "char2 __ovld __cnfn convert_char2_sat_rte(float2);\n" |
29023 | "char2 __ovld __cnfn convert_char2_rtz(float2);\n" |
29024 | "char2 __ovld __cnfn convert_char2_sat_rtz(float2);\n" |
29025 | "char2 __ovld __cnfn convert_char2_rtp(float2);\n" |
29026 | "char2 __ovld __cnfn convert_char2_sat_rtp(float2);\n" |
29027 | "char2 __ovld __cnfn convert_char2_rtn(float2);\n" |
29028 | "char2 __ovld __cnfn convert_char2_sat_rtn(float2);\n" |
29029 | "char2 __ovld __cnfn convert_char2(float2);\n" |
29030 | "char2 __ovld __cnfn convert_char2_sat(float2);\n" |
29031 | "uchar2 __ovld __cnfn convert_uchar2_rte(char2);\n" |
29032 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(char2);\n" |
29033 | "uchar2 __ovld __cnfn convert_uchar2_rtz(char2);\n" |
29034 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(char2);\n" |
29035 | "uchar2 __ovld __cnfn convert_uchar2_rtp(char2);\n" |
29036 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(char2);\n" |
29037 | "uchar2 __ovld __cnfn convert_uchar2_rtn(char2);\n" |
29038 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(char2);\n" |
29039 | "uchar2 __ovld __cnfn convert_uchar2(char2);\n" |
29040 | "uchar2 __ovld __cnfn convert_uchar2_sat(char2);\n" |
29041 | "uchar2 __ovld __cnfn convert_uchar2_rte(uchar2);\n" |
29042 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(uchar2);\n" |
29043 | "uchar2 __ovld __cnfn convert_uchar2_rtz(uchar2);\n" |
29044 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uchar2);\n" |
29045 | "uchar2 __ovld __cnfn convert_uchar2_rtp(uchar2);\n" |
29046 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uchar2);\n" |
29047 | "uchar2 __ovld __cnfn convert_uchar2_rtn(uchar2);\n" |
29048 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uchar2);\n" |
29049 | "uchar2 __ovld __cnfn convert_uchar2(uchar2);\n" |
29050 | "uchar2 __ovld __cnfn convert_uchar2_sat(uchar2);\n" |
29051 | "uchar2 __ovld __cnfn convert_uchar2_rte(short2);\n" |
29052 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(short2);\n" |
29053 | "uchar2 __ovld __cnfn convert_uchar2_rtz(short2);\n" |
29054 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(short2);\n" |
29055 | "uchar2 __ovld __cnfn convert_uchar2_rtp(short2);\n" |
29056 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(short2);\n" |
29057 | "uchar2 __ovld __cnfn convert_uchar2_rtn(short2);\n" |
29058 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(short2);\n" |
29059 | "uchar2 __ovld __cnfn convert_uchar2(short2);\n" |
29060 | "uchar2 __ovld __cnfn convert_uchar2_sat(short2);\n" |
29061 | "uchar2 __ovld __cnfn convert_uchar2_rte(ushort2);\n" |
29062 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(ushort2);\n" |
29063 | "uchar2 __ovld __cnfn convert_uchar2_rtz(ushort2);\n" |
29064 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ushort2);\n" |
29065 | "uchar2 __ovld __cnfn convert_uchar2_rtp(ushort2);\n" |
29066 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ushort2);\n" |
29067 | "uchar2 __ovld __cnfn convert_uchar2_rtn(ushort2);\n" |
29068 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ushort2);\n" |
29069 | "uchar2 __ovld __cnfn convert_uchar2(ushort2);\n" |
29070 | "uchar2 __ovld __cnfn convert_uchar2_sat(ushort2);\n" |
29071 | "uchar2 __ovld __cnfn convert_uchar2_rte(int2);\n" |
29072 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(int2);\n" |
29073 | "uchar2 __ovld __cnfn convert_uchar2_rtz(int2);\n" |
29074 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(int2);\n" |
29075 | "uchar2 __ovld __cnfn convert_uchar2_rtp(int2);\n" |
29076 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(int2);\n" |
29077 | "uchar2 __ovld __cnfn convert_uchar2_rtn(int2);\n" |
29078 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(int2);\n" |
29079 | "uchar2 __ovld __cnfn convert_uchar2(int2);\n" |
29080 | "uchar2 __ovld __cnfn convert_uchar2_sat(int2);\n" |
29081 | "uchar2 __ovld __cnfn convert_uchar2_rte(uint2);\n" |
29082 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(uint2);\n" |
29083 | "uchar2 __ovld __cnfn convert_uchar2_rtz(uint2);\n" |
29084 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uint2);\n" |
29085 | "uchar2 __ovld __cnfn convert_uchar2_rtp(uint2);\n" |
29086 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uint2);\n" |
29087 | "uchar2 __ovld __cnfn convert_uchar2_rtn(uint2);\n" |
29088 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uint2);\n" |
29089 | "uchar2 __ovld __cnfn convert_uchar2(uint2);\n" |
29090 | "uchar2 __ovld __cnfn convert_uchar2_sat(uint2);\n" |
29091 | "uchar2 __ovld __cnfn convert_uchar2_rte(long2);\n" |
29092 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(long2);\n" |
29093 | "uchar2 __ovld __cnfn convert_uchar2_rtz(long2);\n" |
29094 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(long2);\n" |
29095 | "uchar2 __ovld __cnfn convert_uchar2_rtp(long2);\n" |
29096 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(long2);\n" |
29097 | "uchar2 __ovld __cnfn convert_uchar2_rtn(long2);\n" |
29098 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(long2);\n" |
29099 | "uchar2 __ovld __cnfn convert_uchar2(long2);\n" |
29100 | "uchar2 __ovld __cnfn convert_uchar2_sat(long2);\n" |
29101 | "uchar2 __ovld __cnfn convert_uchar2_rte(ulong2);\n" |
29102 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(ulong2);\n" |
29103 | "uchar2 __ovld __cnfn convert_uchar2_rtz(ulong2);\n" |
29104 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ulong2);\n" |
29105 | "uchar2 __ovld __cnfn convert_uchar2_rtp(ulong2);\n" |
29106 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ulong2);\n" |
29107 | "uchar2 __ovld __cnfn convert_uchar2_rtn(ulong2);\n" |
29108 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ulong2);\n" |
29109 | "uchar2 __ovld __cnfn convert_uchar2(ulong2);\n" |
29110 | "uchar2 __ovld __cnfn convert_uchar2_sat(ulong2);\n" |
29111 | "uchar2 __ovld __cnfn convert_uchar2_rte(float2);\n" |
29112 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(float2);\n" |
29113 | "uchar2 __ovld __cnfn convert_uchar2_rtz(float2);\n" |
29114 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(float2);\n" |
29115 | "uchar2 __ovld __cnfn convert_uchar2_rtp(float2);\n" |
29116 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(float2);\n" |
29117 | "uchar2 __ovld __cnfn convert_uchar2_rtn(float2);\n" |
29118 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(float2);\n" |
29119 | "uchar2 __ovld __cnfn convert_uchar2(float2);\n" |
29120 | "uchar2 __ovld __cnfn convert_uchar2_sat(float2);\n" |
29121 | "short2 __ovld __cnfn convert_short2_rte(char2);\n" |
29122 | "short2 __ovld __cnfn convert_short2_sat_rte(char2);\n" |
29123 | "short2 __ovld __cnfn convert_short2_rtz(char2);\n" |
29124 | "short2 __ovld __cnfn convert_short2_sat_rtz(char2);\n" |
29125 | "short2 __ovld __cnfn convert_short2_rtp(char2);\n" |
29126 | "short2 __ovld __cnfn convert_short2_sat_rtp(char2);\n" |
29127 | "short2 __ovld __cnfn convert_short2_rtn(char2);\n" |
29128 | "short2 __ovld __cnfn convert_short2_sat_rtn(char2);\n" |
29129 | "short2 __ovld __cnfn convert_short2(char2);\n" |
29130 | "short2 __ovld __cnfn convert_short2_sat(char2);\n" |
29131 | "short2 __ovld __cnfn convert_short2_rte(uchar2);\n" |
29132 | "short2 __ovld __cnfn convert_short2_sat_rte(uchar2);\n" |
29133 | "short2 __ovld __cnfn convert_short2_rtz(uchar2);\n" |
29134 | "short2 __ovld __cnfn convert_short2_sat_rtz(uchar2);\n" |
29135 | "short2 __ovld __cnfn convert_short2_rtp(uchar2);\n" |
29136 | "short2 __ovld __cnfn convert_short2_sat_rtp(uchar2);\n" |
29137 | "short2 __ovld __cnfn convert_short2_rtn(uchar2);\n" |
29138 | "short2 __ovld __cnfn convert_short2_sat_rtn(uchar2);\n" |
29139 | "short2 __ovld __cnfn convert_short2(uchar2);\n" |
29140 | "short2 __ovld __cnfn convert_short2_sat(uchar2);\n" |
29141 | "short2 __ovld __cnfn convert_short2_rte(short2);\n" |
29142 | "short2 __ovld __cnfn convert_short2_sat_rte(short2);\n" |
29143 | "short2 __ovld __cnfn convert_short2_rtz(short2);\n" |
29144 | "short2 __ovld __cnfn convert_short2_sat_rtz(short2);\n" |
29145 | "short2 __ovld __cnfn convert_short2_rtp(short2);\n" |
29146 | "short2 __ovld __cnfn convert_short2_sat_rtp(short2);\n" |
29147 | "short2 __ovld __cnfn convert_short2_rtn(short2);\n" |
29148 | "short2 __ovld __cnfn convert_short2_sat_rtn(short2);\n" |
29149 | "short2 __ovld __cnfn convert_short2(short2);\n" |
29150 | "short2 __ovld __cnfn convert_short2_sat(short2);\n" |
29151 | "short2 __ovld __cnfn convert_short2_rte(ushort2);\n" |
29152 | "short2 __ovld __cnfn convert_short2_sat_rte(ushort2);\n" |
29153 | "short2 __ovld __cnfn convert_short2_rtz(ushort2);\n" |
29154 | "short2 __ovld __cnfn convert_short2_sat_rtz(ushort2);\n" |
29155 | "short2 __ovld __cnfn convert_short2_rtp(ushort2);\n" |
29156 | "short2 __ovld __cnfn convert_short2_sat_rtp(ushort2);\n" |
29157 | "short2 __ovld __cnfn convert_short2_rtn(ushort2);\n" |
29158 | "short2 __ovld __cnfn convert_short2_sat_rtn(ushort2);\n" |
29159 | "short2 __ovld __cnfn convert_short2(ushort2);\n" |
29160 | "short2 __ovld __cnfn convert_short2_sat(ushort2);\n" |
29161 | "short2 __ovld __cnfn convert_short2_rte(int2);\n" |
29162 | "short2 __ovld __cnfn convert_short2_sat_rte(int2);\n" |
29163 | "short2 __ovld __cnfn convert_short2_rtz(int2);\n" |
29164 | "short2 __ovld __cnfn convert_short2_sat_rtz(int2);\n" |
29165 | "short2 __ovld __cnfn convert_short2_rtp(int2);\n" |
29166 | "short2 __ovld __cnfn convert_short2_sat_rtp(int2);\n" |
29167 | "short2 __ovld __cnfn convert_short2_rtn(int2);\n" |
29168 | "short2 __ovld __cnfn convert_short2_sat_rtn(int2);\n" |
29169 | "short2 __ovld __cnfn convert_short2(int2);\n" |
29170 | "short2 __ovld __cnfn convert_short2_sat(int2);\n" |
29171 | "short2 __ovld __cnfn convert_short2_rte(uint2);\n" |
29172 | "short2 __ovld __cnfn convert_short2_sat_rte(uint2);\n" |
29173 | "short2 __ovld __cnfn convert_short2_rtz(uint2);\n" |
29174 | "short2 __ovld __cnfn convert_short2_sat_rtz(uint2);\n" |
29175 | "short2 __ovld __cnfn convert_short2_rtp(uint2);\n" |
29176 | "short2 __ovld __cnfn convert_short2_sat_rtp(uint2);\n" |
29177 | "short2 __ovld __cnfn convert_short2_rtn(uint2);\n" |
29178 | "short2 __ovld __cnfn convert_short2_sat_rtn(uint2);\n" |
29179 | "short2 __ovld __cnfn convert_short2(uint2);\n" |
29180 | "short2 __ovld __cnfn convert_short2_sat(uint2);\n" |
29181 | "short2 __ovld __cnfn convert_short2_rte(long2);\n" |
29182 | "short2 __ovld __cnfn convert_short2_sat_rte(long2);\n" |
29183 | "short2 __ovld __cnfn convert_short2_rtz(long2);\n" |
29184 | "short2 __ovld __cnfn convert_short2_sat_rtz(long2);\n" |
29185 | "short2 __ovld __cnfn convert_short2_rtp(long2);\n" |
29186 | "short2 __ovld __cnfn convert_short2_sat_rtp(long2);\n" |
29187 | "short2 __ovld __cnfn convert_short2_rtn(long2);\n" |
29188 | "short2 __ovld __cnfn convert_short2_sat_rtn(long2);\n" |
29189 | "short2 __ovld __cnfn convert_short2(long2);\n" |
29190 | "short2 __ovld __cnfn convert_short2_sat(long2);\n" |
29191 | "short2 __ovld __cnfn convert_short2_rte(ulong2);\n" |
29192 | "short2 __ovld __cnfn convert_short2_sat_rte(ulong2);\n" |
29193 | "short2 __ovld __cnfn convert_short2_rtz(ulong2);\n" |
29194 | "short2 __ovld __cnfn convert_short2_sat_rtz(ulong2);\n" |
29195 | "short2 __ovld __cnfn convert_short2_rtp(ulong2);\n" |
29196 | "short2 __ovld __cnfn convert_short2_sat_rtp(ulong2);\n" |
29197 | "short2 __ovld __cnfn convert_short2_rtn(ulong2);\n" |
29198 | "short2 __ovld __cnfn convert_short2_sat_rtn(ulong2);\n" |
29199 | "short2 __ovld __cnfn convert_short2(ulong2);\n" |
29200 | "short2 __ovld __cnfn convert_short2_sat(ulong2);\n" |
29201 | "short2 __ovld __cnfn convert_short2_rte(float2);\n" |
29202 | "short2 __ovld __cnfn convert_short2_sat_rte(float2);\n" |
29203 | "short2 __ovld __cnfn convert_short2_rtz(float2);\n" |
29204 | "short2 __ovld __cnfn convert_short2_sat_rtz(float2);\n" |
29205 | "short2 __ovld __cnfn convert_short2_rtp(float2);\n" |
29206 | "short2 __ovld __cnfn convert_short2_sat_rtp(float2);\n" |
29207 | "short2 __ovld __cnfn convert_short2_rtn(float2);\n" |
29208 | "short2 __ovld __cnfn convert_short2_sat_rtn(float2);\n" |
29209 | "short2 __ovld __cnfn convert_short2(float2);\n" |
29210 | "short2 __ovld __cnfn convert_short2_sat(float2);\n" |
29211 | "ushort2 __ovld __cnfn convert_ushort2_rte(char2);\n" |
29212 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(char2);\n" |
29213 | "ushort2 __ovld __cnfn convert_ushort2_rtz(char2);\n" |
29214 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(char2);\n" |
29215 | "ushort2 __ovld __cnfn convert_ushort2_rtp(char2);\n" |
29216 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(char2);\n" |
29217 | "ushort2 __ovld __cnfn convert_ushort2_rtn(char2);\n" |
29218 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(char2);\n" |
29219 | "ushort2 __ovld __cnfn convert_ushort2(char2);\n" |
29220 | "ushort2 __ovld __cnfn convert_ushort2_sat(char2);\n" |
29221 | "ushort2 __ovld __cnfn convert_ushort2_rte(uchar2);\n" |
29222 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(uchar2);\n" |
29223 | "ushort2 __ovld __cnfn convert_ushort2_rtz(uchar2);\n" |
29224 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uchar2);\n" |
29225 | "ushort2 __ovld __cnfn convert_ushort2_rtp(uchar2);\n" |
29226 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uchar2);\n" |
29227 | "ushort2 __ovld __cnfn convert_ushort2_rtn(uchar2);\n" |
29228 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uchar2);\n" |
29229 | "ushort2 __ovld __cnfn convert_ushort2(uchar2);\n" |
29230 | "ushort2 __ovld __cnfn convert_ushort2_sat(uchar2);\n" |
29231 | "ushort2 __ovld __cnfn convert_ushort2_rte(short2);\n" |
29232 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(short2);\n" |
29233 | "ushort2 __ovld __cnfn convert_ushort2_rtz(short2);\n" |
29234 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(short2);\n" |
29235 | "ushort2 __ovld __cnfn convert_ushort2_rtp(short2);\n" |
29236 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(short2);\n" |
29237 | "ushort2 __ovld __cnfn convert_ushort2_rtn(short2);\n" |
29238 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(short2);\n" |
29239 | "ushort2 __ovld __cnfn convert_ushort2(short2);\n" |
29240 | "ushort2 __ovld __cnfn convert_ushort2_sat(short2);\n" |
29241 | "ushort2 __ovld __cnfn convert_ushort2_rte(ushort2);\n" |
29242 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(ushort2);\n" |
29243 | "ushort2 __ovld __cnfn convert_ushort2_rtz(ushort2);\n" |
29244 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ushort2);\n" |
29245 | "ushort2 __ovld __cnfn convert_ushort2_rtp(ushort2);\n" |
29246 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ushort2);\n" |
29247 | "ushort2 __ovld __cnfn convert_ushort2_rtn(ushort2);\n" |
29248 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(ushort2);\n" |
29249 | "ushort2 __ovld __cnfn convert_ushort2(ushort2);\n" |
29250 | "ushort2 __ovld __cnfn convert_ushort2_sat(ushort2);\n" |
29251 | "ushort2 __ovld __cnfn convert_ushort2_rte(int2);\n" |
29252 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(int2);\n" |
29253 | "ushort2 __ovld __cnfn convert_ushort2_rtz(int2);\n" |
29254 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(int2);\n" |
29255 | "ushort2 __ovld __cnfn convert_ushort2_rtp(int2);\n" |
29256 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(int2);\n" |
29257 | "ushort2 __ovld __cnfn convert_ushort2_rtn(int2);\n" |
29258 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(int2);\n" |
29259 | "ushort2 __ovld __cnfn convert_ushort2(int2);\n" |
29260 | "ushort2 __ovld __cnfn convert_ushort2_sat(int2);\n" |
29261 | "ushort2 __ovld __cnfn convert_ushort2_rte(uint2);\n" |
29262 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(uint2);\n" |
29263 | "ushort2 __ovld __cnfn convert_ushort2_rtz(uint2);\n" |
29264 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uint2);\n" |
29265 | "ushort2 __ovld __cnfn convert_ushort2_rtp(uint2);\n" |
29266 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uint2);\n" |
29267 | "ushort2 __ovld __cnfn convert_ushort2_rtn(uint2);\n" |
29268 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uint2);\n" |
29269 | "ushort2 __ovld __cnfn convert_ushort2(uint2);\n" |
29270 | "ushort2 __ovld __cnfn convert_ushort2_sat(uint2);\n" |
29271 | "ushort2 __ovld __cnfn convert_ushort2_rte(long2);\n" |
29272 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(long2);\n" |
29273 | "ushort2 __ovld __cnfn convert_ushort2_rtz(long2);\n" |
29274 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(long2);\n" |
29275 | "ushort2 __ovld __cnfn convert_ushort2_rtp(long2);\n" |
29276 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(long2);\n" |
29277 | "ushort2 __ovld __cnfn convert_ushort2_rtn(long2);\n" |
29278 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(long2);\n" |
29279 | "ushort2 __ovld __cnfn convert_ushort2(long2);\n" |
29280 | "ushort2 __ovld __cnfn convert_ushort2_sat(long2);\n" |
29281 | "ushort2 __ovld __cnfn convert_ushort2_rte(ulong2);\n" |
29282 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(ulong2);\n" |
29283 | "ushort2 __ovld __cnfn convert_ushort2_rtz(ulong2);\n" |
29284 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ulong2);\n" |
29285 | "ushort2 __ovld __cnfn convert_ushort2_rtp(ulong2);\n" |
29286 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ulong2);\n" |
29287 | "ushort2 __ovld __cnfn convert_ushort2_rtn(ulong2);\n" |
29288 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(ulong2);\n" |
29289 | "ushort2 __ovld __cnfn convert_ushort2(ulong2);\n" |
29290 | "ushort2 __ovld __cnfn convert_ushort2_sat(ulong2);\n" |
29291 | "ushort2 __ovld __cnfn convert_ushort2_rte(float2);\n" |
29292 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(float2);\n" |
29293 | "ushort2 __ovld __cnfn convert_ushort2_rtz(float2);\n" |
29294 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(float2);\n" |
29295 | "ushort2 __ovld __cnfn convert_ushort2_rtp(float2);\n" |
29296 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(float2);\n" |
29297 | "ushort2 __ovld __cnfn convert_ushort2_rtn(float2);\n" |
29298 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(float2);\n" |
29299 | "ushort2 __ovld __cnfn convert_ushort2(float2);\n" |
29300 | "ushort2 __ovld __cnfn convert_ushort2_sat(float2);\n" |
29301 | "int2 __ovld __cnfn convert_int2_rte(char2);\n" |
29302 | "int2 __ovld __cnfn convert_int2_sat_rte(char2);\n" |
29303 | "int2 __ovld __cnfn convert_int2_rtz(char2);\n" |
29304 | "int2 __ovld __cnfn convert_int2_sat_rtz(char2);\n" |
29305 | "int2 __ovld __cnfn convert_int2_rtp(char2);\n" |
29306 | "int2 __ovld __cnfn convert_int2_sat_rtp(char2);\n" |
29307 | "int2 __ovld __cnfn convert_int2_rtn(char2);\n" |
29308 | "int2 __ovld __cnfn convert_int2_sat_rtn(char2);\n" |
29309 | "int2 __ovld __cnfn convert_int2(char2);\n" |
29310 | "int2 __ovld __cnfn convert_int2_sat(char2);\n" |
29311 | "int2 __ovld __cnfn convert_int2_rte(uchar2);\n" |
29312 | "int2 __ovld __cnfn convert_int2_sat_rte(uchar2);\n" |
29313 | "int2 __ovld __cnfn convert_int2_rtz(uchar2);\n" |
29314 | "int2 __ovld __cnfn convert_int2_sat_rtz(uchar2);\n" |
29315 | "int2 __ovld __cnfn convert_int2_rtp(uchar2);\n" |
29316 | "int2 __ovld __cnfn convert_int2_sat_rtp(uchar2);\n" |
29317 | "int2 __ovld __cnfn convert_int2_rtn(uchar2);\n" |
29318 | "int2 __ovld __cnfn convert_int2_sat_rtn(uchar2);\n" |
29319 | "int2 __ovld __cnfn convert_int2(uchar2);\n" |
29320 | "int2 __ovld __cnfn convert_int2_sat(uchar2);\n" |
29321 | "int2 __ovld __cnfn convert_int2_rte(short2);\n" |
29322 | "int2 __ovld __cnfn convert_int2_sat_rte(short2);\n" |
29323 | "int2 __ovld __cnfn convert_int2_rtz(short2);\n" |
29324 | "int2 __ovld __cnfn convert_int2_sat_rtz(short2);\n" |
29325 | "int2 __ovld __cnfn convert_int2_rtp(short2);\n" |
29326 | "int2 __ovld __cnfn convert_int2_sat_rtp(short2);\n" |
29327 | "int2 __ovld __cnfn convert_int2_rtn(short2);\n" |
29328 | "int2 __ovld __cnfn convert_int2_sat_rtn(short2);\n" |
29329 | "int2 __ovld __cnfn convert_int2(short2);\n" |
29330 | "int2 __ovld __cnfn convert_int2_sat(short2);\n" |
29331 | "int2 __ovld __cnfn convert_int2_rte(ushort2);\n" |
29332 | "int2 __ovld __cnfn convert_int2_sat_rte(ushort2);\n" |
29333 | "int2 __ovld __cnfn convert_int2_rtz(ushort2);\n" |
29334 | "int2 __ovld __cnfn convert_int2_sat_rtz(ushort2);\n" |
29335 | "int2 __ovld __cnfn convert_int2_rtp(ushort2);\n" |
29336 | "int2 __ovld __cnfn convert_int2_sat_rtp(ushort2);\n" |
29337 | "int2 __ovld __cnfn convert_int2_rtn(ushort2);\n" |
29338 | "int2 __ovld __cnfn convert_int2_sat_rtn(ushort2);\n" |
29339 | "int2 __ovld __cnfn convert_int2(ushort2);\n" |
29340 | "int2 __ovld __cnfn convert_int2_sat(ushort2);\n" |
29341 | "int2 __ovld __cnfn convert_int2_rte(int2);\n" |
29342 | "int2 __ovld __cnfn convert_int2_sat_rte(int2);\n" |
29343 | "int2 __ovld __cnfn convert_int2_rtz(int2);\n" |
29344 | "int2 __ovld __cnfn convert_int2_sat_rtz(int2);\n" |
29345 | "int2 __ovld __cnfn convert_int2_rtp(int2);\n" |
29346 | "int2 __ovld __cnfn convert_int2_sat_rtp(int2);\n" |
29347 | "int2 __ovld __cnfn convert_int2_rtn(int2);\n" |
29348 | "int2 __ovld __cnfn convert_int2_sat_rtn(int2);\n" |
29349 | "int2 __ovld __cnfn convert_int2(int2);\n" |
29350 | "int2 __ovld __cnfn convert_int2_sat(int2);\n" |
29351 | "int2 __ovld __cnfn convert_int2_rte(uint2);\n" |
29352 | "int2 __ovld __cnfn convert_int2_sat_rte(uint2);\n" |
29353 | "int2 __ovld __cnfn convert_int2_rtz(uint2);\n" |
29354 | "int2 __ovld __cnfn convert_int2_sat_rtz(uint2);\n" |
29355 | "int2 __ovld __cnfn convert_int2_rtp(uint2);\n" |
29356 | "int2 __ovld __cnfn convert_int2_sat_rtp(uint2);\n" |
29357 | "int2 __ovld __cnfn convert_int2_rtn(uint2);\n" |
29358 | "int2 __ovld __cnfn convert_int2_sat_rtn(uint2);\n" |
29359 | "int2 __ovld __cnfn convert_int2(uint2);\n" |
29360 | "int2 __ovld __cnfn convert_int2_sat(uint2);\n" |
29361 | "int2 __ovld __cnfn convert_int2_rte(long2);\n" |
29362 | "int2 __ovld __cnfn convert_int2_sat_rte(long2);\n" |
29363 | "int2 __ovld __cnfn convert_int2_rtz(long2);\n" |
29364 | "int2 __ovld __cnfn convert_int2_sat_rtz(long2);\n" |
29365 | "int2 __ovld __cnfn convert_int2_rtp(long2);\n" |
29366 | "int2 __ovld __cnfn convert_int2_sat_rtp(long2);\n" |
29367 | "int2 __ovld __cnfn convert_int2_rtn(long2);\n" |
29368 | "int2 __ovld __cnfn convert_int2_sat_rtn(long2);\n" |
29369 | "int2 __ovld __cnfn convert_int2(long2);\n" |
29370 | "int2 __ovld __cnfn convert_int2_sat(long2);\n" |
29371 | "int2 __ovld __cnfn convert_int2_rte(ulong2);\n" |
29372 | "int2 __ovld __cnfn convert_int2_sat_rte(ulong2);\n" |
29373 | "int2 __ovld __cnfn convert_int2_rtz(ulong2);\n" |
29374 | "int2 __ovld __cnfn convert_int2_sat_rtz(ulong2);\n" |
29375 | "int2 __ovld __cnfn convert_int2_rtp(ulong2);\n" |
29376 | "int2 __ovld __cnfn convert_int2_sat_rtp(ulong2);\n" |
29377 | "int2 __ovld __cnfn convert_int2_rtn(ulong2);\n" |
29378 | "int2 __ovld __cnfn convert_int2_sat_rtn(ulong2);\n" |
29379 | "int2 __ovld __cnfn convert_int2(ulong2);\n" |
29380 | "int2 __ovld __cnfn convert_int2_sat(ulong2);\n" |
29381 | "int2 __ovld __cnfn convert_int2_rte(float2);\n" |
29382 | "int2 __ovld __cnfn convert_int2_sat_rte(float2);\n" |
29383 | "int2 __ovld __cnfn convert_int2_rtz(float2);\n" |
29384 | "int2 __ovld __cnfn convert_int2_sat_rtz(float2);\n" |
29385 | "int2 __ovld __cnfn convert_int2_rtp(float2);\n" |
29386 | "int2 __ovld __cnfn convert_int2_sat_rtp(float2);\n" |
29387 | "int2 __ovld __cnfn convert_int2_rtn(float2);\n" |
29388 | "int2 __ovld __cnfn convert_int2_sat_rtn(float2);\n" |
29389 | "int2 __ovld __cnfn convert_int2(float2);\n" |
29390 | "int2 __ovld __cnfn convert_int2_sat(float2);\n" |
29391 | "uint2 __ovld __cnfn convert_uint2_rte(char2);\n" |
29392 | "uint2 __ovld __cnfn convert_uint2_sat_rte(char2);\n" |
29393 | "uint2 __ovld __cnfn convert_uint2_rtz(char2);\n" |
29394 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(char2);\n" |
29395 | "uint2 __ovld __cnfn convert_uint2_rtp(char2);\n" |
29396 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(char2);\n" |
29397 | "uint2 __ovld __cnfn convert_uint2_rtn(char2);\n" |
29398 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(char2);\n" |
29399 | "uint2 __ovld __cnfn convert_uint2(char2);\n" |
29400 | "uint2 __ovld __cnfn convert_uint2_sat(char2);\n" |
29401 | "uint2 __ovld __cnfn convert_uint2_rte(uchar2);\n" |
29402 | "uint2 __ovld __cnfn convert_uint2_sat_rte(uchar2);\n" |
29403 | "uint2 __ovld __cnfn convert_uint2_rtz(uchar2);\n" |
29404 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(uchar2);\n" |
29405 | "uint2 __ovld __cnfn convert_uint2_rtp(uchar2);\n" |
29406 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(uchar2);\n" |
29407 | "uint2 __ovld __cnfn convert_uint2_rtn(uchar2);\n" |
29408 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(uchar2);\n" |
29409 | "uint2 __ovld __cnfn convert_uint2(uchar2);\n" |
29410 | "uint2 __ovld __cnfn convert_uint2_sat(uchar2);\n" |
29411 | "uint2 __ovld __cnfn convert_uint2_rte(short2);\n" |
29412 | "uint2 __ovld __cnfn convert_uint2_sat_rte(short2);\n" |
29413 | "uint2 __ovld __cnfn convert_uint2_rtz(short2);\n" |
29414 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(short2);\n" |
29415 | "uint2 __ovld __cnfn convert_uint2_rtp(short2);\n" |
29416 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(short2);\n" |
29417 | "uint2 __ovld __cnfn convert_uint2_rtn(short2);\n" |
29418 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(short2);\n" |
29419 | "uint2 __ovld __cnfn convert_uint2(short2);\n" |
29420 | "uint2 __ovld __cnfn convert_uint2_sat(short2);\n" |
29421 | "uint2 __ovld __cnfn convert_uint2_rte(ushort2);\n" |
29422 | "uint2 __ovld __cnfn convert_uint2_sat_rte(ushort2);\n" |
29423 | "uint2 __ovld __cnfn convert_uint2_rtz(ushort2);\n" |
29424 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(ushort2);\n" |
29425 | "uint2 __ovld __cnfn convert_uint2_rtp(ushort2);\n" |
29426 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(ushort2);\n" |
29427 | "uint2 __ovld __cnfn convert_uint2_rtn(ushort2);\n" |
29428 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(ushort2);\n" |
29429 | "uint2 __ovld __cnfn convert_uint2(ushort2);\n" |
29430 | "uint2 __ovld __cnfn convert_uint2_sat(ushort2);\n" |
29431 | "uint2 __ovld __cnfn convert_uint2_rte(int2);\n" |
29432 | "uint2 __ovld __cnfn convert_uint2_sat_rte(int2);\n" |
29433 | "uint2 __ovld __cnfn convert_uint2_rtz(int2);\n" |
29434 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(int2);\n" |
29435 | "uint2 __ovld __cnfn convert_uint2_rtp(int2);\n" |
29436 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(int2);\n" |
29437 | "uint2 __ovld __cnfn convert_uint2_rtn(int2);\n" |
29438 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(int2);\n" |
29439 | "uint2 __ovld __cnfn convert_uint2(int2);\n" |
29440 | "uint2 __ovld __cnfn convert_uint2_sat(int2);\n" |
29441 | "uint2 __ovld __cnfn convert_uint2_rte(uint2);\n" |
29442 | "uint2 __ovld __cnfn convert_uint2_sat_rte(uint2);\n" |
29443 | "uint2 __ovld __cnfn convert_uint2_rtz(uint2);\n" |
29444 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(uint2);\n" |
29445 | "uint2 __ovld __cnfn convert_uint2_rtp(uint2);\n" |
29446 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(uint2);\n" |
29447 | "uint2 __ovld __cnfn convert_uint2_rtn(uint2);\n" |
29448 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(uint2);\n" |
29449 | "uint2 __ovld __cnfn convert_uint2(uint2);\n" |
29450 | "uint2 __ovld __cnfn convert_uint2_sat(uint2);\n" |
29451 | "uint2 __ovld __cnfn convert_uint2_rte(long2);\n" |
29452 | "uint2 __ovld __cnfn convert_uint2_sat_rte(long2);\n" |
29453 | "uint2 __ovld __cnfn convert_uint2_rtz(long2);\n" |
29454 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(long2);\n" |
29455 | "uint2 __ovld __cnfn convert_uint2_rtp(long2);\n" |
29456 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(long2);\n" |
29457 | "uint2 __ovld __cnfn convert_uint2_rtn(long2);\n" |
29458 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(long2);\n" |
29459 | "uint2 __ovld __cnfn convert_uint2(long2);\n" |
29460 | "uint2 __ovld __cnfn convert_uint2_sat(long2);\n" |
29461 | "uint2 __ovld __cnfn convert_uint2_rte(ulong2);\n" |
29462 | "uint2 __ovld __cnfn convert_uint2_sat_rte(ulong2);\n" |
29463 | "uint2 __ovld __cnfn convert_uint2_rtz(ulong2);\n" |
29464 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(ulong2);\n" |
29465 | "uint2 __ovld __cnfn convert_uint2_rtp(ulong2);\n" |
29466 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(ulong2);\n" |
29467 | "uint2 __ovld __cnfn convert_uint2_rtn(ulong2);\n" |
29468 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(ulong2);\n" |
29469 | "uint2 __ovld __cnfn convert_uint2(ulong2);\n" |
29470 | "uint2 __ovld __cnfn convert_uint2_sat(ulong2);\n" |
29471 | "uint2 __ovld __cnfn convert_uint2_rte(float2);\n" |
29472 | "uint2 __ovld __cnfn convert_uint2_sat_rte(float2);\n" |
29473 | "uint2 __ovld __cnfn convert_uint2_rtz(float2);\n" |
29474 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(float2);\n" |
29475 | "uint2 __ovld __cnfn convert_uint2_rtp(float2);\n" |
29476 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(float2);\n" |
29477 | "uint2 __ovld __cnfn convert_uint2_rtn(float2);\n" |
29478 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(float2);\n" |
29479 | "uint2 __ovld __cnfn convert_uint2(float2);\n" |
29480 | "uint2 __ovld __cnfn convert_uint2_sat(float2);\n" |
29481 | "long2 __ovld __cnfn convert_long2_rte(char2);\n" |
29482 | "long2 __ovld __cnfn convert_long2_sat_rte(char2);\n" |
29483 | "long2 __ovld __cnfn convert_long2_rtz(char2);\n" |
29484 | "long2 __ovld __cnfn convert_long2_sat_rtz(char2);\n" |
29485 | "long2 __ovld __cnfn convert_long2_rtp(char2);\n" |
29486 | "long2 __ovld __cnfn convert_long2_sat_rtp(char2);\n" |
29487 | "long2 __ovld __cnfn convert_long2_rtn(char2);\n" |
29488 | "long2 __ovld __cnfn convert_long2_sat_rtn(char2);\n" |
29489 | "long2 __ovld __cnfn convert_long2(char2);\n" |
29490 | "long2 __ovld __cnfn convert_long2_sat(char2);\n" |
29491 | "long2 __ovld __cnfn convert_long2_rte(uchar2);\n" |
29492 | "long2 __ovld __cnfn convert_long2_sat_rte(uchar2);\n" |
29493 | "long2 __ovld __cnfn convert_long2_rtz(uchar2);\n" |
29494 | "long2 __ovld __cnfn convert_long2_sat_rtz(uchar2);\n" |
29495 | "long2 __ovld __cnfn convert_long2_rtp(uchar2);\n" |
29496 | "long2 __ovld __cnfn convert_long2_sat_rtp(uchar2);\n" |
29497 | "long2 __ovld __cnfn convert_long2_rtn(uchar2);\n" |
29498 | "long2 __ovld __cnfn convert_long2_sat_rtn(uchar2);\n" |
29499 | "long2 __ovld __cnfn convert_long2(uchar2);\n" |
29500 | "long2 __ovld __cnfn convert_long2_sat(uchar2);\n" |
29501 | "long2 __ovld __cnfn convert_long2_rte(short2);\n" |
29502 | "long2 __ovld __cnfn convert_long2_sat_rte(short2);\n" |
29503 | "long2 __ovld __cnfn convert_long2_rtz(short2);\n" |
29504 | "long2 __ovld __cnfn convert_long2_sat_rtz(short2);\n" |
29505 | "long2 __ovld __cnfn convert_long2_rtp(short2);\n" |
29506 | "long2 __ovld __cnfn convert_long2_sat_rtp(short2);\n" |
29507 | "long2 __ovld __cnfn convert_long2_rtn(short2);\n" |
29508 | "long2 __ovld __cnfn convert_long2_sat_rtn(short2);\n" |
29509 | "long2 __ovld __cnfn convert_long2(short2);\n" |
29510 | "long2 __ovld __cnfn convert_long2_sat(short2);\n" |
29511 | "long2 __ovld __cnfn convert_long2_rte(ushort2);\n" |
29512 | "long2 __ovld __cnfn convert_long2_sat_rte(ushort2);\n" |
29513 | "long2 __ovld __cnfn convert_long2_rtz(ushort2);\n" |
29514 | "long2 __ovld __cnfn convert_long2_sat_rtz(ushort2);\n" |
29515 | "long2 __ovld __cnfn convert_long2_rtp(ushort2);\n" |
29516 | "long2 __ovld __cnfn convert_long2_sat_rtp(ushort2);\n" |
29517 | "long2 __ovld __cnfn convert_long2_rtn(ushort2);\n" |
29518 | "long2 __ovld __cnfn convert_long2_sat_rtn(ushort2);\n" |
29519 | "long2 __ovld __cnfn convert_long2(ushort2);\n" |
29520 | "long2 __ovld __cnfn convert_long2_sat(ushort2);\n" |
29521 | "long2 __ovld __cnfn convert_long2_rte(int2);\n" |
29522 | "long2 __ovld __cnfn convert_long2_sat_rte(int2);\n" |
29523 | "long2 __ovld __cnfn convert_long2_rtz(int2);\n" |
29524 | "long2 __ovld __cnfn convert_long2_sat_rtz(int2);\n" |
29525 | "long2 __ovld __cnfn convert_long2_rtp(int2);\n" |
29526 | "long2 __ovld __cnfn convert_long2_sat_rtp(int2);\n" |
29527 | "long2 __ovld __cnfn convert_long2_rtn(int2);\n" |
29528 | "long2 __ovld __cnfn convert_long2_sat_rtn(int2);\n" |
29529 | "long2 __ovld __cnfn convert_long2(int2);\n" |
29530 | "long2 __ovld __cnfn convert_long2_sat(int2);\n" |
29531 | "long2 __ovld __cnfn convert_long2_rte(uint2);\n" |
29532 | "long2 __ovld __cnfn convert_long2_sat_rte(uint2);\n" |
29533 | "long2 __ovld __cnfn convert_long2_rtz(uint2);\n" |
29534 | "long2 __ovld __cnfn convert_long2_sat_rtz(uint2);\n" |
29535 | "long2 __ovld __cnfn convert_long2_rtp(uint2);\n" |
29536 | "long2 __ovld __cnfn convert_long2_sat_rtp(uint2);\n" |
29537 | "long2 __ovld __cnfn convert_long2_rtn(uint2);\n" |
29538 | "long2 __ovld __cnfn convert_long2_sat_rtn(uint2);\n" |
29539 | "long2 __ovld __cnfn convert_long2(uint2);\n" |
29540 | "long2 __ovld __cnfn convert_long2_sat(uint2);\n" |
29541 | "long2 __ovld __cnfn convert_long2_rte(long2);\n" |
29542 | "long2 __ovld __cnfn convert_long2_sat_rte(long2);\n" |
29543 | "long2 __ovld __cnfn convert_long2_rtz(long2);\n" |
29544 | "long2 __ovld __cnfn convert_long2_sat_rtz(long2);\n" |
29545 | "long2 __ovld __cnfn convert_long2_rtp(long2);\n" |
29546 | "long2 __ovld __cnfn convert_long2_sat_rtp(long2);\n" |
29547 | "long2 __ovld __cnfn convert_long2_rtn(long2);\n" |
29548 | "long2 __ovld __cnfn convert_long2_sat_rtn(long2);\n" |
29549 | "long2 __ovld __cnfn convert_long2(long2);\n" |
29550 | "long2 __ovld __cnfn convert_long2_sat(long2);\n" |
29551 | "long2 __ovld __cnfn convert_long2_rte(ulong2);\n" |
29552 | "long2 __ovld __cnfn convert_long2_sat_rte(ulong2);\n" |
29553 | "long2 __ovld __cnfn convert_long2_rtz(ulong2);\n" |
29554 | "long2 __ovld __cnfn convert_long2_sat_rtz(ulong2);\n" |
29555 | "long2 __ovld __cnfn convert_long2_rtp(ulong2);\n" |
29556 | "long2 __ovld __cnfn convert_long2_sat_rtp(ulong2);\n" |
29557 | "long2 __ovld __cnfn convert_long2_rtn(ulong2);\n" |
29558 | "long2 __ovld __cnfn convert_long2_sat_rtn(ulong2);\n" |
29559 | "long2 __ovld __cnfn convert_long2(ulong2);\n" |
29560 | "long2 __ovld __cnfn convert_long2_sat(ulong2);\n" |
29561 | "long2 __ovld __cnfn convert_long2_rte(float2);\n" |
29562 | "long2 __ovld __cnfn convert_long2_sat_rte(float2);\n" |
29563 | "long2 __ovld __cnfn convert_long2_rtz(float2);\n" |
29564 | "long2 __ovld __cnfn convert_long2_sat_rtz(float2);\n" |
29565 | "long2 __ovld __cnfn convert_long2_rtp(float2);\n" |
29566 | "long2 __ovld __cnfn convert_long2_sat_rtp(float2);\n" |
29567 | "long2 __ovld __cnfn convert_long2_rtn(float2);\n" |
29568 | "long2 __ovld __cnfn convert_long2_sat_rtn(float2);\n" |
29569 | "long2 __ovld __cnfn convert_long2(float2);\n" |
29570 | "long2 __ovld __cnfn convert_long2_sat(float2);\n" |
29571 | "ulong2 __ovld __cnfn convert_ulong2_rte(char2);\n" |
29572 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(char2);\n" |
29573 | "ulong2 __ovld __cnfn convert_ulong2_rtz(char2);\n" |
29574 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(char2);\n" |
29575 | "ulong2 __ovld __cnfn convert_ulong2_rtp(char2);\n" |
29576 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(char2);\n" |
29577 | "ulong2 __ovld __cnfn convert_ulong2_rtn(char2);\n" |
29578 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(char2);\n" |
29579 | "ulong2 __ovld __cnfn convert_ulong2(char2);\n" |
29580 | "ulong2 __ovld __cnfn convert_ulong2_sat(char2);\n" |
29581 | "ulong2 __ovld __cnfn convert_ulong2_rte(uchar2);\n" |
29582 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(uchar2);\n" |
29583 | "ulong2 __ovld __cnfn convert_ulong2_rtz(uchar2);\n" |
29584 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uchar2);\n" |
29585 | "ulong2 __ovld __cnfn convert_ulong2_rtp(uchar2);\n" |
29586 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uchar2);\n" |
29587 | "ulong2 __ovld __cnfn convert_ulong2_rtn(uchar2);\n" |
29588 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uchar2);\n" |
29589 | "ulong2 __ovld __cnfn convert_ulong2(uchar2);\n" |
29590 | "ulong2 __ovld __cnfn convert_ulong2_sat(uchar2);\n" |
29591 | "ulong2 __ovld __cnfn convert_ulong2_rte(short2);\n" |
29592 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(short2);\n" |
29593 | "ulong2 __ovld __cnfn convert_ulong2_rtz(short2);\n" |
29594 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(short2);\n" |
29595 | "ulong2 __ovld __cnfn convert_ulong2_rtp(short2);\n" |
29596 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(short2);\n" |
29597 | "ulong2 __ovld __cnfn convert_ulong2_rtn(short2);\n" |
29598 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(short2);\n" |
29599 | "ulong2 __ovld __cnfn convert_ulong2(short2);\n" |
29600 | "ulong2 __ovld __cnfn convert_ulong2_sat(short2);\n" |
29601 | "ulong2 __ovld __cnfn convert_ulong2_rte(ushort2);\n" |
29602 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(ushort2);\n" |
29603 | "ulong2 __ovld __cnfn convert_ulong2_rtz(ushort2);\n" |
29604 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ushort2);\n" |
29605 | "ulong2 __ovld __cnfn convert_ulong2_rtp(ushort2);\n" |
29606 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ushort2);\n" |
29607 | "ulong2 __ovld __cnfn convert_ulong2_rtn(ushort2);\n" |
29608 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ushort2);\n" |
29609 | "ulong2 __ovld __cnfn convert_ulong2(ushort2);\n" |
29610 | "ulong2 __ovld __cnfn convert_ulong2_sat(ushort2);\n" |
29611 | "ulong2 __ovld __cnfn convert_ulong2_rte(int2);\n" |
29612 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(int2);\n" |
29613 | "ulong2 __ovld __cnfn convert_ulong2_rtz(int2);\n" |
29614 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(int2);\n" |
29615 | "ulong2 __ovld __cnfn convert_ulong2_rtp(int2);\n" |
29616 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(int2);\n" |
29617 | "ulong2 __ovld __cnfn convert_ulong2_rtn(int2);\n" |
29618 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(int2);\n" |
29619 | "ulong2 __ovld __cnfn convert_ulong2(int2);\n" |
29620 | "ulong2 __ovld __cnfn convert_ulong2_sat(int2);\n" |
29621 | "ulong2 __ovld __cnfn convert_ulong2_rte(uint2);\n" |
29622 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(uint2);\n" |
29623 | "ulong2 __ovld __cnfn convert_ulong2_rtz(uint2);\n" |
29624 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uint2);\n" |
29625 | "ulong2 __ovld __cnfn convert_ulong2_rtp(uint2);\n" |
29626 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uint2);\n" |
29627 | "ulong2 __ovld __cnfn convert_ulong2_rtn(uint2);\n" |
29628 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uint2);\n" |
29629 | "ulong2 __ovld __cnfn convert_ulong2(uint2);\n" |
29630 | "ulong2 __ovld __cnfn convert_ulong2_sat(uint2);\n" |
29631 | "ulong2 __ovld __cnfn convert_ulong2_rte(long2);\n" |
29632 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(long2);\n" |
29633 | "ulong2 __ovld __cnfn convert_ulong2_rtz(long2);\n" |
29634 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(long2);\n" |
29635 | "ulong2 __ovld __cnfn convert_ulong2_rtp(long2);\n" |
29636 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(long2);\n" |
29637 | "ulong2 __ovld __cnfn convert_ulong2_rtn(long2);\n" |
29638 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(long2);\n" |
29639 | "ulong2 __ovld __cnfn convert_ulong2(long2);\n" |
29640 | "ulong2 __ovld __cnfn convert_ulong2_sat(long2);\n" |
29641 | "ulong2 __ovld __cnfn convert_ulong2_rte(ulong2);\n" |
29642 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(ulong2);\n" |
29643 | "ulong2 __ovld __cnfn convert_ulong2_rtz(ulong2);\n" |
29644 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ulong2);\n" |
29645 | "ulong2 __ovld __cnfn convert_ulong2_rtp(ulong2);\n" |
29646 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ulong2);\n" |
29647 | "ulong2 __ovld __cnfn convert_ulong2_rtn(ulong2);\n" |
29648 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ulong2);\n" |
29649 | "ulong2 __ovld __cnfn convert_ulong2(ulong2);\n" |
29650 | "ulong2 __ovld __cnfn convert_ulong2_sat(ulong2);\n" |
29651 | "ulong2 __ovld __cnfn convert_ulong2_rte(float2);\n" |
29652 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(float2);\n" |
29653 | "ulong2 __ovld __cnfn convert_ulong2_rtz(float2);\n" |
29654 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(float2);\n" |
29655 | "ulong2 __ovld __cnfn convert_ulong2_rtp(float2);\n" |
29656 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(float2);\n" |
29657 | "ulong2 __ovld __cnfn convert_ulong2_rtn(float2);\n" |
29658 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(float2);\n" |
29659 | "ulong2 __ovld __cnfn convert_ulong2(float2);\n" |
29660 | "ulong2 __ovld __cnfn convert_ulong2_sat(float2);\n" |
29661 | "float2 __ovld __cnfn convert_float2_rte(char2);\n" |
29662 | "float2 __ovld __cnfn convert_float2_rtz(char2);\n" |
29663 | "float2 __ovld __cnfn convert_float2_rtp(char2);\n" |
29664 | "float2 __ovld __cnfn convert_float2_rtn(char2);\n" |
29665 | "float2 __ovld __cnfn convert_float2(char2);\n" |
29666 | "float2 __ovld __cnfn convert_float2_rte(uchar2);\n" |
29667 | "float2 __ovld __cnfn convert_float2_rtz(uchar2);\n" |
29668 | "float2 __ovld __cnfn convert_float2_rtp(uchar2);\n" |
29669 | "float2 __ovld __cnfn convert_float2_rtn(uchar2);\n" |
29670 | "float2 __ovld __cnfn convert_float2(uchar2);\n" |
29671 | "float2 __ovld __cnfn convert_float2_rte(short2);\n" |
29672 | "float2 __ovld __cnfn convert_float2_rtz(short2);\n" |
29673 | "float2 __ovld __cnfn convert_float2_rtp(short2);\n" |
29674 | "float2 __ovld __cnfn convert_float2_rtn(short2);\n" |
29675 | "float2 __ovld __cnfn convert_float2(short2);\n" |
29676 | "float2 __ovld __cnfn convert_float2_rte(ushort2);\n" |
29677 | "float2 __ovld __cnfn convert_float2_rtz(ushort2);\n" |
29678 | "float2 __ovld __cnfn convert_float2_rtp(ushort2);\n" |
29679 | "float2 __ovld __cnfn convert_float2_rtn(ushort2);\n" |
29680 | "float2 __ovld __cnfn convert_float2(ushort2);\n" |
29681 | "float2 __ovld __cnfn convert_float2_rte(int2);\n" |
29682 | "float2 __ovld __cnfn convert_float2_rtz(int2);\n" |
29683 | "float2 __ovld __cnfn convert_float2_rtp(int2);\n" |
29684 | "float2 __ovld __cnfn convert_float2_rtn(int2);\n" |
29685 | "float2 __ovld __cnfn convert_float2(int2);\n" |
29686 | "float2 __ovld __cnfn convert_float2_rte(uint2);\n" |
29687 | "float2 __ovld __cnfn convert_float2_rtz(uint2);\n" |
29688 | "float2 __ovld __cnfn convert_float2_rtp(uint2);\n" |
29689 | "float2 __ovld __cnfn convert_float2_rtn(uint2);\n" |
29690 | "float2 __ovld __cnfn convert_float2(uint2);\n" |
29691 | "float2 __ovld __cnfn convert_float2_rte(long2);\n" |
29692 | "float2 __ovld __cnfn convert_float2_rtz(long2);\n" |
29693 | "float2 __ovld __cnfn convert_float2_rtp(long2);\n" |
29694 | "float2 __ovld __cnfn convert_float2_rtn(long2);\n" |
29695 | "float2 __ovld __cnfn convert_float2(long2);\n" |
29696 | "float2 __ovld __cnfn convert_float2_rte(ulong2);\n" |
29697 | "float2 __ovld __cnfn convert_float2_rtz(ulong2);\n" |
29698 | "float2 __ovld __cnfn convert_float2_rtp(ulong2);\n" |
29699 | "float2 __ovld __cnfn convert_float2_rtn(ulong2);\n" |
29700 | "float2 __ovld __cnfn convert_float2(ulong2);\n" |
29701 | "float2 __ovld __cnfn convert_float2_rte(float2);\n" |
29702 | "float2 __ovld __cnfn convert_float2_rtz(float2);\n" |
29703 | "float2 __ovld __cnfn convert_float2_rtp(float2);\n" |
29704 | "float2 __ovld __cnfn convert_float2_rtn(float2);\n" |
29705 | "float2 __ovld __cnfn convert_float2(float2);\n" |
29706 | "char3 __ovld __cnfn convert_char3_rte(char3);\n" |
29707 | "char3 __ovld __cnfn convert_char3_sat_rte(char3);\n" |
29708 | "char3 __ovld __cnfn convert_char3_rtz(char3);\n" |
29709 | "char3 __ovld __cnfn convert_char3_sat_rtz(char3);\n" |
29710 | "char3 __ovld __cnfn convert_char3_rtp(char3);\n" |
29711 | "char3 __ovld __cnfn convert_char3_sat_rtp(char3);\n" |
29712 | "char3 __ovld __cnfn convert_char3_rtn(char3);\n" |
29713 | "char3 __ovld __cnfn convert_char3_sat_rtn(char3);\n" |
29714 | "char3 __ovld __cnfn convert_char3(char3);\n" |
29715 | "char3 __ovld __cnfn convert_char3_sat(char3);\n" |
29716 | "char3 __ovld __cnfn convert_char3_rte(uchar3);\n" |
29717 | "char3 __ovld __cnfn convert_char3_sat_rte(uchar3);\n" |
29718 | "char3 __ovld __cnfn convert_char3_rtz(uchar3);\n" |
29719 | "char3 __ovld __cnfn convert_char3_sat_rtz(uchar3);\n" |
29720 | "char3 __ovld __cnfn convert_char3_rtp(uchar3);\n" |
29721 | "char3 __ovld __cnfn convert_char3_sat_rtp(uchar3);\n" |
29722 | "char3 __ovld __cnfn convert_char3_rtn(uchar3);\n" |
29723 | "char3 __ovld __cnfn convert_char3_sat_rtn(uchar3);\n" |
29724 | "char3 __ovld __cnfn convert_char3(uchar3);\n" |
29725 | "char3 __ovld __cnfn convert_char3_sat(uchar3);\n" |
29726 | "char3 __ovld __cnfn convert_char3_rte(short3);\n" |
29727 | "char3 __ovld __cnfn convert_char3_sat_rte(short3);\n" |
29728 | "char3 __ovld __cnfn convert_char3_rtz(short3);\n" |
29729 | "char3 __ovld __cnfn convert_char3_sat_rtz(short3);\n" |
29730 | "char3 __ovld __cnfn convert_char3_rtp(short3);\n" |
29731 | "char3 __ovld __cnfn convert_char3_sat_rtp(short3);\n" |
29732 | "char3 __ovld __cnfn convert_char3_rtn(short3);\n" |
29733 | "char3 __ovld __cnfn convert_char3_sat_rtn(short3);\n" |
29734 | "char3 __ovld __cnfn convert_char3(short3);\n" |
29735 | "char3 __ovld __cnfn convert_char3_sat(short3);\n" |
29736 | "char3 __ovld __cnfn convert_char3_rte(ushort3);\n" |
29737 | "char3 __ovld __cnfn convert_char3_sat_rte(ushort3);\n" |
29738 | "char3 __ovld __cnfn convert_char3_rtz(ushort3);\n" |
29739 | "char3 __ovld __cnfn convert_char3_sat_rtz(ushort3);\n" |
29740 | "char3 __ovld __cnfn convert_char3_rtp(ushort3);\n" |
29741 | "char3 __ovld __cnfn convert_char3_sat_rtp(ushort3);\n" |
29742 | "char3 __ovld __cnfn convert_char3_rtn(ushort3);\n" |
29743 | "char3 __ovld __cnfn convert_char3_sat_rtn(ushort3);\n" |
29744 | "char3 __ovld __cnfn convert_char3(ushort3);\n" |
29745 | "char3 __ovld __cnfn convert_char3_sat(ushort3);\n" |
29746 | "char3 __ovld __cnfn convert_char3_rte(int3);\n" |
29747 | "char3 __ovld __cnfn convert_char3_sat_rte(int3);\n" |
29748 | "char3 __ovld __cnfn convert_char3_rtz(int3);\n" |
29749 | "char3 __ovld __cnfn convert_char3_sat_rtz(int3);\n" |
29750 | "char3 __ovld __cnfn convert_char3_rtp(int3);\n" |
29751 | "char3 __ovld __cnfn convert_char3_sat_rtp(int3);\n" |
29752 | "char3 __ovld __cnfn convert_char3_rtn(int3);\n" |
29753 | "char3 __ovld __cnfn convert_char3_sat_rtn(int3);\n" |
29754 | "char3 __ovld __cnfn convert_char3(int3);\n" |
29755 | "char3 __ovld __cnfn convert_char3_sat(int3);\n" |
29756 | "char3 __ovld __cnfn convert_char3_rte(uint3);\n" |
29757 | "char3 __ovld __cnfn convert_char3_sat_rte(uint3);\n" |
29758 | "char3 __ovld __cnfn convert_char3_rtz(uint3);\n" |
29759 | "char3 __ovld __cnfn convert_char3_sat_rtz(uint3);\n" |
29760 | "char3 __ovld __cnfn convert_char3_rtp(uint3);\n" |
29761 | "char3 __ovld __cnfn convert_char3_sat_rtp(uint3);\n" |
29762 | "char3 __ovld __cnfn convert_char3_rtn(uint3);\n" |
29763 | "char3 __ovld __cnfn convert_char3_sat_rtn(uint3);\n" |
29764 | "char3 __ovld __cnfn convert_char3(uint3);\n" |
29765 | "char3 __ovld __cnfn convert_char3_sat(uint3);\n" |
29766 | "char3 __ovld __cnfn convert_char3_rte(long3);\n" |
29767 | "char3 __ovld __cnfn convert_char3_sat_rte(long3);\n" |
29768 | "char3 __ovld __cnfn convert_char3_rtz(long3);\n" |
29769 | "char3 __ovld __cnfn convert_char3_sat_rtz(long3);\n" |
29770 | "char3 __ovld __cnfn convert_char3_rtp(long3);\n" |
29771 | "char3 __ovld __cnfn convert_char3_sat_rtp(long3);\n" |
29772 | "char3 __ovld __cnfn convert_char3_rtn(long3);\n" |
29773 | "char3 __ovld __cnfn convert_char3_sat_rtn(long3);\n" |
29774 | "char3 __ovld __cnfn convert_char3(long3);\n" |
29775 | "char3 __ovld __cnfn convert_char3_sat(long3);\n" |
29776 | "char3 __ovld __cnfn convert_char3_rte(ulong3);\n" |
29777 | "char3 __ovld __cnfn convert_char3_sat_rte(ulong3);\n" |
29778 | "char3 __ovld __cnfn convert_char3_rtz(ulong3);\n" |
29779 | "char3 __ovld __cnfn convert_char3_sat_rtz(ulong3);\n" |
29780 | "char3 __ovld __cnfn convert_char3_rtp(ulong3);\n" |
29781 | "char3 __ovld __cnfn convert_char3_sat_rtp(ulong3);\n" |
29782 | "char3 __ovld __cnfn convert_char3_rtn(ulong3);\n" |
29783 | "char3 __ovld __cnfn convert_char3_sat_rtn(ulong3);\n" |
29784 | "char3 __ovld __cnfn convert_char3(ulong3);\n" |
29785 | "char3 __ovld __cnfn convert_char3_sat(ulong3);\n" |
29786 | "char3 __ovld __cnfn convert_char3_rte(float3);\n" |
29787 | "char3 __ovld __cnfn convert_char3_sat_rte(float3);\n" |
29788 | "char3 __ovld __cnfn convert_char3_rtz(float3);\n" |
29789 | "char3 __ovld __cnfn convert_char3_sat_rtz(float3);\n" |
29790 | "char3 __ovld __cnfn convert_char3_rtp(float3);\n" |
29791 | "char3 __ovld __cnfn convert_char3_sat_rtp(float3);\n" |
29792 | "char3 __ovld __cnfn convert_char3_rtn(float3);\n" |
29793 | "char3 __ovld __cnfn convert_char3_sat_rtn(float3);\n" |
29794 | "char3 __ovld __cnfn convert_char3(float3);\n" |
29795 | "char3 __ovld __cnfn convert_char3_sat(float3);\n" |
29796 | "uchar3 __ovld __cnfn convert_uchar3_rte(char3);\n" |
29797 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(char3);\n" |
29798 | "uchar3 __ovld __cnfn convert_uchar3_rtz(char3);\n" |
29799 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(char3);\n" |
29800 | "uchar3 __ovld __cnfn convert_uchar3_rtp(char3);\n" |
29801 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(char3);\n" |
29802 | "uchar3 __ovld __cnfn convert_uchar3_rtn(char3);\n" |
29803 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(char3);\n" |
29804 | "uchar3 __ovld __cnfn convert_uchar3(char3);\n" |
29805 | "uchar3 __ovld __cnfn convert_uchar3_sat(char3);\n" |
29806 | "uchar3 __ovld __cnfn convert_uchar3_rte(uchar3);\n" |
29807 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(uchar3);\n" |
29808 | "uchar3 __ovld __cnfn convert_uchar3_rtz(uchar3);\n" |
29809 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(uchar3);\n" |
29810 | "uchar3 __ovld __cnfn convert_uchar3_rtp(uchar3);\n" |
29811 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uchar3);\n" |
29812 | "uchar3 __ovld __cnfn convert_uchar3_rtn(uchar3);\n" |
29813 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uchar3);\n" |
29814 | "uchar3 __ovld __cnfn convert_uchar3(uchar3);\n" |
29815 | "uchar3 __ovld __cnfn convert_uchar3_sat(uchar3);\n" |
29816 | "uchar3 __ovld __cnfn convert_uchar3_rte(short3);\n" |
29817 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(short3);\n" |
29818 | "uchar3 __ovld __cnfn convert_uchar3_rtz(short3);\n" |
29819 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(short3);\n" |
29820 | "uchar3 __ovld __cnfn convert_uchar3_rtp(short3);\n" |
29821 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(short3);\n" |
29822 | "uchar3 __ovld __cnfn convert_uchar3_rtn(short3);\n" |
29823 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(short3);\n" |
29824 | "uchar3 __ovld __cnfn convert_uchar3(short3);\n" |
29825 | "uchar3 __ovld __cnfn convert_uchar3_sat(short3);\n" |
29826 | "uchar3 __ovld __cnfn convert_uchar3_rte(ushort3);\n" |
29827 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(ushort3);\n" |
29828 | "uchar3 __ovld __cnfn convert_uchar3_rtz(ushort3);\n" |
29829 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ushort3);\n" |
29830 | "uchar3 __ovld __cnfn convert_uchar3_rtp(ushort3);\n" |
29831 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ushort3);\n" |
29832 | "uchar3 __ovld __cnfn convert_uchar3_rtn(ushort3);\n" |
29833 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ushort3);\n" |
29834 | "uchar3 __ovld __cnfn convert_uchar3(ushort3);\n" |
29835 | "uchar3 __ovld __cnfn convert_uchar3_sat(ushort3);\n" |
29836 | "uchar3 __ovld __cnfn convert_uchar3_rte(int3);\n" |
29837 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(int3);\n" |
29838 | "uchar3 __ovld __cnfn convert_uchar3_rtz(int3);\n" |
29839 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(int3);\n" |
29840 | "uchar3 __ovld __cnfn convert_uchar3_rtp(int3);\n" |
29841 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(int3);\n" |
29842 | "uchar3 __ovld __cnfn convert_uchar3_rtn(int3);\n" |
29843 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(int3);\n" |
29844 | "uchar3 __ovld __cnfn convert_uchar3(int3);\n" |
29845 | "uchar3 __ovld __cnfn convert_uchar3_sat(int3);\n" |
29846 | "uchar3 __ovld __cnfn convert_uchar3_rte(uint3);\n" |
29847 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(uint3);\n" |
29848 | "uchar3 __ovld __cnfn convert_uchar3_rtz(uint3);\n" |
29849 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(uint3);\n" |
29850 | "uchar3 __ovld __cnfn convert_uchar3_rtp(uint3);\n" |
29851 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uint3);\n" |
29852 | "uchar3 __ovld __cnfn convert_uchar3_rtn(uint3);\n" |
29853 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uint3);\n" |
29854 | "uchar3 __ovld __cnfn convert_uchar3(uint3);\n" |
29855 | "uchar3 __ovld __cnfn convert_uchar3_sat(uint3);\n" |
29856 | "uchar3 __ovld __cnfn convert_uchar3_rte(long3);\n" |
29857 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(long3);\n" |
29858 | "uchar3 __ovld __cnfn convert_uchar3_rtz(long3);\n" |
29859 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(long3);\n" |
29860 | "uchar3 __ovld __cnfn convert_uchar3_rtp(long3);\n" |
29861 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(long3);\n" |
29862 | "uchar3 __ovld __cnfn convert_uchar3_rtn(long3);\n" |
29863 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(long3);\n" |
29864 | "uchar3 __ovld __cnfn convert_uchar3(long3);\n" |
29865 | "uchar3 __ovld __cnfn convert_uchar3_sat(long3);\n" |
29866 | "uchar3 __ovld __cnfn convert_uchar3_rte(ulong3);\n" |
29867 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(ulong3);\n" |
29868 | "uchar3 __ovld __cnfn convert_uchar3_rtz(ulong3);\n" |
29869 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ulong3);\n" |
29870 | "uchar3 __ovld __cnfn convert_uchar3_rtp(ulong3);\n" |
29871 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ulong3);\n" |
29872 | "uchar3 __ovld __cnfn convert_uchar3_rtn(ulong3);\n" |
29873 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ulong3);\n" |
29874 | "uchar3 __ovld __cnfn convert_uchar3(ulong3);\n" |
29875 | "uchar3 __ovld __cnfn convert_uchar3_sat(ulong3);\n" |
29876 | "uchar3 __ovld __cnfn convert_uchar3_rte(float3);\n" |
29877 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(float3);\n" |
29878 | "uchar3 __ovld __cnfn convert_uchar3_rtz(float3);\n" |
29879 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(float3);\n" |
29880 | "uchar3 __ovld __cnfn convert_uchar3_rtp(float3);\n" |
29881 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(float3);\n" |
29882 | "uchar3 __ovld __cnfn convert_uchar3_rtn(float3);\n" |
29883 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(float3);\n" |
29884 | "uchar3 __ovld __cnfn convert_uchar3(float3);\n" |
29885 | "uchar3 __ovld __cnfn convert_uchar3_sat(float3);\n" |
29886 | "short3 __ovld __cnfn convert_short3_rte(char3);\n" |
29887 | "short3 __ovld __cnfn convert_short3_sat_rte(char3);\n" |
29888 | "short3 __ovld __cnfn convert_short3_rtz(char3);\n" |
29889 | "short3 __ovld __cnfn convert_short3_sat_rtz(char3);\n" |
29890 | "short3 __ovld __cnfn convert_short3_rtp(char3);\n" |
29891 | "short3 __ovld __cnfn convert_short3_sat_rtp(char3);\n" |
29892 | "short3 __ovld __cnfn convert_short3_rtn(char3);\n" |
29893 | "short3 __ovld __cnfn convert_short3_sat_rtn(char3);\n" |
29894 | "short3 __ovld __cnfn convert_short3(char3);\n" |
29895 | "short3 __ovld __cnfn convert_short3_sat(char3);\n" |
29896 | "short3 __ovld __cnfn convert_short3_rte(uchar3);\n" |
29897 | "short3 __ovld __cnfn convert_short3_sat_rte(uchar3);\n" |
29898 | "short3 __ovld __cnfn convert_short3_rtz(uchar3);\n" |
29899 | "short3 __ovld __cnfn convert_short3_sat_rtz(uchar3);\n" |
29900 | "short3 __ovld __cnfn convert_short3_rtp(uchar3);\n" |
29901 | "short3 __ovld __cnfn convert_short3_sat_rtp(uchar3);\n" |
29902 | "short3 __ovld __cnfn convert_short3_rtn(uchar3);\n" |
29903 | "short3 __ovld __cnfn convert_short3_sat_rtn(uchar3);\n" |
29904 | "short3 __ovld __cnfn convert_short3(uchar3);\n" |
29905 | "short3 __ovld __cnfn convert_short3_sat(uchar3);\n" |
29906 | "short3 __ovld __cnfn convert_short3_rte(short3);\n" |
29907 | "short3 __ovld __cnfn convert_short3_sat_rte(short3);\n" |
29908 | "short3 __ovld __cnfn convert_short3_rtz(short3);\n" |
29909 | "short3 __ovld __cnfn convert_short3_sat_rtz(short3);\n" |
29910 | "short3 __ovld __cnfn convert_short3_rtp(short3);\n" |
29911 | "short3 __ovld __cnfn convert_short3_sat_rtp(short3);\n" |
29912 | "short3 __ovld __cnfn convert_short3_rtn(short3);\n" |
29913 | "short3 __ovld __cnfn convert_short3_sat_rtn(short3);\n" |
29914 | "short3 __ovld __cnfn convert_short3(short3);\n" |
29915 | "short3 __ovld __cnfn convert_short3_sat(short3);\n" |
29916 | "short3 __ovld __cnfn convert_short3_rte(ushort3);\n" |
29917 | "short3 __ovld __cnfn convert_short3_sat_rte(ushort3);\n" |
29918 | "short3 __ovld __cnfn convert_short3_rtz(ushort3);\n" |
29919 | "short3 __ovld __cnfn convert_short3_sat_rtz(ushort3);\n" |
29920 | "short3 __ovld __cnfn convert_short3_rtp(ushort3);\n" |
29921 | "short3 __ovld __cnfn convert_short3_sat_rtp(ushort3);\n" |
29922 | "short3 __ovld __cnfn convert_short3_rtn(ushort3);\n" |
29923 | "short3 __ovld __cnfn convert_short3_sat_rtn(ushort3);\n" |
29924 | "short3 __ovld __cnfn convert_short3(ushort3);\n" |
29925 | "short3 __ovld __cnfn convert_short3_sat(ushort3);\n" |
29926 | "short3 __ovld __cnfn convert_short3_rte(int3);\n" |
29927 | "short3 __ovld __cnfn convert_short3_sat_rte(int3);\n" |
29928 | "short3 __ovld __cnfn convert_short3_rtz(int3);\n" |
29929 | "short3 __ovld __cnfn convert_short3_sat_rtz(int3);\n" |
29930 | "short3 __ovld __cnfn convert_short3_rtp(int3);\n" |
29931 | "short3 __ovld __cnfn convert_short3_sat_rtp(int3);\n" |
29932 | "short3 __ovld __cnfn convert_short3_rtn(int3);\n" |
29933 | "short3 __ovld __cnfn convert_short3_sat_rtn(int3);\n" |
29934 | "short3 __ovld __cnfn convert_short3(int3);\n" |
29935 | "short3 __ovld __cnfn convert_short3_sat(int3);\n" |
29936 | "short3 __ovld __cnfn convert_short3_rte(uint3);\n" |
29937 | "short3 __ovld __cnfn convert_short3_sat_rte(uint3);\n" |
29938 | "short3 __ovld __cnfn convert_short3_rtz(uint3);\n" |
29939 | "short3 __ovld __cnfn convert_short3_sat_rtz(uint3);\n" |
29940 | "short3 __ovld __cnfn convert_short3_rtp(uint3);\n" |
29941 | "short3 __ovld __cnfn convert_short3_sat_rtp(uint3);\n" |
29942 | "short3 __ovld __cnfn convert_short3_rtn(uint3);\n" |
29943 | "short3 __ovld __cnfn convert_short3_sat_rtn(uint3);\n" |
29944 | "short3 __ovld __cnfn convert_short3(uint3);\n" |
29945 | "short3 __ovld __cnfn convert_short3_sat(uint3);\n" |
29946 | "short3 __ovld __cnfn convert_short3_rte(long3);\n" |
29947 | "short3 __ovld __cnfn convert_short3_sat_rte(long3);\n" |
29948 | "short3 __ovld __cnfn convert_short3_rtz(long3);\n" |
29949 | "short3 __ovld __cnfn convert_short3_sat_rtz(long3);\n" |
29950 | "short3 __ovld __cnfn convert_short3_rtp(long3);\n" |
29951 | "short3 __ovld __cnfn convert_short3_sat_rtp(long3);\n" |
29952 | "short3 __ovld __cnfn convert_short3_rtn(long3);\n" |
29953 | "short3 __ovld __cnfn convert_short3_sat_rtn(long3);\n" |
29954 | "short3 __ovld __cnfn convert_short3(long3);\n" |
29955 | "short3 __ovld __cnfn convert_short3_sat(long3);\n" |
29956 | "short3 __ovld __cnfn convert_short3_rte(ulong3);\n" |
29957 | "short3 __ovld __cnfn convert_short3_sat_rte(ulong3);\n" |
29958 | "short3 __ovld __cnfn convert_short3_rtz(ulong3);\n" |
29959 | "short3 __ovld __cnfn convert_short3_sat_rtz(ulong3);\n" |
29960 | "short3 __ovld __cnfn convert_short3_rtp(ulong3);\n" |
29961 | "short3 __ovld __cnfn convert_short3_sat_rtp(ulong3);\n" |
29962 | "short3 __ovld __cnfn convert_short3_rtn(ulong3);\n" |
29963 | "short3 __ovld __cnfn convert_short3_sat_rtn(ulong3);\n" |
29964 | "short3 __ovld __cnfn convert_short3(ulong3);\n" |
29965 | "short3 __ovld __cnfn convert_short3_sat(ulong3);\n" |
29966 | "short3 __ovld __cnfn convert_short3_rte(float3);\n" |
29967 | "short3 __ovld __cnfn convert_short3_sat_rte(float3);\n" |
29968 | "short3 __ovld __cnfn convert_short3_rtz(float3);\n" |
29969 | "short3 __ovld __cnfn convert_short3_sat_rtz(float3);\n" |
29970 | "short3 __ovld __cnfn convert_short3_rtp(float3);\n" |
29971 | "short3 __ovld __cnfn convert_short3_sat_rtp(float3);\n" |
29972 | "short3 __ovld __cnfn convert_short3_rtn(float3);\n" |
29973 | "short3 __ovld __cnfn convert_short3_sat_rtn(float3);\n" |
29974 | "short3 __ovld __cnfn convert_short3(float3);\n" |
29975 | "short3 __ovld __cnfn convert_short3_sat(float3);\n" |
29976 | "ushort3 __ovld __cnfn convert_ushort3_rte(char3);\n" |
29977 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(char3);\n" |
29978 | "ushort3 __ovld __cnfn convert_ushort3_rtz(char3);\n" |
29979 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(char3);\n" |
29980 | "ushort3 __ovld __cnfn convert_ushort3_rtp(char3);\n" |
29981 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(char3);\n" |
29982 | "ushort3 __ovld __cnfn convert_ushort3_rtn(char3);\n" |
29983 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(char3);\n" |
29984 | "ushort3 __ovld __cnfn convert_ushort3(char3);\n" |
29985 | "ushort3 __ovld __cnfn convert_ushort3_sat(char3);\n" |
29986 | "ushort3 __ovld __cnfn convert_ushort3_rte(uchar3);\n" |
29987 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(uchar3);\n" |
29988 | "ushort3 __ovld __cnfn convert_ushort3_rtz(uchar3);\n" |
29989 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uchar3);\n" |
29990 | "ushort3 __ovld __cnfn convert_ushort3_rtp(uchar3);\n" |
29991 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(uchar3);\n" |
29992 | "ushort3 __ovld __cnfn convert_ushort3_rtn(uchar3);\n" |
29993 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uchar3);\n" |
29994 | "ushort3 __ovld __cnfn convert_ushort3(uchar3);\n" |
29995 | "ushort3 __ovld __cnfn convert_ushort3_sat(uchar3);\n" |
29996 | "ushort3 __ovld __cnfn convert_ushort3_rte(short3);\n" |
29997 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(short3);\n" |
29998 | "ushort3 __ovld __cnfn convert_ushort3_rtz(short3);\n" |
29999 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(short3);\n" |
30000 | "ushort3 __ovld __cnfn convert_ushort3_rtp(short3);\n" |
30001 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(short3);\n" |
30002 | "ushort3 __ovld __cnfn convert_ushort3_rtn(short3);\n" |
30003 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(short3);\n" |
30004 | "ushort3 __ovld __cnfn convert_ushort3(short3);\n" |
30005 | "ushort3 __ovld __cnfn convert_ushort3_sat(short3);\n" |
30006 | "ushort3 __ovld __cnfn convert_ushort3_rte(ushort3);\n" |
30007 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(ushort3);\n" |
30008 | "ushort3 __ovld __cnfn convert_ushort3_rtz(ushort3);\n" |
30009 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ushort3);\n" |
30010 | "ushort3 __ovld __cnfn convert_ushort3_rtp(ushort3);\n" |
30011 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ushort3);\n" |
30012 | "ushort3 __ovld __cnfn convert_ushort3_rtn(ushort3);\n" |
30013 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ushort3);\n" |
30014 | "ushort3 __ovld __cnfn convert_ushort3(ushort3);\n" |
30015 | "ushort3 __ovld __cnfn convert_ushort3_sat(ushort3);\n" |
30016 | "ushort3 __ovld __cnfn convert_ushort3_rte(int3);\n" |
30017 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(int3);\n" |
30018 | "ushort3 __ovld __cnfn convert_ushort3_rtz(int3);\n" |
30019 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(int3);\n" |
30020 | "ushort3 __ovld __cnfn convert_ushort3_rtp(int3);\n" |
30021 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(int3);\n" |
30022 | "ushort3 __ovld __cnfn convert_ushort3_rtn(int3);\n" |
30023 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(int3);\n" |
30024 | "ushort3 __ovld __cnfn convert_ushort3(int3);\n" |
30025 | "ushort3 __ovld __cnfn convert_ushort3_sat(int3);\n" |
30026 | "ushort3 __ovld __cnfn convert_ushort3_rte(uint3);\n" |
30027 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(uint3);\n" |
30028 | "ushort3 __ovld __cnfn convert_ushort3_rtz(uint3);\n" |
30029 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uint3);\n" |
30030 | "ushort3 __ovld __cnfn convert_ushort3_rtp(uint3);\n" |
30031 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(uint3);\n" |
30032 | "ushort3 __ovld __cnfn convert_ushort3_rtn(uint3);\n" |
30033 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uint3);\n" |
30034 | "ushort3 __ovld __cnfn convert_ushort3(uint3);\n" |
30035 | "ushort3 __ovld __cnfn convert_ushort3_sat(uint3);\n" |
30036 | "ushort3 __ovld __cnfn convert_ushort3_rte(long3);\n" |
30037 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(long3);\n" |
30038 | "ushort3 __ovld __cnfn convert_ushort3_rtz(long3);\n" |
30039 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(long3);\n" |
30040 | "ushort3 __ovld __cnfn convert_ushort3_rtp(long3);\n" |
30041 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(long3);\n" |
30042 | "ushort3 __ovld __cnfn convert_ushort3_rtn(long3);\n" |
30043 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(long3);\n" |
30044 | "ushort3 __ovld __cnfn convert_ushort3(long3);\n" |
30045 | "ushort3 __ovld __cnfn convert_ushort3_sat(long3);\n" |
30046 | "ushort3 __ovld __cnfn convert_ushort3_rte(ulong3);\n" |
30047 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(ulong3);\n" |
30048 | "ushort3 __ovld __cnfn convert_ushort3_rtz(ulong3);\n" |
30049 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ulong3);\n" |
30050 | "ushort3 __ovld __cnfn convert_ushort3_rtp(ulong3);\n" |
30051 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ulong3);\n" |
30052 | "ushort3 __ovld __cnfn convert_ushort3_rtn(ulong3);\n" |
30053 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ulong3);\n" |
30054 | "ushort3 __ovld __cnfn convert_ushort3(ulong3);\n" |
30055 | "ushort3 __ovld __cnfn convert_ushort3_sat(ulong3);\n" |
30056 | "ushort3 __ovld __cnfn convert_ushort3_rte(float3);\n" |
30057 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(float3);\n" |
30058 | "ushort3 __ovld __cnfn convert_ushort3_rtz(float3);\n" |
30059 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(float3);\n" |
30060 | "ushort3 __ovld __cnfn convert_ushort3_rtp(float3);\n" |
30061 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(float3);\n" |
30062 | "ushort3 __ovld __cnfn convert_ushort3_rtn(float3);\n" |
30063 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(float3);\n" |
30064 | "ushort3 __ovld __cnfn convert_ushort3(float3);\n" |
30065 | "ushort3 __ovld __cnfn convert_ushort3_sat(float3);\n" |
30066 | "int3 __ovld __cnfn convert_int3_rte(char3);\n" |
30067 | "int3 __ovld __cnfn convert_int3_sat_rte(char3);\n" |
30068 | "int3 __ovld __cnfn convert_int3_rtz(char3);\n" |
30069 | "int3 __ovld __cnfn convert_int3_sat_rtz(char3);\n" |
30070 | "int3 __ovld __cnfn convert_int3_rtp(char3);\n" |
30071 | "int3 __ovld __cnfn convert_int3_sat_rtp(char3);\n" |
30072 | "int3 __ovld __cnfn convert_int3_rtn(char3);\n" |
30073 | "int3 __ovld __cnfn convert_int3_sat_rtn(char3);\n" |
30074 | "int3 __ovld __cnfn convert_int3(char3);\n" |
30075 | "int3 __ovld __cnfn convert_int3_sat(char3);\n" |
30076 | "int3 __ovld __cnfn convert_int3_rte(uchar3);\n" |
30077 | "int3 __ovld __cnfn convert_int3_sat_rte(uchar3);\n" |
30078 | "int3 __ovld __cnfn convert_int3_rtz(uchar3);\n" |
30079 | "int3 __ovld __cnfn convert_int3_sat_rtz(uchar3);\n" |
30080 | "int3 __ovld __cnfn convert_int3_rtp(uchar3);\n" |
30081 | "int3 __ovld __cnfn convert_int3_sat_rtp(uchar3);\n" |
30082 | "int3 __ovld __cnfn convert_int3_rtn(uchar3);\n" |
30083 | "int3 __ovld __cnfn convert_int3_sat_rtn(uchar3);\n" |
30084 | "int3 __ovld __cnfn convert_int3(uchar3);\n" |
30085 | "int3 __ovld __cnfn convert_int3_sat(uchar3);\n" |
30086 | "int3 __ovld __cnfn convert_int3_rte(short3);\n" |
30087 | "int3 __ovld __cnfn convert_int3_sat_rte(short3);\n" |
30088 | "int3 __ovld __cnfn convert_int3_rtz(short3);\n" |
30089 | "int3 __ovld __cnfn convert_int3_sat_rtz(short3);\n" |
30090 | "int3 __ovld __cnfn convert_int3_rtp(short3);\n" |
30091 | "int3 __ovld __cnfn convert_int3_sat_rtp(short3);\n" |
30092 | "int3 __ovld __cnfn convert_int3_rtn(short3);\n" |
30093 | "int3 __ovld __cnfn convert_int3_sat_rtn(short3);\n" |
30094 | "int3 __ovld __cnfn convert_int3(short3);\n" |
30095 | "int3 __ovld __cnfn convert_int3_sat(short3);\n" |
30096 | "int3 __ovld __cnfn convert_int3_rte(ushort3);\n" |
30097 | "int3 __ovld __cnfn convert_int3_sat_rte(ushort3);\n" |
30098 | "int3 __ovld __cnfn convert_int3_rtz(ushort3);\n" |
30099 | "int3 __ovld __cnfn convert_int3_sat_rtz(ushort3);\n" |
30100 | "int3 __ovld __cnfn convert_int3_rtp(ushort3);\n" |
30101 | "int3 __ovld __cnfn convert_int3_sat_rtp(ushort3);\n" |
30102 | "int3 __ovld __cnfn convert_int3_rtn(ushort3);\n" |
30103 | "int3 __ovld __cnfn convert_int3_sat_rtn(ushort3);\n" |
30104 | "int3 __ovld __cnfn convert_int3(ushort3);\n" |
30105 | "int3 __ovld __cnfn convert_int3_sat(ushort3);\n" |
30106 | "int3 __ovld __cnfn convert_int3_rte(int3);\n" |
30107 | "int3 __ovld __cnfn convert_int3_sat_rte(int3);\n" |
30108 | "int3 __ovld __cnfn convert_int3_rtz(int3);\n" |
30109 | "int3 __ovld __cnfn convert_int3_sat_rtz(int3);\n" |
30110 | "int3 __ovld __cnfn convert_int3_rtp(int3);\n" |
30111 | "int3 __ovld __cnfn convert_int3_sat_rtp(int3);\n" |
30112 | "int3 __ovld __cnfn convert_int3_rtn(int3);\n" |
30113 | "int3 __ovld __cnfn convert_int3_sat_rtn(int3);\n" |
30114 | "int3 __ovld __cnfn convert_int3(int3);\n" |
30115 | "int3 __ovld __cnfn convert_int3_sat(int3);\n" |
30116 | "int3 __ovld __cnfn convert_int3_rte(uint3);\n" |
30117 | "int3 __ovld __cnfn convert_int3_sat_rte(uint3);\n" |
30118 | "int3 __ovld __cnfn convert_int3_rtz(uint3);\n" |
30119 | "int3 __ovld __cnfn convert_int3_sat_rtz(uint3);\n" |
30120 | "int3 __ovld __cnfn convert_int3_rtp(uint3);\n" |
30121 | "int3 __ovld __cnfn convert_int3_sat_rtp(uint3);\n" |
30122 | "int3 __ovld __cnfn convert_int3_rtn(uint3);\n" |
30123 | "int3 __ovld __cnfn convert_int3_sat_rtn(uint3);\n" |
30124 | "int3 __ovld __cnfn convert_int3(uint3);\n" |
30125 | "int3 __ovld __cnfn convert_int3_sat(uint3);\n" |
30126 | "int3 __ovld __cnfn convert_int3_rte(long3);\n" |
30127 | "int3 __ovld __cnfn convert_int3_sat_rte(long3);\n" |
30128 | "int3 __ovld __cnfn convert_int3_rtz(long3);\n" |
30129 | "int3 __ovld __cnfn convert_int3_sat_rtz(long3);\n" |
30130 | "int3 __ovld __cnfn convert_int3_rtp(long3);\n" |
30131 | "int3 __ovld __cnfn convert_int3_sat_rtp(long3);\n" |
30132 | "int3 __ovld __cnfn convert_int3_rtn(long3);\n" |
30133 | "int3 __ovld __cnfn convert_int3_sat_rtn(long3);\n" |
30134 | "int3 __ovld __cnfn convert_int3(long3);\n" |
30135 | "int3 __ovld __cnfn convert_int3_sat(long3);\n" |
30136 | "int3 __ovld __cnfn convert_int3_rte(ulong3);\n" |
30137 | "int3 __ovld __cnfn convert_int3_sat_rte(ulong3);\n" |
30138 | "int3 __ovld __cnfn convert_int3_rtz(ulong3);\n" |
30139 | "int3 __ovld __cnfn convert_int3_sat_rtz(ulong3);\n" |
30140 | "int3 __ovld __cnfn convert_int3_rtp(ulong3);\n" |
30141 | "int3 __ovld __cnfn convert_int3_sat_rtp(ulong3);\n" |
30142 | "int3 __ovld __cnfn convert_int3_rtn(ulong3);\n" |
30143 | "int3 __ovld __cnfn convert_int3_sat_rtn(ulong3);\n" |
30144 | "int3 __ovld __cnfn convert_int3(ulong3);\n" |
30145 | "int3 __ovld __cnfn convert_int3_sat(ulong3);\n" |
30146 | "int3 __ovld __cnfn convert_int3_rte(float3);\n" |
30147 | "int3 __ovld __cnfn convert_int3_sat_rte(float3);\n" |
30148 | "int3 __ovld __cnfn convert_int3_rtz(float3);\n" |
30149 | "int3 __ovld __cnfn convert_int3_sat_rtz(float3);\n" |
30150 | "int3 __ovld __cnfn convert_int3_rtp(float3);\n" |
30151 | "int3 __ovld __cnfn convert_int3_sat_rtp(float3);\n" |
30152 | "int3 __ovld __cnfn convert_int3_rtn(float3);\n" |
30153 | "int3 __ovld __cnfn convert_int3_sat_rtn(float3);\n" |
30154 | "int3 __ovld __cnfn convert_int3(float3);\n" |
30155 | "int3 __ovld __cnfn convert_int3_sat(float3);\n" |
30156 | "uint3 __ovld __cnfn convert_uint3_rte(char3);\n" |
30157 | "uint3 __ovld __cnfn convert_uint3_sat_rte(char3);\n" |
30158 | "uint3 __ovld __cnfn convert_uint3_rtz(char3);\n" |
30159 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(char3);\n" |
30160 | "uint3 __ovld __cnfn convert_uint3_rtp(char3);\n" |
30161 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(char3);\n" |
30162 | "uint3 __ovld __cnfn convert_uint3_rtn(char3);\n" |
30163 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(char3);\n" |
30164 | "uint3 __ovld __cnfn convert_uint3(char3);\n" |
30165 | "uint3 __ovld __cnfn convert_uint3_sat(char3);\n" |
30166 | "uint3 __ovld __cnfn convert_uint3_rte(uchar3);\n" |
30167 | "uint3 __ovld __cnfn convert_uint3_sat_rte(uchar3);\n" |
30168 | "uint3 __ovld __cnfn convert_uint3_rtz(uchar3);\n" |
30169 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(uchar3);\n" |
30170 | "uint3 __ovld __cnfn convert_uint3_rtp(uchar3);\n" |
30171 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(uchar3);\n" |
30172 | "uint3 __ovld __cnfn convert_uint3_rtn(uchar3);\n" |
30173 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(uchar3);\n" |
30174 | "uint3 __ovld __cnfn convert_uint3(uchar3);\n" |
30175 | "uint3 __ovld __cnfn convert_uint3_sat(uchar3);\n" |
30176 | "uint3 __ovld __cnfn convert_uint3_rte(short3);\n" |
30177 | "uint3 __ovld __cnfn convert_uint3_sat_rte(short3);\n" |
30178 | "uint3 __ovld __cnfn convert_uint3_rtz(short3);\n" |
30179 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(short3);\n" |
30180 | "uint3 __ovld __cnfn convert_uint3_rtp(short3);\n" |
30181 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(short3);\n" |
30182 | "uint3 __ovld __cnfn convert_uint3_rtn(short3);\n" |
30183 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(short3);\n" |
30184 | "uint3 __ovld __cnfn convert_uint3(short3);\n" |
30185 | "uint3 __ovld __cnfn convert_uint3_sat(short3);\n" |
30186 | "uint3 __ovld __cnfn convert_uint3_rte(ushort3);\n" |
30187 | "uint3 __ovld __cnfn convert_uint3_sat_rte(ushort3);\n" |
30188 | "uint3 __ovld __cnfn convert_uint3_rtz(ushort3);\n" |
30189 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(ushort3);\n" |
30190 | "uint3 __ovld __cnfn convert_uint3_rtp(ushort3);\n" |
30191 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(ushort3);\n" |
30192 | "uint3 __ovld __cnfn convert_uint3_rtn(ushort3);\n" |
30193 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(ushort3);\n" |
30194 | "uint3 __ovld __cnfn convert_uint3(ushort3);\n" |
30195 | "uint3 __ovld __cnfn convert_uint3_sat(ushort3);\n" |
30196 | "uint3 __ovld __cnfn convert_uint3_rte(int3);\n" |
30197 | "uint3 __ovld __cnfn convert_uint3_sat_rte(int3);\n" |
30198 | "uint3 __ovld __cnfn convert_uint3_rtz(int3);\n" |
30199 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(int3);\n" |
30200 | "uint3 __ovld __cnfn convert_uint3_rtp(int3);\n" |
30201 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(int3);\n" |
30202 | "uint3 __ovld __cnfn convert_uint3_rtn(int3);\n" |
30203 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(int3);\n" |
30204 | "uint3 __ovld __cnfn convert_uint3(int3);\n" |
30205 | "uint3 __ovld __cnfn convert_uint3_sat(int3);\n" |
30206 | "uint3 __ovld __cnfn convert_uint3_rte(uint3);\n" |
30207 | "uint3 __ovld __cnfn convert_uint3_sat_rte(uint3);\n" |
30208 | "uint3 __ovld __cnfn convert_uint3_rtz(uint3);\n" |
30209 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(uint3);\n" |
30210 | "uint3 __ovld __cnfn convert_uint3_rtp(uint3);\n" |
30211 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(uint3);\n" |
30212 | "uint3 __ovld __cnfn convert_uint3_rtn(uint3);\n" |
30213 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(uint3);\n" |
30214 | "uint3 __ovld __cnfn convert_uint3(uint3);\n" |
30215 | "uint3 __ovld __cnfn convert_uint3_sat(uint3);\n" |
30216 | "uint3 __ovld __cnfn convert_uint3_rte(long3);\n" |
30217 | "uint3 __ovld __cnfn convert_uint3_sat_rte(long3);\n" |
30218 | "uint3 __ovld __cnfn convert_uint3_rtz(long3);\n" |
30219 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(long3);\n" |
30220 | "uint3 __ovld __cnfn convert_uint3_rtp(long3);\n" |
30221 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(long3);\n" |
30222 | "uint3 __ovld __cnfn convert_uint3_rtn(long3);\n" |
30223 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(long3);\n" |
30224 | "uint3 __ovld __cnfn convert_uint3(long3);\n" |
30225 | "uint3 __ovld __cnfn convert_uint3_sat(long3);\n" |
30226 | "uint3 __ovld __cnfn convert_uint3_rte(ulong3);\n" |
30227 | "uint3 __ovld __cnfn convert_uint3_sat_rte(ulong3);\n" |
30228 | "uint3 __ovld __cnfn convert_uint3_rtz(ulong3);\n" |
30229 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(ulong3);\n" |
30230 | "uint3 __ovld __cnfn convert_uint3_rtp(ulong3);\n" |
30231 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(ulong3);\n" |
30232 | "uint3 __ovld __cnfn convert_uint3_rtn(ulong3);\n" |
30233 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(ulong3);\n" |
30234 | "uint3 __ovld __cnfn convert_uint3(ulong3);\n" |
30235 | "uint3 __ovld __cnfn convert_uint3_sat(ulong3);\n" |
30236 | "uint3 __ovld __cnfn convert_uint3_rte(float3);\n" |
30237 | "uint3 __ovld __cnfn convert_uint3_sat_rte(float3);\n" |
30238 | "uint3 __ovld __cnfn convert_uint3_rtz(float3);\n" |
30239 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(float3);\n" |
30240 | "uint3 __ovld __cnfn convert_uint3_rtp(float3);\n" |
30241 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(float3);\n" |
30242 | "uint3 __ovld __cnfn convert_uint3_rtn(float3);\n" |
30243 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(float3);\n" |
30244 | "uint3 __ovld __cnfn convert_uint3(float3);\n" |
30245 | "uint3 __ovld __cnfn convert_uint3_sat(float3);\n" |
30246 | "long3 __ovld __cnfn convert_long3_rte(char3);\n" |
30247 | "long3 __ovld __cnfn convert_long3_sat_rte(char3);\n" |
30248 | "long3 __ovld __cnfn convert_long3_rtz(char3);\n" |
30249 | "long3 __ovld __cnfn convert_long3_sat_rtz(char3);\n" |
30250 | "long3 __ovld __cnfn convert_long3_rtp(char3);\n" |
30251 | "long3 __ovld __cnfn convert_long3_sat_rtp(char3);\n" |
30252 | "long3 __ovld __cnfn convert_long3_rtn(char3);\n" |
30253 | "long3 __ovld __cnfn convert_long3_sat_rtn(char3);\n" |
30254 | "long3 __ovld __cnfn convert_long3(char3);\n" |
30255 | "long3 __ovld __cnfn convert_long3_sat(char3);\n" |
30256 | "long3 __ovld __cnfn convert_long3_rte(uchar3);\n" |
30257 | "long3 __ovld __cnfn convert_long3_sat_rte(uchar3);\n" |
30258 | "long3 __ovld __cnfn convert_long3_rtz(uchar3);\n" |
30259 | "long3 __ovld __cnfn convert_long3_sat_rtz(uchar3);\n" |
30260 | "long3 __ovld __cnfn convert_long3_rtp(uchar3);\n" |
30261 | "long3 __ovld __cnfn convert_long3_sat_rtp(uchar3);\n" |
30262 | "long3 __ovld __cnfn convert_long3_rtn(uchar3);\n" |
30263 | "long3 __ovld __cnfn convert_long3_sat_rtn(uchar3);\n" |
30264 | "long3 __ovld __cnfn convert_long3(uchar3);\n" |
30265 | "long3 __ovld __cnfn convert_long3_sat(uchar3);\n" |
30266 | "long3 __ovld __cnfn convert_long3_rte(short3);\n" |
30267 | "long3 __ovld __cnfn convert_long3_sat_rte(short3);\n" |
30268 | "long3 __ovld __cnfn convert_long3_rtz(short3);\n" |
30269 | "long3 __ovld __cnfn convert_long3_sat_rtz(short3);\n" |
30270 | "long3 __ovld __cnfn convert_long3_rtp(short3);\n" |
30271 | "long3 __ovld __cnfn convert_long3_sat_rtp(short3);\n" |
30272 | "long3 __ovld __cnfn convert_long3_rtn(short3);\n" |
30273 | "long3 __ovld __cnfn convert_long3_sat_rtn(short3);\n" |
30274 | "long3 __ovld __cnfn convert_long3(short3);\n" |
30275 | "long3 __ovld __cnfn convert_long3_sat(short3);\n" |
30276 | "long3 __ovld __cnfn convert_long3_rte(ushort3);\n" |
30277 | "long3 __ovld __cnfn convert_long3_sat_rte(ushort3);\n" |
30278 | "long3 __ovld __cnfn convert_long3_rtz(ushort3);\n" |
30279 | "long3 __ovld __cnfn convert_long3_sat_rtz(ushort3);\n" |
30280 | "long3 __ovld __cnfn convert_long3_rtp(ushort3);\n" |
30281 | "long3 __ovld __cnfn convert_long3_sat_rtp(ushort3);\n" |
30282 | "long3 __ovld __cnfn convert_long3_rtn(ushort3);\n" |
30283 | "long3 __ovld __cnfn convert_long3_sat_rtn(ushort3);\n" |
30284 | "long3 __ovld __cnfn convert_long3(ushort3);\n" |
30285 | "long3 __ovld __cnfn convert_long3_sat(ushort3);\n" |
30286 | "long3 __ovld __cnfn convert_long3_rte(int3);\n" |
30287 | "long3 __ovld __cnfn convert_long3_sat_rte(int3);\n" |
30288 | "long3 __ovld __cnfn convert_long3_rtz(int3);\n" |
30289 | "long3 __ovld __cnfn convert_long3_sat_rtz(int3);\n" |
30290 | "long3 __ovld __cnfn convert_long3_rtp(int3);\n" |
30291 | "long3 __ovld __cnfn convert_long3_sat_rtp(int3);\n" |
30292 | "long3 __ovld __cnfn convert_long3_rtn(int3);\n" |
30293 | "long3 __ovld __cnfn convert_long3_sat_rtn(int3);\n" |
30294 | "long3 __ovld __cnfn convert_long3(int3);\n" |
30295 | "long3 __ovld __cnfn convert_long3_sat(int3);\n" |
30296 | "long3 __ovld __cnfn convert_long3_rte(uint3);\n" |
30297 | "long3 __ovld __cnfn convert_long3_sat_rte(uint3);\n" |
30298 | "long3 __ovld __cnfn convert_long3_rtz(uint3);\n" |
30299 | "long3 __ovld __cnfn convert_long3_sat_rtz(uint3);\n" |
30300 | "long3 __ovld __cnfn convert_long3_rtp(uint3);\n" |
30301 | "long3 __ovld __cnfn convert_long3_sat_rtp(uint3);\n" |
30302 | "long3 __ovld __cnfn convert_long3_rtn(uint3);\n" |
30303 | "long3 __ovld __cnfn convert_long3_sat_rtn(uint3);\n" |
30304 | "long3 __ovld __cnfn convert_long3(uint3);\n" |
30305 | "long3 __ovld __cnfn convert_long3_sat(uint3);\n" |
30306 | "long3 __ovld __cnfn convert_long3_rte(long3);\n" |
30307 | "long3 __ovld __cnfn convert_long3_sat_rte(long3);\n" |
30308 | "long3 __ovld __cnfn convert_long3_rtz(long3);\n" |
30309 | "long3 __ovld __cnfn convert_long3_sat_rtz(long3);\n" |
30310 | "long3 __ovld __cnfn convert_long3_rtp(long3);\n" |
30311 | "long3 __ovld __cnfn convert_long3_sat_rtp(long3);\n" |
30312 | "long3 __ovld __cnfn convert_long3_rtn(long3);\n" |
30313 | "long3 __ovld __cnfn convert_long3_sat_rtn(long3);\n" |
30314 | "long3 __ovld __cnfn convert_long3(long3);\n" |
30315 | "long3 __ovld __cnfn convert_long3_sat(long3);\n" |
30316 | "long3 __ovld __cnfn convert_long3_rte(ulong3);\n" |
30317 | "long3 __ovld __cnfn convert_long3_sat_rte(ulong3);\n" |
30318 | "long3 __ovld __cnfn convert_long3_rtz(ulong3);\n" |
30319 | "long3 __ovld __cnfn convert_long3_sat_rtz(ulong3);\n" |
30320 | "long3 __ovld __cnfn convert_long3_rtp(ulong3);\n" |
30321 | "long3 __ovld __cnfn convert_long3_sat_rtp(ulong3);\n" |
30322 | "long3 __ovld __cnfn convert_long3_rtn(ulong3);\n" |
30323 | "long3 __ovld __cnfn convert_long3_sat_rtn(ulong3);\n" |
30324 | "long3 __ovld __cnfn convert_long3(ulong3);\n" |
30325 | "long3 __ovld __cnfn convert_long3_sat(ulong3);\n" |
30326 | "long3 __ovld __cnfn convert_long3_rte(float3);\n" |
30327 | "long3 __ovld __cnfn convert_long3_sat_rte(float3);\n" |
30328 | "long3 __ovld __cnfn convert_long3_rtz(float3);\n" |
30329 | "long3 __ovld __cnfn convert_long3_sat_rtz(float3);\n" |
30330 | "long3 __ovld __cnfn convert_long3_rtp(float3);\n" |
30331 | "long3 __ovld __cnfn convert_long3_sat_rtp(float3);\n" |
30332 | "long3 __ovld __cnfn convert_long3_rtn(float3);\n" |
30333 | "long3 __ovld __cnfn convert_long3_sat_rtn(float3);\n" |
30334 | "long3 __ovld __cnfn convert_long3(float3);\n" |
30335 | "long3 __ovld __cnfn convert_long3_sat(float3);\n" |
30336 | "ulong3 __ovld __cnfn convert_ulong3_rte(char3);\n" |
30337 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(char3);\n" |
30338 | "ulong3 __ovld __cnfn convert_ulong3_rtz(char3);\n" |
30339 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(char3);\n" |
30340 | "ulong3 __ovld __cnfn convert_ulong3_rtp(char3);\n" |
30341 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(char3);\n" |
30342 | "ulong3 __ovld __cnfn convert_ulong3_rtn(char3);\n" |
30343 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(char3);\n" |
30344 | "ulong3 __ovld __cnfn convert_ulong3(char3);\n" |
30345 | "ulong3 __ovld __cnfn convert_ulong3_sat(char3);\n" |
30346 | "ulong3 __ovld __cnfn convert_ulong3_rte(uchar3);\n" |
30347 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(uchar3);\n" |
30348 | "ulong3 __ovld __cnfn convert_ulong3_rtz(uchar3);\n" |
30349 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uchar3);\n" |
30350 | "ulong3 __ovld __cnfn convert_ulong3_rtp(uchar3);\n" |
30351 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uchar3);\n" |
30352 | "ulong3 __ovld __cnfn convert_ulong3_rtn(uchar3);\n" |
30353 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uchar3);\n" |
30354 | "ulong3 __ovld __cnfn convert_ulong3(uchar3);\n" |
30355 | "ulong3 __ovld __cnfn convert_ulong3_sat(uchar3);\n" |
30356 | "ulong3 __ovld __cnfn convert_ulong3_rte(short3);\n" |
30357 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(short3);\n" |
30358 | "ulong3 __ovld __cnfn convert_ulong3_rtz(short3);\n" |
30359 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(short3);\n" |
30360 | "ulong3 __ovld __cnfn convert_ulong3_rtp(short3);\n" |
30361 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(short3);\n" |
30362 | "ulong3 __ovld __cnfn convert_ulong3_rtn(short3);\n" |
30363 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(short3);\n" |
30364 | "ulong3 __ovld __cnfn convert_ulong3(short3);\n" |
30365 | "ulong3 __ovld __cnfn convert_ulong3_sat(short3);\n" |
30366 | "ulong3 __ovld __cnfn convert_ulong3_rte(ushort3);\n" |
30367 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(ushort3);\n" |
30368 | "ulong3 __ovld __cnfn convert_ulong3_rtz(ushort3);\n" |
30369 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ushort3);\n" |
30370 | "ulong3 __ovld __cnfn convert_ulong3_rtp(ushort3);\n" |
30371 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ushort3);\n" |
30372 | "ulong3 __ovld __cnfn convert_ulong3_rtn(ushort3);\n" |
30373 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ushort3);\n" |
30374 | "ulong3 __ovld __cnfn convert_ulong3(ushort3);\n" |
30375 | "ulong3 __ovld __cnfn convert_ulong3_sat(ushort3);\n" |
30376 | "ulong3 __ovld __cnfn convert_ulong3_rte(int3);\n" |
30377 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(int3);\n" |
30378 | "ulong3 __ovld __cnfn convert_ulong3_rtz(int3);\n" |
30379 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(int3);\n" |
30380 | "ulong3 __ovld __cnfn convert_ulong3_rtp(int3);\n" |
30381 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(int3);\n" |
30382 | "ulong3 __ovld __cnfn convert_ulong3_rtn(int3);\n" |
30383 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(int3);\n" |
30384 | "ulong3 __ovld __cnfn convert_ulong3(int3);\n" |
30385 | "ulong3 __ovld __cnfn convert_ulong3_sat(int3);\n" |
30386 | "ulong3 __ovld __cnfn convert_ulong3_rte(uint3);\n" |
30387 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(uint3);\n" |
30388 | "ulong3 __ovld __cnfn convert_ulong3_rtz(uint3);\n" |
30389 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uint3);\n" |
30390 | "ulong3 __ovld __cnfn convert_ulong3_rtp(uint3);\n" |
30391 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uint3);\n" |
30392 | "ulong3 __ovld __cnfn convert_ulong3_rtn(uint3);\n" |
30393 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uint3);\n" |
30394 | "ulong3 __ovld __cnfn convert_ulong3(uint3);\n" |
30395 | "ulong3 __ovld __cnfn convert_ulong3_sat(uint3);\n" |
30396 | "ulong3 __ovld __cnfn convert_ulong3_rte(long3);\n" |
30397 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(long3);\n" |
30398 | "ulong3 __ovld __cnfn convert_ulong3_rtz(long3);\n" |
30399 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(long3);\n" |
30400 | "ulong3 __ovld __cnfn convert_ulong3_rtp(long3);\n" |
30401 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(long3);\n" |
30402 | "ulong3 __ovld __cnfn convert_ulong3_rtn(long3);\n" |
30403 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(long3);\n" |
30404 | "ulong3 __ovld __cnfn convert_ulong3(long3);\n" |
30405 | "ulong3 __ovld __cnfn convert_ulong3_sat(long3);\n" |
30406 | "ulong3 __ovld __cnfn convert_ulong3_rte(ulong3);\n" |
30407 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(ulong3);\n" |
30408 | "ulong3 __ovld __cnfn convert_ulong3_rtz(ulong3);\n" |
30409 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ulong3);\n" |
30410 | "ulong3 __ovld __cnfn convert_ulong3_rtp(ulong3);\n" |
30411 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ulong3);\n" |
30412 | "ulong3 __ovld __cnfn convert_ulong3_rtn(ulong3);\n" |
30413 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ulong3);\n" |
30414 | "ulong3 __ovld __cnfn convert_ulong3(ulong3);\n" |
30415 | "ulong3 __ovld __cnfn convert_ulong3_sat(ulong3);\n" |
30416 | "ulong3 __ovld __cnfn convert_ulong3_rte(float3);\n" |
30417 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(float3);\n" |
30418 | "ulong3 __ovld __cnfn convert_ulong3_rtz(float3);\n" |
30419 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(float3);\n" |
30420 | "ulong3 __ovld __cnfn convert_ulong3_rtp(float3);\n" |
30421 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(float3);\n" |
30422 | "ulong3 __ovld __cnfn convert_ulong3_rtn(float3);\n" |
30423 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(float3);\n" |
30424 | "ulong3 __ovld __cnfn convert_ulong3(float3);\n" |
30425 | "ulong3 __ovld __cnfn convert_ulong3_sat(float3);\n" |
30426 | "float3 __ovld __cnfn convert_float3_rte(char3);\n" |
30427 | "float3 __ovld __cnfn convert_float3_rtz(char3);\n" |
30428 | "float3 __ovld __cnfn convert_float3_rtp(char3);\n" |
30429 | "float3 __ovld __cnfn convert_float3_rtn(char3);\n" |
30430 | "float3 __ovld __cnfn convert_float3(char3);\n" |
30431 | "float3 __ovld __cnfn convert_float3_rte(uchar3);\n" |
30432 | "float3 __ovld __cnfn convert_float3_rtz(uchar3);\n" |
30433 | "float3 __ovld __cnfn convert_float3_rtp(uchar3);\n" |
30434 | "float3 __ovld __cnfn convert_float3_rtn(uchar3);\n" |
30435 | "float3 __ovld __cnfn convert_float3(uchar3);\n" |
30436 | "float3 __ovld __cnfn convert_float3_rte(short3);\n" |
30437 | "float3 __ovld __cnfn convert_float3_rtz(short3);\n" |
30438 | "float3 __ovld __cnfn convert_float3_rtp(short3);\n" |
30439 | "float3 __ovld __cnfn convert_float3_rtn(short3);\n" |
30440 | "float3 __ovld __cnfn convert_float3(short3);\n" |
30441 | "float3 __ovld __cnfn convert_float3_rte(ushort3);\n" |
30442 | "float3 __ovld __cnfn convert_float3_rtz(ushort3);\n" |
30443 | "float3 __ovld __cnfn convert_float3_rtp(ushort3);\n" |
30444 | "float3 __ovld __cnfn convert_float3_rtn(ushort3);\n" |
30445 | "float3 __ovld __cnfn convert_float3(ushort3);\n" |
30446 | "float3 __ovld __cnfn convert_float3_rte(int3);\n" |
30447 | "float3 __ovld __cnfn convert_float3_rtz(int3);\n" |
30448 | "float3 __ovld __cnfn convert_float3_rtp(int3);\n" |
30449 | "float3 __ovld __cnfn convert_float3_rtn(int3);\n" |
30450 | "float3 __ovld __cnfn convert_float3(int3);\n" |
30451 | "float3 __ovld __cnfn convert_float3_rte(uint3);\n" |
30452 | "float3 __ovld __cnfn convert_float3_rtz(uint3);\n" |
30453 | "float3 __ovld __cnfn convert_float3_rtp(uint3);\n" |
30454 | "float3 __ovld __cnfn convert_float3_rtn(uint3);\n" |
30455 | "float3 __ovld __cnfn convert_float3(uint3);\n" |
30456 | "float3 __ovld __cnfn convert_float3_rte(long3);\n" |
30457 | "float3 __ovld __cnfn convert_float3_rtz(long3);\n" |
30458 | "float3 __ovld __cnfn convert_float3_rtp(long3);\n" |
30459 | "float3 __ovld __cnfn convert_float3_rtn(long3);\n" |
30460 | "float3 __ovld __cnfn convert_float3(long3);\n" |
30461 | "float3 __ovld __cnfn convert_float3_rte(ulong3);\n" |
30462 | "float3 __ovld __cnfn convert_float3_rtz(ulong3);\n" |
30463 | "float3 __ovld __cnfn convert_float3_rtp(ulong3);\n" |
30464 | "float3 __ovld __cnfn convert_float3_rtn(ulong3);\n" |
30465 | "float3 __ovld __cnfn convert_float3(ulong3);\n" |
30466 | "float3 __ovld __cnfn convert_float3_rte(float3);\n" |
30467 | "float3 __ovld __cnfn convert_float3_rtz(float3);\n" |
30468 | "float3 __ovld __cnfn convert_float3_rtp(float3);\n" |
30469 | "float3 __ovld __cnfn convert_float3_rtn(float3);\n" |
30470 | "float3 __ovld __cnfn convert_float3(float3);\n" |
30471 | "char4 __ovld __cnfn convert_char4_rte(char4);\n" |
30472 | "char4 __ovld __cnfn convert_char4_sat_rte(char4);\n" |
30473 | "char4 __ovld __cnfn convert_char4_rtz(char4);\n" |
30474 | "char4 __ovld __cnfn convert_char4_sat_rtz(char4);\n" |
30475 | "char4 __ovld __cnfn convert_char4_rtp(char4);\n" |
30476 | "char4 __ovld __cnfn convert_char4_sat_rtp(char4);\n" |
30477 | "char4 __ovld __cnfn convert_char4_rtn(char4);\n" |
30478 | "char4 __ovld __cnfn convert_char4_sat_rtn(char4);\n" |
30479 | "char4 __ovld __cnfn convert_char4(char4);\n" |
30480 | "char4 __ovld __cnfn convert_char4_sat(char4);\n" |
30481 | "char4 __ovld __cnfn convert_char4_rte(uchar4);\n" |
30482 | "char4 __ovld __cnfn convert_char4_sat_rte(uchar4);\n" |
30483 | "char4 __ovld __cnfn convert_char4_rtz(uchar4);\n" |
30484 | "char4 __ovld __cnfn convert_char4_sat_rtz(uchar4);\n" |
30485 | "char4 __ovld __cnfn convert_char4_rtp(uchar4);\n" |
30486 | "char4 __ovld __cnfn convert_char4_sat_rtp(uchar4);\n" |
30487 | "char4 __ovld __cnfn convert_char4_rtn(uchar4);\n" |
30488 | "char4 __ovld __cnfn convert_char4_sat_rtn(uchar4);\n" |
30489 | "char4 __ovld __cnfn convert_char4(uchar4);\n" |
30490 | "char4 __ovld __cnfn convert_char4_sat(uchar4);\n" |
30491 | "char4 __ovld __cnfn convert_char4_rte(short4);\n" |
30492 | "char4 __ovld __cnfn convert_char4_sat_rte(short4);\n" |
30493 | "char4 __ovld __cnfn convert_char4_rtz(short4);\n" |
30494 | "char4 __ovld __cnfn convert_char4_sat_rtz(short4);\n" |
30495 | "char4 __ovld __cnfn convert_char4_rtp(short4);\n" |
30496 | "char4 __ovld __cnfn convert_char4_sat_rtp(short4);\n" |
30497 | "char4 __ovld __cnfn convert_char4_rtn(short4);\n" |
30498 | "char4 __ovld __cnfn convert_char4_sat_rtn(short4);\n" |
30499 | "char4 __ovld __cnfn convert_char4(short4);\n" |
30500 | "char4 __ovld __cnfn convert_char4_sat(short4);\n" |
30501 | "char4 __ovld __cnfn convert_char4_rte(ushort4);\n" |
30502 | "char4 __ovld __cnfn convert_char4_sat_rte(ushort4);\n" |
30503 | "char4 __ovld __cnfn convert_char4_rtz(ushort4);\n" |
30504 | "char4 __ovld __cnfn convert_char4_sat_rtz(ushort4);\n" |
30505 | "char4 __ovld __cnfn convert_char4_rtp(ushort4);\n" |
30506 | "char4 __ovld __cnfn convert_char4_sat_rtp(ushort4);\n" |
30507 | "char4 __ovld __cnfn convert_char4_rtn(ushort4);\n" |
30508 | "char4 __ovld __cnfn convert_char4_sat_rtn(ushort4);\n" |
30509 | "char4 __ovld __cnfn convert_char4(ushort4);\n" |
30510 | "char4 __ovld __cnfn convert_char4_sat(ushort4);\n" |
30511 | "char4 __ovld __cnfn convert_char4_rte(int4);\n" |
30512 | "char4 __ovld __cnfn convert_char4_sat_rte(int4);\n" |
30513 | "char4 __ovld __cnfn convert_char4_rtz(int4);\n" |
30514 | "char4 __ovld __cnfn convert_char4_sat_rtz(int4);\n" |
30515 | "char4 __ovld __cnfn convert_char4_rtp(int4);\n" |
30516 | "char4 __ovld __cnfn convert_char4_sat_rtp(int4);\n" |
30517 | "char4 __ovld __cnfn convert_char4_rtn(int4);\n" |
30518 | "char4 __ovld __cnfn convert_char4_sat_rtn(int4);\n" |
30519 | "char4 __ovld __cnfn convert_char4(int4);\n" |
30520 | "char4 __ovld __cnfn convert_char4_sat(int4);\n" |
30521 | "char4 __ovld __cnfn convert_char4_rte(uint4);\n" |
30522 | "char4 __ovld __cnfn convert_char4_sat_rte(uint4);\n" |
30523 | "char4 __ovld __cnfn convert_char4_rtz(uint4);\n" |
30524 | "char4 __ovld __cnfn convert_char4_sat_rtz(uint4);\n" |
30525 | "char4 __ovld __cnfn convert_char4_rtp(uint4);\n" |
30526 | "char4 __ovld __cnfn convert_char4_sat_rtp(uint4);\n" |
30527 | "char4 __ovld __cnfn convert_char4_rtn(uint4);\n" |
30528 | "char4 __ovld __cnfn convert_char4_sat_rtn(uint4);\n" |
30529 | "char4 __ovld __cnfn convert_char4(uint4);\n" |
30530 | "char4 __ovld __cnfn convert_char4_sat(uint4);\n" |
30531 | "char4 __ovld __cnfn convert_char4_rte(long4);\n" |
30532 | "char4 __ovld __cnfn convert_char4_sat_rte(long4);\n" |
30533 | "char4 __ovld __cnfn convert_char4_rtz(long4);\n" |
30534 | "char4 __ovld __cnfn convert_char4_sat_rtz(long4);\n" |
30535 | "char4 __ovld __cnfn convert_char4_rtp(long4);\n" |
30536 | "char4 __ovld __cnfn convert_char4_sat_rtp(long4);\n" |
30537 | "char4 __ovld __cnfn convert_char4_rtn(long4);\n" |
30538 | "char4 __ovld __cnfn convert_char4_sat_rtn(long4);\n" |
30539 | "char4 __ovld __cnfn convert_char4(long4);\n" |
30540 | "char4 __ovld __cnfn convert_char4_sat(long4);\n" |
30541 | "char4 __ovld __cnfn convert_char4_rte(ulong4);\n" |
30542 | "char4 __ovld __cnfn convert_char4_sat_rte(ulong4);\n" |
30543 | "char4 __ovld __cnfn convert_char4_rtz(ulong4);\n" |
30544 | "char4 __ovld __cnfn convert_char4_sat_rtz(ulong4);\n" |
30545 | "char4 __ovld __cnfn convert_char4_rtp(ulong4);\n" |
30546 | "char4 __ovld __cnfn convert_char4_sat_rtp(ulong4);\n" |
30547 | "char4 __ovld __cnfn convert_char4_rtn(ulong4);\n" |
30548 | "char4 __ovld __cnfn convert_char4_sat_rtn(ulong4);\n" |
30549 | "char4 __ovld __cnfn convert_char4(ulong4);\n" |
30550 | "char4 __ovld __cnfn convert_char4_sat(ulong4);\n" |
30551 | "char4 __ovld __cnfn convert_char4_rte(float4);\n" |
30552 | "char4 __ovld __cnfn convert_char4_sat_rte(float4);\n" |
30553 | "char4 __ovld __cnfn convert_char4_rtz(float4);\n" |
30554 | "char4 __ovld __cnfn convert_char4_sat_rtz(float4);\n" |
30555 | "char4 __ovld __cnfn convert_char4_rtp(float4);\n" |
30556 | "char4 __ovld __cnfn convert_char4_sat_rtp(float4);\n" |
30557 | "char4 __ovld __cnfn convert_char4_rtn(float4);\n" |
30558 | "char4 __ovld __cnfn convert_char4_sat_rtn(float4);\n" |
30559 | "char4 __ovld __cnfn convert_char4(float4);\n" |
30560 | "char4 __ovld __cnfn convert_char4_sat(float4);\n" |
30561 | "uchar4 __ovld __cnfn convert_uchar4_rte(char4);\n" |
30562 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(char4);\n" |
30563 | "uchar4 __ovld __cnfn convert_uchar4_rtz(char4);\n" |
30564 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(char4);\n" |
30565 | "uchar4 __ovld __cnfn convert_uchar4_rtp(char4);\n" |
30566 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(char4);\n" |
30567 | "uchar4 __ovld __cnfn convert_uchar4_rtn(char4);\n" |
30568 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(char4);\n" |
30569 | "uchar4 __ovld __cnfn convert_uchar4(char4);\n" |
30570 | "uchar4 __ovld __cnfn convert_uchar4_sat(char4);\n" |
30571 | "uchar4 __ovld __cnfn convert_uchar4_rte(uchar4);\n" |
30572 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(uchar4);\n" |
30573 | "uchar4 __ovld __cnfn convert_uchar4_rtz(uchar4);\n" |
30574 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uchar4);\n" |
30575 | "uchar4 __ovld __cnfn convert_uchar4_rtp(uchar4);\n" |
30576 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uchar4);\n" |
30577 | "uchar4 __ovld __cnfn convert_uchar4_rtn(uchar4);\n" |
30578 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uchar4);\n" |
30579 | "uchar4 __ovld __cnfn convert_uchar4(uchar4);\n" |
30580 | "uchar4 __ovld __cnfn convert_uchar4_sat(uchar4);\n" |
30581 | "uchar4 __ovld __cnfn convert_uchar4_rte(short4);\n" |
30582 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(short4);\n" |
30583 | "uchar4 __ovld __cnfn convert_uchar4_rtz(short4);\n" |
30584 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(short4);\n" |
30585 | "uchar4 __ovld __cnfn convert_uchar4_rtp(short4);\n" |
30586 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(short4);\n" |
30587 | "uchar4 __ovld __cnfn convert_uchar4_rtn(short4);\n" |
30588 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(short4);\n" |
30589 | "uchar4 __ovld __cnfn convert_uchar4(short4);\n" |
30590 | "uchar4 __ovld __cnfn convert_uchar4_sat(short4);\n" |
30591 | "uchar4 __ovld __cnfn convert_uchar4_rte(ushort4);\n" |
30592 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(ushort4);\n" |
30593 | "uchar4 __ovld __cnfn convert_uchar4_rtz(ushort4);\n" |
30594 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ushort4);\n" |
30595 | "uchar4 __ovld __cnfn convert_uchar4_rtp(ushort4);\n" |
30596 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ushort4);\n" |
30597 | "uchar4 __ovld __cnfn convert_uchar4_rtn(ushort4);\n" |
30598 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ushort4);\n" |
30599 | "uchar4 __ovld __cnfn convert_uchar4(ushort4);\n" |
30600 | "uchar4 __ovld __cnfn convert_uchar4_sat(ushort4);\n" |
30601 | "uchar4 __ovld __cnfn convert_uchar4_rte(int4);\n" |
30602 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(int4);\n" |
30603 | "uchar4 __ovld __cnfn convert_uchar4_rtz(int4);\n" |
30604 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(int4);\n" |
30605 | "uchar4 __ovld __cnfn convert_uchar4_rtp(int4);\n" |
30606 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(int4);\n" |
30607 | "uchar4 __ovld __cnfn convert_uchar4_rtn(int4);\n" |
30608 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(int4);\n" |
30609 | "uchar4 __ovld __cnfn convert_uchar4(int4);\n" |
30610 | "uchar4 __ovld __cnfn convert_uchar4_sat(int4);\n" |
30611 | "uchar4 __ovld __cnfn convert_uchar4_rte(uint4);\n" |
30612 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(uint4);\n" |
30613 | "uchar4 __ovld __cnfn convert_uchar4_rtz(uint4);\n" |
30614 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uint4);\n" |
30615 | "uchar4 __ovld __cnfn convert_uchar4_rtp(uint4);\n" |
30616 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uint4);\n" |
30617 | "uchar4 __ovld __cnfn convert_uchar4_rtn(uint4);\n" |
30618 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uint4);\n" |
30619 | "uchar4 __ovld __cnfn convert_uchar4(uint4);\n" |
30620 | "uchar4 __ovld __cnfn convert_uchar4_sat(uint4);\n" |
30621 | "uchar4 __ovld __cnfn convert_uchar4_rte(long4);\n" |
30622 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(long4);\n" |
30623 | "uchar4 __ovld __cnfn convert_uchar4_rtz(long4);\n" |
30624 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(long4);\n" |
30625 | "uchar4 __ovld __cnfn convert_uchar4_rtp(long4);\n" |
30626 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(long4);\n" |
30627 | "uchar4 __ovld __cnfn convert_uchar4_rtn(long4);\n" |
30628 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(long4);\n" |
30629 | "uchar4 __ovld __cnfn convert_uchar4(long4);\n" |
30630 | "uchar4 __ovld __cnfn convert_uchar4_sat(long4);\n" |
30631 | "uchar4 __ovld __cnfn convert_uchar4_rte(ulong4);\n" |
30632 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(ulong4);\n" |
30633 | "uchar4 __ovld __cnfn convert_uchar4_rtz(ulong4);\n" |
30634 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ulong4);\n" |
30635 | "uchar4 __ovld __cnfn convert_uchar4_rtp(ulong4);\n" |
30636 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ulong4);\n" |
30637 | "uchar4 __ovld __cnfn convert_uchar4_rtn(ulong4);\n" |
30638 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ulong4);\n" |
30639 | "uchar4 __ovld __cnfn convert_uchar4(ulong4);\n" |
30640 | "uchar4 __ovld __cnfn convert_uchar4_sat(ulong4);\n" |
30641 | "uchar4 __ovld __cnfn convert_uchar4_rte(float4);\n" |
30642 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(float4);\n" |
30643 | "uchar4 __ovld __cnfn convert_uchar4_rtz(float4);\n" |
30644 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(float4);\n" |
30645 | "uchar4 __ovld __cnfn convert_uchar4_rtp(float4);\n" |
30646 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(float4);\n" |
30647 | "uchar4 __ovld __cnfn convert_uchar4_rtn(float4);\n" |
30648 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(float4);\n" |
30649 | "uchar4 __ovld __cnfn convert_uchar4(float4);\n" |
30650 | "uchar4 __ovld __cnfn convert_uchar4_sat(float4);\n" |
30651 | "short4 __ovld __cnfn convert_short4_rte(char4);\n" |
30652 | "short4 __ovld __cnfn convert_short4_sat_rte(char4);\n" |
30653 | "short4 __ovld __cnfn convert_short4_rtz(char4);\n" |
30654 | "short4 __ovld __cnfn convert_short4_sat_rtz(char4);\n" |
30655 | "short4 __ovld __cnfn convert_short4_rtp(char4);\n" |
30656 | "short4 __ovld __cnfn convert_short4_sat_rtp(char4);\n" |
30657 | "short4 __ovld __cnfn convert_short4_rtn(char4);\n" |
30658 | "short4 __ovld __cnfn convert_short4_sat_rtn(char4);\n" |
30659 | "short4 __ovld __cnfn convert_short4(char4);\n" |
30660 | "short4 __ovld __cnfn convert_short4_sat(char4);\n" |
30661 | "short4 __ovld __cnfn convert_short4_rte(uchar4);\n" |
30662 | "short4 __ovld __cnfn convert_short4_sat_rte(uchar4);\n" |
30663 | "short4 __ovld __cnfn convert_short4_rtz(uchar4);\n" |
30664 | "short4 __ovld __cnfn convert_short4_sat_rtz(uchar4);\n" |
30665 | "short4 __ovld __cnfn convert_short4_rtp(uchar4);\n" |
30666 | "short4 __ovld __cnfn convert_short4_sat_rtp(uchar4);\n" |
30667 | "short4 __ovld __cnfn convert_short4_rtn(uchar4);\n" |
30668 | "short4 __ovld __cnfn convert_short4_sat_rtn(uchar4);\n" |
30669 | "short4 __ovld __cnfn convert_short4(uchar4);\n" |
30670 | "short4 __ovld __cnfn convert_short4_sat(uchar4);\n" |
30671 | "short4 __ovld __cnfn convert_short4_rte(short4);\n" |
30672 | "short4 __ovld __cnfn convert_short4_sat_rte(short4);\n" |
30673 | "short4 __ovld __cnfn convert_short4_rtz(short4);\n" |
30674 | "short4 __ovld __cnfn convert_short4_sat_rtz(short4);\n" |
30675 | "short4 __ovld __cnfn convert_short4_rtp(short4);\n" |
30676 | "short4 __ovld __cnfn convert_short4_sat_rtp(short4);\n" |
30677 | "short4 __ovld __cnfn convert_short4_rtn(short4);\n" |
30678 | "short4 __ovld __cnfn convert_short4_sat_rtn(short4);\n" |
30679 | "short4 __ovld __cnfn convert_short4(short4);\n" |
30680 | "short4 __ovld __cnfn convert_short4_sat(short4);\n" |
30681 | "short4 __ovld __cnfn convert_short4_rte(ushort4);\n" |
30682 | "short4 __ovld __cnfn convert_short4_sat_rte(ushort4);\n" |
30683 | "short4 __ovld __cnfn convert_short4_rtz(ushort4);\n" |
30684 | "short4 __ovld __cnfn convert_short4_sat_rtz(ushort4);\n" |
30685 | "short4 __ovld __cnfn convert_short4_rtp(ushort4);\n" |
30686 | "short4 __ovld __cnfn convert_short4_sat_rtp(ushort4);\n" |
30687 | "short4 __ovld __cnfn convert_short4_rtn(ushort4);\n" |
30688 | "short4 __ovld __cnfn convert_short4_sat_rtn(ushort4);\n" |
30689 | "short4 __ovld __cnfn convert_short4(ushort4);\n" |
30690 | "short4 __ovld __cnfn convert_short4_sat(ushort4);\n" |
30691 | "short4 __ovld __cnfn convert_short4_rte(int4);\n" |
30692 | "short4 __ovld __cnfn convert_short4_sat_rte(int4);\n" |
30693 | "short4 __ovld __cnfn convert_short4_rtz(int4);\n" |
30694 | "short4 __ovld __cnfn convert_short4_sat_rtz(int4);\n" |
30695 | "short4 __ovld __cnfn convert_short4_rtp(int4);\n" |
30696 | "short4 __ovld __cnfn convert_short4_sat_rtp(int4);\n" |
30697 | "short4 __ovld __cnfn convert_short4_rtn(int4);\n" |
30698 | "short4 __ovld __cnfn convert_short4_sat_rtn(int4);\n" |
30699 | "short4 __ovld __cnfn convert_short4(int4);\n" |
30700 | "short4 __ovld __cnfn convert_short4_sat(int4);\n" |
30701 | "short4 __ovld __cnfn convert_short4_rte(uint4);\n" |
30702 | "short4 __ovld __cnfn convert_short4_sat_rte(uint4);\n" |
30703 | "short4 __ovld __cnfn convert_short4_rtz(uint4);\n" |
30704 | "short4 __ovld __cnfn convert_short4_sat_rtz(uint4);\n" |
30705 | "short4 __ovld __cnfn convert_short4_rtp(uint4);\n" |
30706 | "short4 __ovld __cnfn convert_short4_sat_rtp(uint4);\n" |
30707 | "short4 __ovld __cnfn convert_short4_rtn(uint4);\n" |
30708 | "short4 __ovld __cnfn convert_short4_sat_rtn(uint4);\n" |
30709 | "short4 __ovld __cnfn convert_short4(uint4);\n" |
30710 | "short4 __ovld __cnfn convert_short4_sat(uint4);\n" |
30711 | "short4 __ovld __cnfn convert_short4_rte(long4);\n" |
30712 | "short4 __ovld __cnfn convert_short4_sat_rte(long4);\n" |
30713 | "short4 __ovld __cnfn convert_short4_rtz(long4);\n" |
30714 | "short4 __ovld __cnfn convert_short4_sat_rtz(long4);\n" |
30715 | "short4 __ovld __cnfn convert_short4_rtp(long4);\n" |
30716 | "short4 __ovld __cnfn convert_short4_sat_rtp(long4);\n" |
30717 | "short4 __ovld __cnfn convert_short4_rtn(long4);\n" |
30718 | "short4 __ovld __cnfn convert_short4_sat_rtn(long4);\n" |
30719 | "short4 __ovld __cnfn convert_short4(long4);\n" |
30720 | "short4 __ovld __cnfn convert_short4_sat(long4);\n" |
30721 | "short4 __ovld __cnfn convert_short4_rte(ulong4);\n" |
30722 | "short4 __ovld __cnfn convert_short4_sat_rte(ulong4);\n" |
30723 | "short4 __ovld __cnfn convert_short4_rtz(ulong4);\n" |
30724 | "short4 __ovld __cnfn convert_short4_sat_rtz(ulong4);\n" |
30725 | "short4 __ovld __cnfn convert_short4_rtp(ulong4);\n" |
30726 | "short4 __ovld __cnfn convert_short4_sat_rtp(ulong4);\n" |
30727 | "short4 __ovld __cnfn convert_short4_rtn(ulong4);\n" |
30728 | "short4 __ovld __cnfn convert_short4_sat_rtn(ulong4);\n" |
30729 | "short4 __ovld __cnfn convert_short4(ulong4);\n" |
30730 | "short4 __ovld __cnfn convert_short4_sat(ulong4);\n" |
30731 | "short4 __ovld __cnfn convert_short4_rte(float4);\n" |
30732 | "short4 __ovld __cnfn convert_short4_sat_rte(float4);\n" |
30733 | "short4 __ovld __cnfn convert_short4_rtz(float4);\n" |
30734 | "short4 __ovld __cnfn convert_short4_sat_rtz(float4);\n" |
30735 | "short4 __ovld __cnfn convert_short4_rtp(float4);\n" |
30736 | "short4 __ovld __cnfn convert_short4_sat_rtp(float4);\n" |
30737 | "short4 __ovld __cnfn convert_short4_rtn(float4);\n" |
30738 | "short4 __ovld __cnfn convert_short4_sat_rtn(float4);\n" |
30739 | "short4 __ovld __cnfn convert_short4(float4);\n" |
30740 | "short4 __ovld __cnfn convert_short4_sat(float4);\n" |
30741 | "ushort4 __ovld __cnfn convert_ushort4_rte(char4);\n" |
30742 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(char4);\n" |
30743 | "ushort4 __ovld __cnfn convert_ushort4_rtz(char4);\n" |
30744 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(char4);\n" |
30745 | "ushort4 __ovld __cnfn convert_ushort4_rtp(char4);\n" |
30746 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(char4);\n" |
30747 | "ushort4 __ovld __cnfn convert_ushort4_rtn(char4);\n" |
30748 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(char4);\n" |
30749 | "ushort4 __ovld __cnfn convert_ushort4(char4);\n" |
30750 | "ushort4 __ovld __cnfn convert_ushort4_sat(char4);\n" |
30751 | "ushort4 __ovld __cnfn convert_ushort4_rte(uchar4);\n" |
30752 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(uchar4);\n" |
30753 | "ushort4 __ovld __cnfn convert_ushort4_rtz(uchar4);\n" |
30754 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uchar4);\n" |
30755 | "ushort4 __ovld __cnfn convert_ushort4_rtp(uchar4);\n" |
30756 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uchar4);\n" |
30757 | "ushort4 __ovld __cnfn convert_ushort4_rtn(uchar4);\n" |
30758 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uchar4);\n" |
30759 | "ushort4 __ovld __cnfn convert_ushort4(uchar4);\n" |
30760 | "ushort4 __ovld __cnfn convert_ushort4_sat(uchar4);\n" |
30761 | "ushort4 __ovld __cnfn convert_ushort4_rte(short4);\n" |
30762 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(short4);\n" |
30763 | "ushort4 __ovld __cnfn convert_ushort4_rtz(short4);\n" |
30764 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(short4);\n" |
30765 | "ushort4 __ovld __cnfn convert_ushort4_rtp(short4);\n" |
30766 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(short4);\n" |
30767 | "ushort4 __ovld __cnfn convert_ushort4_rtn(short4);\n" |
30768 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(short4);\n" |
30769 | "ushort4 __ovld __cnfn convert_ushort4(short4);\n" |
30770 | "ushort4 __ovld __cnfn convert_ushort4_sat(short4);\n" |
30771 | "ushort4 __ovld __cnfn convert_ushort4_rte(ushort4);\n" |
30772 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(ushort4);\n" |
30773 | "ushort4 __ovld __cnfn convert_ushort4_rtz(ushort4);\n" |
30774 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ushort4);\n" |
30775 | "ushort4 __ovld __cnfn convert_ushort4_rtp(ushort4);\n" |
30776 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ushort4);\n" |
30777 | "ushort4 __ovld __cnfn convert_ushort4_rtn(ushort4);\n" |
30778 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ushort4);\n" |
30779 | "ushort4 __ovld __cnfn convert_ushort4(ushort4);\n" |
30780 | "ushort4 __ovld __cnfn convert_ushort4_sat(ushort4);\n" |
30781 | "ushort4 __ovld __cnfn convert_ushort4_rte(int4);\n" |
30782 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(int4);\n" |
30783 | "ushort4 __ovld __cnfn convert_ushort4_rtz(int4);\n" |
30784 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(int4);\n" |
30785 | "ushort4 __ovld __cnfn convert_ushort4_rtp(int4);\n" |
30786 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(int4);\n" |
30787 | "ushort4 __ovld __cnfn convert_ushort4_rtn(int4);\n" |
30788 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(int4);\n" |
30789 | "ushort4 __ovld __cnfn convert_ushort4(int4);\n" |
30790 | "ushort4 __ovld __cnfn convert_ushort4_sat(int4);\n" |
30791 | "ushort4 __ovld __cnfn convert_ushort4_rte(uint4);\n" |
30792 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(uint4);\n" |
30793 | "ushort4 __ovld __cnfn convert_ushort4_rtz(uint4);\n" |
30794 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uint4);\n" |
30795 | "ushort4 __ovld __cnfn convert_ushort4_rtp(uint4);\n" |
30796 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uint4);\n" |
30797 | "ushort4 __ovld __cnfn convert_ushort4_rtn(uint4);\n" |
30798 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uint4);\n" |
30799 | "ushort4 __ovld __cnfn convert_ushort4(uint4);\n" |
30800 | "ushort4 __ovld __cnfn convert_ushort4_sat(uint4);\n" |
30801 | "ushort4 __ovld __cnfn convert_ushort4_rte(long4);\n" |
30802 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(long4);\n" |
30803 | "ushort4 __ovld __cnfn convert_ushort4_rtz(long4);\n" |
30804 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(long4);\n" |
30805 | "ushort4 __ovld __cnfn convert_ushort4_rtp(long4);\n" |
30806 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(long4);\n" |
30807 | "ushort4 __ovld __cnfn convert_ushort4_rtn(long4);\n" |
30808 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(long4);\n" |
30809 | "ushort4 __ovld __cnfn convert_ushort4(long4);\n" |
30810 | "ushort4 __ovld __cnfn convert_ushort4_sat(long4);\n" |
30811 | "ushort4 __ovld __cnfn convert_ushort4_rte(ulong4);\n" |
30812 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(ulong4);\n" |
30813 | "ushort4 __ovld __cnfn convert_ushort4_rtz(ulong4);\n" |
30814 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ulong4);\n" |
30815 | "ushort4 __ovld __cnfn convert_ushort4_rtp(ulong4);\n" |
30816 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ulong4);\n" |
30817 | "ushort4 __ovld __cnfn convert_ushort4_rtn(ulong4);\n" |
30818 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ulong4);\n" |
30819 | "ushort4 __ovld __cnfn convert_ushort4(ulong4);\n" |
30820 | "ushort4 __ovld __cnfn convert_ushort4_sat(ulong4);\n" |
30821 | "ushort4 __ovld __cnfn convert_ushort4_rte(float4);\n" |
30822 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(float4);\n" |
30823 | "ushort4 __ovld __cnfn convert_ushort4_rtz(float4);\n" |
30824 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(float4);\n" |
30825 | "ushort4 __ovld __cnfn convert_ushort4_rtp(float4);\n" |
30826 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(float4);\n" |
30827 | "ushort4 __ovld __cnfn convert_ushort4_rtn(float4);\n" |
30828 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(float4);\n" |
30829 | "ushort4 __ovld __cnfn convert_ushort4(float4);\n" |
30830 | "ushort4 __ovld __cnfn convert_ushort4_sat(float4);\n" |
30831 | "int4 __ovld __cnfn convert_int4_rte(char4);\n" |
30832 | "int4 __ovld __cnfn convert_int4_sat_rte(char4);\n" |
30833 | "int4 __ovld __cnfn convert_int4_rtz(char4);\n" |
30834 | "int4 __ovld __cnfn convert_int4_sat_rtz(char4);\n" |
30835 | "int4 __ovld __cnfn convert_int4_rtp(char4);\n" |
30836 | "int4 __ovld __cnfn convert_int4_sat_rtp(char4);\n" |
30837 | "int4 __ovld __cnfn convert_int4_rtn(char4);\n" |
30838 | "int4 __ovld __cnfn convert_int4_sat_rtn(char4);\n" |
30839 | "int4 __ovld __cnfn convert_int4(char4);\n" |
30840 | "int4 __ovld __cnfn convert_int4_sat(char4);\n" |
30841 | "int4 __ovld __cnfn convert_int4_rte(uchar4);\n" |
30842 | "int4 __ovld __cnfn convert_int4_sat_rte(uchar4);\n" |
30843 | "int4 __ovld __cnfn convert_int4_rtz(uchar4);\n" |
30844 | "int4 __ovld __cnfn convert_int4_sat_rtz(uchar4);\n" |
30845 | "int4 __ovld __cnfn convert_int4_rtp(uchar4);\n" |
30846 | "int4 __ovld __cnfn convert_int4_sat_rtp(uchar4);\n" |
30847 | "int4 __ovld __cnfn convert_int4_rtn(uchar4);\n" |
30848 | "int4 __ovld __cnfn convert_int4_sat_rtn(uchar4);\n" |
30849 | "int4 __ovld __cnfn convert_int4(uchar4);\n" |
30850 | "int4 __ovld __cnfn convert_int4_sat(uchar4);\n" |
30851 | "int4 __ovld __cnfn convert_int4_rte(short4);\n" |
30852 | "int4 __ovld __cnfn convert_int4_sat_rte(short4);\n" |
30853 | "int4 __ovld __cnfn convert_int4_rtz(short4);\n" |
30854 | "int4 __ovld __cnfn convert_int4_sat_rtz(short4);\n" |
30855 | "int4 __ovld __cnfn convert_int4_rtp(short4);\n" |
30856 | "int4 __ovld __cnfn convert_int4_sat_rtp(short4);\n" |
30857 | "int4 __ovld __cnfn convert_int4_rtn(short4);\n" |
30858 | "int4 __ovld __cnfn convert_int4_sat_rtn(short4);\n" |
30859 | "int4 __ovld __cnfn convert_int4(short4);\n" |
30860 | "int4 __ovld __cnfn convert_int4_sat(short4);\n" |
30861 | "int4 __ovld __cnfn convert_int4_rte(ushort4);\n" |
30862 | "int4 __ovld __cnfn convert_int4_sat_rte(ushort4);\n" |
30863 | "int4 __ovld __cnfn convert_int4_rtz(ushort4);\n" |
30864 | "int4 __ovld __cnfn convert_int4_sat_rtz(ushort4);\n" |
30865 | "int4 __ovld __cnfn convert_int4_rtp(ushort4);\n" |
30866 | "int4 __ovld __cnfn convert_int4_sat_rtp(ushort4);\n" |
30867 | "int4 __ovld __cnfn convert_int4_rtn(ushort4);\n" |
30868 | "int4 __ovld __cnfn convert_int4_sat_rtn(ushort4);\n" |
30869 | "int4 __ovld __cnfn convert_int4(ushort4);\n" |
30870 | "int4 __ovld __cnfn convert_int4_sat(ushort4);\n" |
30871 | "int4 __ovld __cnfn convert_int4_rte(int4);\n" |
30872 | "int4 __ovld __cnfn convert_int4_sat_rte(int4);\n" |
30873 | "int4 __ovld __cnfn convert_int4_rtz(int4);\n" |
30874 | "int4 __ovld __cnfn convert_int4_sat_rtz(int4);\n" |
30875 | "int4 __ovld __cnfn convert_int4_rtp(int4);\n" |
30876 | "int4 __ovld __cnfn convert_int4_sat_rtp(int4);\n" |
30877 | "int4 __ovld __cnfn convert_int4_rtn(int4);\n" |
30878 | "int4 __ovld __cnfn convert_int4_sat_rtn(int4);\n" |
30879 | "int4 __ovld __cnfn convert_int4(int4);\n" |
30880 | "int4 __ovld __cnfn convert_int4_sat(int4);\n" |
30881 | "int4 __ovld __cnfn convert_int4_rte(uint4);\n" |
30882 | "int4 __ovld __cnfn convert_int4_sat_rte(uint4);\n" |
30883 | "int4 __ovld __cnfn convert_int4_rtz(uint4);\n" |
30884 | "int4 __ovld __cnfn convert_int4_sat_rtz(uint4);\n" |
30885 | "int4 __ovld __cnfn convert_int4_rtp(uint4);\n" |
30886 | "int4 __ovld __cnfn convert_int4_sat_rtp(uint4);\n" |
30887 | "int4 __ovld __cnfn convert_int4_rtn(uint4);\n" |
30888 | "int4 __ovld __cnfn convert_int4_sat_rtn(uint4);\n" |
30889 | "int4 __ovld __cnfn convert_int4(uint4);\n" |
30890 | "int4 __ovld __cnfn convert_int4_sat(uint4);\n" |
30891 | "int4 __ovld __cnfn convert_int4_rte(long4);\n" |
30892 | "int4 __ovld __cnfn convert_int4_sat_rte(long4);\n" |
30893 | "int4 __ovld __cnfn convert_int4_rtz(long4);\n" |
30894 | "int4 __ovld __cnfn convert_int4_sat_rtz(long4);\n" |
30895 | "int4 __ovld __cnfn convert_int4_rtp(long4);\n" |
30896 | "int4 __ovld __cnfn convert_int4_sat_rtp(long4);\n" |
30897 | "int4 __ovld __cnfn convert_int4_rtn(long4);\n" |
30898 | "int4 __ovld __cnfn convert_int4_sat_rtn(long4);\n" |
30899 | "int4 __ovld __cnfn convert_int4(long4);\n" |
30900 | "int4 __ovld __cnfn convert_int4_sat(long4);\n" |
30901 | "int4 __ovld __cnfn convert_int4_rte(ulong4);\n" |
30902 | "int4 __ovld __cnfn convert_int4_sat_rte(ulong4);\n" |
30903 | "int4 __ovld __cnfn convert_int4_rtz(ulong4);\n" |
30904 | "int4 __ovld __cnfn convert_int4_sat_rtz(ulong4);\n" |
30905 | "int4 __ovld __cnfn convert_int4_rtp(ulong4);\n" |
30906 | "int4 __ovld __cnfn convert_int4_sat_rtp(ulong4);\n" |
30907 | "int4 __ovld __cnfn convert_int4_rtn(ulong4);\n" |
30908 | "int4 __ovld __cnfn convert_int4_sat_rtn(ulong4);\n" |
30909 | "int4 __ovld __cnfn convert_int4(ulong4);\n" |
30910 | "int4 __ovld __cnfn convert_int4_sat(ulong4);\n" |
30911 | "int4 __ovld __cnfn convert_int4_rte(float4);\n" |
30912 | "int4 __ovld __cnfn convert_int4_sat_rte(float4);\n" |
30913 | "int4 __ovld __cnfn convert_int4_rtz(float4);\n" |
30914 | "int4 __ovld __cnfn convert_int4_sat_rtz(float4);\n" |
30915 | "int4 __ovld __cnfn convert_int4_rtp(float4);\n" |
30916 | "int4 __ovld __cnfn convert_int4_sat_rtp(float4);\n" |
30917 | "int4 __ovld __cnfn convert_int4_rtn(float4);\n" |
30918 | "int4 __ovld __cnfn convert_int4_sat_rtn(float4);\n" |
30919 | "int4 __ovld __cnfn convert_int4(float4);\n" |
30920 | "int4 __ovld __cnfn convert_int4_sat(float4);\n" |
30921 | "uint4 __ovld __cnfn convert_uint4_rte(char4);\n" |
30922 | "uint4 __ovld __cnfn convert_uint4_sat_rte(char4);\n" |
30923 | "uint4 __ovld __cnfn convert_uint4_rtz(char4);\n" |
30924 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(char4);\n" |
30925 | "uint4 __ovld __cnfn convert_uint4_rtp(char4);\n" |
30926 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(char4);\n" |
30927 | "uint4 __ovld __cnfn convert_uint4_rtn(char4);\n" |
30928 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(char4);\n" |
30929 | "uint4 __ovld __cnfn convert_uint4(char4);\n" |
30930 | "uint4 __ovld __cnfn convert_uint4_sat(char4);\n" |
30931 | "uint4 __ovld __cnfn convert_uint4_rte(uchar4);\n" |
30932 | "uint4 __ovld __cnfn convert_uint4_sat_rte(uchar4);\n" |
30933 | "uint4 __ovld __cnfn convert_uint4_rtz(uchar4);\n" |
30934 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(uchar4);\n" |
30935 | "uint4 __ovld __cnfn convert_uint4_rtp(uchar4);\n" |
30936 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(uchar4);\n" |
30937 | "uint4 __ovld __cnfn convert_uint4_rtn(uchar4);\n" |
30938 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(uchar4);\n" |
30939 | "uint4 __ovld __cnfn convert_uint4(uchar4);\n" |
30940 | "uint4 __ovld __cnfn convert_uint4_sat(uchar4);\n" |
30941 | "uint4 __ovld __cnfn convert_uint4_rte(short4);\n" |
30942 | "uint4 __ovld __cnfn convert_uint4_sat_rte(short4);\n" |
30943 | "uint4 __ovld __cnfn convert_uint4_rtz(short4);\n" |
30944 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(short4);\n" |
30945 | "uint4 __ovld __cnfn convert_uint4_rtp(short4);\n" |
30946 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(short4);\n" |
30947 | "uint4 __ovld __cnfn convert_uint4_rtn(short4);\n" |
30948 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(short4);\n" |
30949 | "uint4 __ovld __cnfn convert_uint4(short4);\n" |
30950 | "uint4 __ovld __cnfn convert_uint4_sat(short4);\n" |
30951 | "uint4 __ovld __cnfn convert_uint4_rte(ushort4);\n" |
30952 | "uint4 __ovld __cnfn convert_uint4_sat_rte(ushort4);\n" |
30953 | "uint4 __ovld __cnfn convert_uint4_rtz(ushort4);\n" |
30954 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(ushort4);\n" |
30955 | "uint4 __ovld __cnfn convert_uint4_rtp(ushort4);\n" |
30956 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(ushort4);\n" |
30957 | "uint4 __ovld __cnfn convert_uint4_rtn(ushort4);\n" |
30958 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(ushort4);\n" |
30959 | "uint4 __ovld __cnfn convert_uint4(ushort4);\n" |
30960 | "uint4 __ovld __cnfn convert_uint4_sat(ushort4);\n" |
30961 | "uint4 __ovld __cnfn convert_uint4_rte(int4);\n" |
30962 | "uint4 __ovld __cnfn convert_uint4_sat_rte(int4);\n" |
30963 | "uint4 __ovld __cnfn convert_uint4_rtz(int4);\n" |
30964 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(int4);\n" |
30965 | "uint4 __ovld __cnfn convert_uint4_rtp(int4);\n" |
30966 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(int4);\n" |
30967 | "uint4 __ovld __cnfn convert_uint4_rtn(int4);\n" |
30968 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(int4);\n" |
30969 | "uint4 __ovld __cnfn convert_uint4(int4);\n" |
30970 | "uint4 __ovld __cnfn convert_uint4_sat(int4);\n" |
30971 | "uint4 __ovld __cnfn convert_uint4_rte(uint4);\n" |
30972 | "uint4 __ovld __cnfn convert_uint4_sat_rte(uint4);\n" |
30973 | "uint4 __ovld __cnfn convert_uint4_rtz(uint4);\n" |
30974 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(uint4);\n" |
30975 | "uint4 __ovld __cnfn convert_uint4_rtp(uint4);\n" |
30976 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(uint4);\n" |
30977 | "uint4 __ovld __cnfn convert_uint4_rtn(uint4);\n" |
30978 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(uint4);\n" |
30979 | "uint4 __ovld __cnfn convert_uint4(uint4);\n" |
30980 | "uint4 __ovld __cnfn convert_uint4_sat(uint4);\n" |
30981 | "uint4 __ovld __cnfn convert_uint4_rte(long4);\n" |
30982 | "uint4 __ovld __cnfn convert_uint4_sat_rte(long4);\n" |
30983 | "uint4 __ovld __cnfn convert_uint4_rtz(long4);\n" |
30984 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(long4);\n" |
30985 | "uint4 __ovld __cnfn convert_uint4_rtp(long4);\n" |
30986 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(long4);\n" |
30987 | "uint4 __ovld __cnfn convert_uint4_rtn(long4);\n" |
30988 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(long4);\n" |
30989 | "uint4 __ovld __cnfn convert_uint4(long4);\n" |
30990 | "uint4 __ovld __cnfn convert_uint4_sat(long4);\n" |
30991 | "uint4 __ovld __cnfn convert_uint4_rte(ulong4);\n" |
30992 | "uint4 __ovld __cnfn convert_uint4_sat_rte(ulong4);\n" |
30993 | "uint4 __ovld __cnfn convert_uint4_rtz(ulong4);\n" |
30994 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(ulong4);\n" |
30995 | "uint4 __ovld __cnfn convert_uint4_rtp(ulong4);\n" |
30996 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(ulong4);\n" |
30997 | "uint4 __ovld __cnfn convert_uint4_rtn(ulong4);\n" |
30998 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(ulong4);\n" |
30999 | "uint4 __ovld __cnfn convert_uint4(ulong4);\n" |
31000 | "uint4 __ovld __cnfn convert_uint4_sat(ulong4);\n" |
31001 | "uint4 __ovld __cnfn convert_uint4_rte(float4);\n" |
31002 | "uint4 __ovld __cnfn convert_uint4_sat_rte(float4);\n" |
31003 | "uint4 __ovld __cnfn convert_uint4_rtz(float4);\n" |
31004 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(float4);\n" |
31005 | "uint4 __ovld __cnfn convert_uint4_rtp(float4);\n" |
31006 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(float4);\n" |
31007 | "uint4 __ovld __cnfn convert_uint4_rtn(float4);\n" |
31008 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(float4);\n" |
31009 | "uint4 __ovld __cnfn convert_uint4(float4);\n" |
31010 | "uint4 __ovld __cnfn convert_uint4_sat(float4);\n" |
31011 | "long4 __ovld __cnfn convert_long4_rte(char4);\n" |
31012 | "long4 __ovld __cnfn convert_long4_sat_rte(char4);\n" |
31013 | "long4 __ovld __cnfn convert_long4_rtz(char4);\n" |
31014 | "long4 __ovld __cnfn convert_long4_sat_rtz(char4);\n" |
31015 | "long4 __ovld __cnfn convert_long4_rtp(char4);\n" |
31016 | "long4 __ovld __cnfn convert_long4_sat_rtp(char4);\n" |
31017 | "long4 __ovld __cnfn convert_long4_rtn(char4);\n" |
31018 | "long4 __ovld __cnfn convert_long4_sat_rtn(char4);\n" |
31019 | "long4 __ovld __cnfn convert_long4(char4);\n" |
31020 | "long4 __ovld __cnfn convert_long4_sat(char4);\n" |
31021 | "long4 __ovld __cnfn convert_long4_rte(uchar4);\n" |
31022 | "long4 __ovld __cnfn convert_long4_sat_rte(uchar4);\n" |
31023 | "long4 __ovld __cnfn convert_long4_rtz(uchar4);\n" |
31024 | "long4 __ovld __cnfn convert_long4_sat_rtz(uchar4);\n" |
31025 | "long4 __ovld __cnfn convert_long4_rtp(uchar4);\n" |
31026 | "long4 __ovld __cnfn convert_long4_sat_rtp(uchar4);\n" |
31027 | "long4 __ovld __cnfn convert_long4_rtn(uchar4);\n" |
31028 | "long4 __ovld __cnfn convert_long4_sat_rtn(uchar4);\n" |
31029 | "long4 __ovld __cnfn convert_long4(uchar4);\n" |
31030 | "long4 __ovld __cnfn convert_long4_sat(uchar4);\n" |
31031 | "long4 __ovld __cnfn convert_long4_rte(short4);\n" |
31032 | "long4 __ovld __cnfn convert_long4_sat_rte(short4);\n" |
31033 | "long4 __ovld __cnfn convert_long4_rtz(short4);\n" |
31034 | "long4 __ovld __cnfn convert_long4_sat_rtz(short4);\n" |
31035 | "long4 __ovld __cnfn convert_long4_rtp(short4);\n" |
31036 | "long4 __ovld __cnfn convert_long4_sat_rtp(short4);\n" |
31037 | "long4 __ovld __cnfn convert_long4_rtn(short4);\n" |
31038 | "long4 __ovld __cnfn convert_long4_sat_rtn(short4);\n" |
31039 | "long4 __ovld __cnfn convert_long4(short4);\n" |
31040 | "long4 __ovld __cnfn convert_long4_sat(short4);\n" |
31041 | "long4 __ovld __cnfn convert_long4_rte(ushort4);\n" |
31042 | "long4 __ovld __cnfn convert_long4_sat_rte(ushort4);\n" |
31043 | "long4 __ovld __cnfn convert_long4_rtz(ushort4);\n" |
31044 | "long4 __ovld __cnfn convert_long4_sat_rtz(ushort4);\n" |
31045 | "long4 __ovld __cnfn convert_long4_rtp(ushort4);\n" |
31046 | "long4 __ovld __cnfn convert_long4_sat_rtp(ushort4);\n" |
31047 | "long4 __ovld __cnfn convert_long4_rtn(ushort4);\n" |
31048 | "long4 __ovld __cnfn convert_long4_sat_rtn(ushort4);\n" |
31049 | "long4 __ovld __cnfn convert_long4(ushort4);\n" |
31050 | "long4 __ovld __cnfn convert_long4_sat(ushort4);\n" |
31051 | "long4 __ovld __cnfn convert_long4_rte(int4);\n" |
31052 | "long4 __ovld __cnfn convert_long4_sat_rte(int4);\n" |
31053 | "long4 __ovld __cnfn convert_long4_rtz(int4);\n" |
31054 | "long4 __ovld __cnfn convert_long4_sat_rtz(int4);\n" |
31055 | "long4 __ovld __cnfn convert_long4_rtp(int4);\n" |
31056 | "long4 __ovld __cnfn convert_long4_sat_rtp(int4);\n" |
31057 | "long4 __ovld __cnfn convert_long4_rtn(int4);\n" |
31058 | "long4 __ovld __cnfn convert_long4_sat_rtn(int4);\n" |
31059 | "long4 __ovld __cnfn convert_long4(int4);\n" |
31060 | "long4 __ovld __cnfn convert_long4_sat(int4);\n" |
31061 | "long4 __ovld __cnfn convert_long4_rte(uint4);\n" |
31062 | "long4 __ovld __cnfn convert_long4_sat_rte(uint4);\n" |
31063 | "long4 __ovld __cnfn convert_long4_rtz(uint4);\n" |
31064 | "long4 __ovld __cnfn convert_long4_sat_rtz(uint4);\n" |
31065 | "long4 __ovld __cnfn convert_long4_rtp(uint4);\n" |
31066 | "long4 __ovld __cnfn convert_long4_sat_rtp(uint4);\n" |
31067 | "long4 __ovld __cnfn convert_long4_rtn(uint4);\n" |
31068 | "long4 __ovld __cnfn convert_long4_sat_rtn(uint4);\n" |
31069 | "long4 __ovld __cnfn convert_long4(uint4);\n" |
31070 | "long4 __ovld __cnfn convert_long4_sat(uint4);\n" |
31071 | "long4 __ovld __cnfn convert_long4_rte(long4);\n" |
31072 | "long4 __ovld __cnfn convert_long4_sat_rte(long4);\n" |
31073 | "long4 __ovld __cnfn convert_long4_rtz(long4);\n" |
31074 | "long4 __ovld __cnfn convert_long4_sat_rtz(long4);\n" |
31075 | "long4 __ovld __cnfn convert_long4_rtp(long4);\n" |
31076 | "long4 __ovld __cnfn convert_long4_sat_rtp(long4);\n" |
31077 | "long4 __ovld __cnfn convert_long4_rtn(long4);\n" |
31078 | "long4 __ovld __cnfn convert_long4_sat_rtn(long4);\n" |
31079 | "long4 __ovld __cnfn convert_long4(long4);\n" |
31080 | "long4 __ovld __cnfn convert_long4_sat(long4);\n" |
31081 | "long4 __ovld __cnfn convert_long4_rte(ulong4);\n" |
31082 | "long4 __ovld __cnfn convert_long4_sat_rte(ulong4);\n" |
31083 | "long4 __ovld __cnfn convert_long4_rtz(ulong4);\n" |
31084 | "long4 __ovld __cnfn convert_long4_sat_rtz(ulong4);\n" |
31085 | "long4 __ovld __cnfn convert_long4_rtp(ulong4);\n" |
31086 | "long4 __ovld __cnfn convert_long4_sat_rtp(ulong4);\n" |
31087 | "long4 __ovld __cnfn convert_long4_rtn(ulong4);\n" |
31088 | "long4 __ovld __cnfn convert_long4_sat_rtn(ulong4);\n" |
31089 | "long4 __ovld __cnfn convert_long4(ulong4);\n" |
31090 | "long4 __ovld __cnfn convert_long4_sat(ulong4);\n" |
31091 | "long4 __ovld __cnfn convert_long4_rte(float4);\n" |
31092 | "long4 __ovld __cnfn convert_long4_sat_rte(float4);\n" |
31093 | "long4 __ovld __cnfn convert_long4_rtz(float4);\n" |
31094 | "long4 __ovld __cnfn convert_long4_sat_rtz(float4);\n" |
31095 | "long4 __ovld __cnfn convert_long4_rtp(float4);\n" |
31096 | "long4 __ovld __cnfn convert_long4_sat_rtp(float4);\n" |
31097 | "long4 __ovld __cnfn convert_long4_rtn(float4);\n" |
31098 | "long4 __ovld __cnfn convert_long4_sat_rtn(float4);\n" |
31099 | "long4 __ovld __cnfn convert_long4(float4);\n" |
31100 | "long4 __ovld __cnfn convert_long4_sat(float4);\n" |
31101 | "ulong4 __ovld __cnfn convert_ulong4_rte(char4);\n" |
31102 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(char4);\n" |
31103 | "ulong4 __ovld __cnfn convert_ulong4_rtz(char4);\n" |
31104 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(char4);\n" |
31105 | "ulong4 __ovld __cnfn convert_ulong4_rtp(char4);\n" |
31106 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(char4);\n" |
31107 | "ulong4 __ovld __cnfn convert_ulong4_rtn(char4);\n" |
31108 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(char4);\n" |
31109 | "ulong4 __ovld __cnfn convert_ulong4(char4);\n" |
31110 | "ulong4 __ovld __cnfn convert_ulong4_sat(char4);\n" |
31111 | "ulong4 __ovld __cnfn convert_ulong4_rte(uchar4);\n" |
31112 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(uchar4);\n" |
31113 | "ulong4 __ovld __cnfn convert_ulong4_rtz(uchar4);\n" |
31114 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uchar4);\n" |
31115 | "ulong4 __ovld __cnfn convert_ulong4_rtp(uchar4);\n" |
31116 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uchar4);\n" |
31117 | "ulong4 __ovld __cnfn convert_ulong4_rtn(uchar4);\n" |
31118 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uchar4);\n" |
31119 | "ulong4 __ovld __cnfn convert_ulong4(uchar4);\n" |
31120 | "ulong4 __ovld __cnfn convert_ulong4_sat(uchar4);\n" |
31121 | "ulong4 __ovld __cnfn convert_ulong4_rte(short4);\n" |
31122 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(short4);\n" |
31123 | "ulong4 __ovld __cnfn convert_ulong4_rtz(short4);\n" |
31124 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(short4);\n" |
31125 | "ulong4 __ovld __cnfn convert_ulong4_rtp(short4);\n" |
31126 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(short4);\n" |
31127 | "ulong4 __ovld __cnfn convert_ulong4_rtn(short4);\n" |
31128 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(short4);\n" |
31129 | "ulong4 __ovld __cnfn convert_ulong4(short4);\n" |
31130 | "ulong4 __ovld __cnfn convert_ulong4_sat(short4);\n" |
31131 | "ulong4 __ovld __cnfn convert_ulong4_rte(ushort4);\n" |
31132 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(ushort4);\n" |
31133 | "ulong4 __ovld __cnfn convert_ulong4_rtz(ushort4);\n" |
31134 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ushort4);\n" |
31135 | "ulong4 __ovld __cnfn convert_ulong4_rtp(ushort4);\n" |
31136 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(ushort4);\n" |
31137 | "ulong4 __ovld __cnfn convert_ulong4_rtn(ushort4);\n" |
31138 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ushort4);\n" |
31139 | "ulong4 __ovld __cnfn convert_ulong4(ushort4);\n" |
31140 | "ulong4 __ovld __cnfn convert_ulong4_sat(ushort4);\n" |
31141 | "ulong4 __ovld __cnfn convert_ulong4_rte(int4);\n" |
31142 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(int4);\n" |
31143 | "ulong4 __ovld __cnfn convert_ulong4_rtz(int4);\n" |
31144 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(int4);\n" |
31145 | "ulong4 __ovld __cnfn convert_ulong4_rtp(int4);\n" |
31146 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(int4);\n" |
31147 | "ulong4 __ovld __cnfn convert_ulong4_rtn(int4);\n" |
31148 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(int4);\n" |
31149 | "ulong4 __ovld __cnfn convert_ulong4(int4);\n" |
31150 | "ulong4 __ovld __cnfn convert_ulong4_sat(int4);\n" |
31151 | "ulong4 __ovld __cnfn convert_ulong4_rte(uint4);\n" |
31152 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(uint4);\n" |
31153 | "ulong4 __ovld __cnfn convert_ulong4_rtz(uint4);\n" |
31154 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uint4);\n" |
31155 | "ulong4 __ovld __cnfn convert_ulong4_rtp(uint4);\n" |
31156 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uint4);\n" |
31157 | "ulong4 __ovld __cnfn convert_ulong4_rtn(uint4);\n" |
31158 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uint4);\n" |
31159 | "ulong4 __ovld __cnfn convert_ulong4(uint4);\n" |
31160 | "ulong4 __ovld __cnfn convert_ulong4_sat(uint4);\n" |
31161 | "ulong4 __ovld __cnfn convert_ulong4_rte(long4);\n" |
31162 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(long4);\n" |
31163 | "ulong4 __ovld __cnfn convert_ulong4_rtz(long4);\n" |
31164 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(long4);\n" |
31165 | "ulong4 __ovld __cnfn convert_ulong4_rtp(long4);\n" |
31166 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(long4);\n" |
31167 | "ulong4 __ovld __cnfn convert_ulong4_rtn(long4);\n" |
31168 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(long4);\n" |
31169 | "ulong4 __ovld __cnfn convert_ulong4(long4);\n" |
31170 | "ulong4 __ovld __cnfn convert_ulong4_sat(long4);\n" |
31171 | "ulong4 __ovld __cnfn convert_ulong4_rte(ulong4);\n" |
31172 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(ulong4);\n" |
31173 | "ulong4 __ovld __cnfn convert_ulong4_rtz(ulong4);\n" |
31174 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ulong4);\n" |
31175 | "ulong4 __ovld __cnfn convert_ulong4_rtp(ulong4);\n" |
31176 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(ulong4);\n" |
31177 | "ulong4 __ovld __cnfn convert_ulong4_rtn(ulong4);\n" |
31178 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ulong4);\n" |
31179 | "ulong4 __ovld __cnfn convert_ulong4(ulong4);\n" |
31180 | "ulong4 __ovld __cnfn convert_ulong4_sat(ulong4);\n" |
31181 | "ulong4 __ovld __cnfn convert_ulong4_rte(float4);\n" |
31182 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(float4);\n" |
31183 | "ulong4 __ovld __cnfn convert_ulong4_rtz(float4);\n" |
31184 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(float4);\n" |
31185 | "ulong4 __ovld __cnfn convert_ulong4_rtp(float4);\n" |
31186 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(float4);\n" |
31187 | "ulong4 __ovld __cnfn convert_ulong4_rtn(float4);\n" |
31188 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(float4);\n" |
31189 | "ulong4 __ovld __cnfn convert_ulong4(float4);\n" |
31190 | "ulong4 __ovld __cnfn convert_ulong4_sat(float4);\n" |
31191 | "float4 __ovld __cnfn convert_float4_rte(char4);\n" |
31192 | "float4 __ovld __cnfn convert_float4_rtz(char4);\n" |
31193 | "float4 __ovld __cnfn convert_float4_rtp(char4);\n" |
31194 | "float4 __ovld __cnfn convert_float4_rtn(char4);\n" |
31195 | "float4 __ovld __cnfn convert_float4(char4);\n" |
31196 | "float4 __ovld __cnfn convert_float4_rte(uchar4);\n" |
31197 | "float4 __ovld __cnfn convert_float4_rtz(uchar4);\n" |
31198 | "float4 __ovld __cnfn convert_float4_rtp(uchar4);\n" |
31199 | "float4 __ovld __cnfn convert_float4_rtn(uchar4);\n" |
31200 | "float4 __ovld __cnfn convert_float4(uchar4);\n" |
31201 | "float4 __ovld __cnfn convert_float4_rte(short4);\n" |
31202 | "float4 __ovld __cnfn convert_float4_rtz(short4);\n" |
31203 | "float4 __ovld __cnfn convert_float4_rtp(short4);\n" |
31204 | "float4 __ovld __cnfn convert_float4_rtn(short4);\n" |
31205 | "float4 __ovld __cnfn convert_float4(short4);\n" |
31206 | "float4 __ovld __cnfn convert_float4_rte(ushort4);\n" |
31207 | "float4 __ovld __cnfn convert_float4_rtz(ushort4);\n" |
31208 | "float4 __ovld __cnfn convert_float4_rtp(ushort4);\n" |
31209 | "float4 __ovld __cnfn convert_float4_rtn(ushort4);\n" |
31210 | "float4 __ovld __cnfn convert_float4(ushort4);\n" |
31211 | "float4 __ovld __cnfn convert_float4_rte(int4);\n" |
31212 | "float4 __ovld __cnfn convert_float4_rtz(int4);\n" |
31213 | "float4 __ovld __cnfn convert_float4_rtp(int4);\n" |
31214 | "float4 __ovld __cnfn convert_float4_rtn(int4);\n" |
31215 | "float4 __ovld __cnfn convert_float4(int4);\n" |
31216 | "float4 __ovld __cnfn convert_float4_rte(uint4);\n" |
31217 | "float4 __ovld __cnfn convert_float4_rtz(uint4);\n" |
31218 | "float4 __ovld __cnfn convert_float4_rtp(uint4);\n" |
31219 | "float4 __ovld __cnfn convert_float4_rtn(uint4);\n" |
31220 | "float4 __ovld __cnfn convert_float4(uint4);\n" |
31221 | "float4 __ovld __cnfn convert_float4_rte(long4);\n" |
31222 | "float4 __ovld __cnfn convert_float4_rtz(long4);\n" |
31223 | "float4 __ovld __cnfn convert_float4_rtp(long4);\n" |
31224 | "float4 __ovld __cnfn convert_float4_rtn(long4);\n" |
31225 | "float4 __ovld __cnfn convert_float4(long4);\n" |
31226 | "float4 __ovld __cnfn convert_float4_rte(ulong4);\n" |
31227 | "float4 __ovld __cnfn convert_float4_rtz(ulong4);\n" |
31228 | "float4 __ovld __cnfn convert_float4_rtp(ulong4);\n" |
31229 | "float4 __ovld __cnfn convert_float4_rtn(ulong4);\n" |
31230 | "float4 __ovld __cnfn convert_float4(ulong4);\n" |
31231 | "float4 __ovld __cnfn convert_float4_rte(float4);\n" |
31232 | "float4 __ovld __cnfn convert_float4_rtz(float4);\n" |
31233 | "float4 __ovld __cnfn convert_float4_rtp(float4);\n" |
31234 | "float4 __ovld __cnfn convert_float4_rtn(float4);\n" |
31235 | "float4 __ovld __cnfn convert_float4(float4);\n" |
31236 | "char8 __ovld __cnfn convert_char8_rte(char8);\n" |
31237 | "char8 __ovld __cnfn convert_char8_sat_rte(char8);\n" |
31238 | "char8 __ovld __cnfn convert_char8_rtz(char8);\n" |
31239 | "char8 __ovld __cnfn convert_char8_sat_rtz(char8);\n" |
31240 | "char8 __ovld __cnfn convert_char8_rtp(char8);\n" |
31241 | "char8 __ovld __cnfn convert_char8_sat_rtp(char8);\n" |
31242 | "char8 __ovld __cnfn convert_char8_rtn(char8);\n" |
31243 | "char8 __ovld __cnfn convert_char8_sat_rtn(char8);\n" |
31244 | "char8 __ovld __cnfn convert_char8(char8);\n" |
31245 | "char8 __ovld __cnfn convert_char8_sat(char8);\n" |
31246 | "char8 __ovld __cnfn convert_char8_rte(uchar8);\n" |
31247 | "char8 __ovld __cnfn convert_char8_sat_rte(uchar8);\n" |
31248 | "char8 __ovld __cnfn convert_char8_rtz(uchar8);\n" |
31249 | "char8 __ovld __cnfn convert_char8_sat_rtz(uchar8);\n" |
31250 | "char8 __ovld __cnfn convert_char8_rtp(uchar8);\n" |
31251 | "char8 __ovld __cnfn convert_char8_sat_rtp(uchar8);\n" |
31252 | "char8 __ovld __cnfn convert_char8_rtn(uchar8);\n" |
31253 | "char8 __ovld __cnfn convert_char8_sat_rtn(uchar8);\n" |
31254 | "char8 __ovld __cnfn convert_char8(uchar8);\n" |
31255 | "char8 __ovld __cnfn convert_char8_sat(uchar8);\n" |
31256 | "char8 __ovld __cnfn convert_char8_rte(short8);\n" |
31257 | "char8 __ovld __cnfn convert_char8_sat_rte(short8);\n" |
31258 | "char8 __ovld __cnfn convert_char8_rtz(short8);\n" |
31259 | "char8 __ovld __cnfn convert_char8_sat_rtz(short8);\n" |
31260 | "char8 __ovld __cnfn convert_char8_rtp(short8);\n" |
31261 | "char8 __ovld __cnfn convert_char8_sat_rtp(short8);\n" |
31262 | "char8 __ovld __cnfn convert_char8_rtn(short8);\n" |
31263 | "char8 __ovld __cnfn convert_char8_sat_rtn(short8);\n" |
31264 | "char8 __ovld __cnfn convert_char8(short8);\n" |
31265 | "char8 __ovld __cnfn convert_char8_sat(short8);\n" |
31266 | "char8 __ovld __cnfn convert_char8_rte(ushort8);\n" |
31267 | "char8 __ovld __cnfn convert_char8_sat_rte(ushort8);\n" |
31268 | "char8 __ovld __cnfn convert_char8_rtz(ushort8);\n" |
31269 | "char8 __ovld __cnfn convert_char8_sat_rtz(ushort8);\n" |
31270 | "char8 __ovld __cnfn convert_char8_rtp(ushort8);\n" |
31271 | "char8 __ovld __cnfn convert_char8_sat_rtp(ushort8);\n" |
31272 | "char8 __ovld __cnfn convert_char8_rtn(ushort8);\n" |
31273 | "char8 __ovld __cnfn convert_char8_sat_rtn(ushort8);\n" |
31274 | "char8 __ovld __cnfn convert_char8(ushort8);\n" |
31275 | "char8 __ovld __cnfn convert_char8_sat(ushort8);\n" |
31276 | "char8 __ovld __cnfn convert_char8_rte(int8);\n" |
31277 | "char8 __ovld __cnfn convert_char8_sat_rte(int8);\n" |
31278 | "char8 __ovld __cnfn convert_char8_rtz(int8);\n" |
31279 | "char8 __ovld __cnfn convert_char8_sat_rtz(int8);\n" |
31280 | "char8 __ovld __cnfn convert_char8_rtp(int8);\n" |
31281 | "char8 __ovld __cnfn convert_char8_sat_rtp(int8);\n" |
31282 | "char8 __ovld __cnfn convert_char8_rtn(int8);\n" |
31283 | "char8 __ovld __cnfn convert_char8_sat_rtn(int8);\n" |
31284 | "char8 __ovld __cnfn convert_char8(int8);\n" |
31285 | "char8 __ovld __cnfn convert_char8_sat(int8);\n" |
31286 | "char8 __ovld __cnfn convert_char8_rte(uint8);\n" |
31287 | "char8 __ovld __cnfn convert_char8_sat_rte(uint8);\n" |
31288 | "char8 __ovld __cnfn convert_char8_rtz(uint8);\n" |
31289 | "char8 __ovld __cnfn convert_char8_sat_rtz(uint8);\n" |
31290 | "char8 __ovld __cnfn convert_char8_rtp(uint8);\n" |
31291 | "char8 __ovld __cnfn convert_char8_sat_rtp(uint8);\n" |
31292 | "char8 __ovld __cnfn convert_char8_rtn(uint8);\n" |
31293 | "char8 __ovld __cnfn convert_char8_sat_rtn(uint8);\n" |
31294 | "char8 __ovld __cnfn convert_char8(uint8);\n" |
31295 | "char8 __ovld __cnfn convert_char8_sat(uint8);\n" |
31296 | "char8 __ovld __cnfn convert_char8_rte(long8);\n" |
31297 | "char8 __ovld __cnfn convert_char8_sat_rte(long8);\n" |
31298 | "char8 __ovld __cnfn convert_char8_rtz(long8);\n" |
31299 | "char8 __ovld __cnfn convert_char8_sat_rtz(long8);\n" |
31300 | "char8 __ovld __cnfn convert_char8_rtp(long8);\n" |
31301 | "char8 __ovld __cnfn convert_char8_sat_rtp(long8);\n" |
31302 | "char8 __ovld __cnfn convert_char8_rtn(long8);\n" |
31303 | "char8 __ovld __cnfn convert_char8_sat_rtn(long8);\n" |
31304 | "char8 __ovld __cnfn convert_char8(long8);\n" |
31305 | "char8 __ovld __cnfn convert_char8_sat(long8);\n" |
31306 | "char8 __ovld __cnfn convert_char8_rte(ulong8);\n" |
31307 | "char8 __ovld __cnfn convert_char8_sat_rte(ulong8);\n" |
31308 | "char8 __ovld __cnfn convert_char8_rtz(ulong8);\n" |
31309 | "char8 __ovld __cnfn convert_char8_sat_rtz(ulong8);\n" |
31310 | "char8 __ovld __cnfn convert_char8_rtp(ulong8);\n" |
31311 | "char8 __ovld __cnfn convert_char8_sat_rtp(ulong8);\n" |
31312 | "char8 __ovld __cnfn convert_char8_rtn(ulong8);\n" |
31313 | "char8 __ovld __cnfn convert_char8_sat_rtn(ulong8);\n" |
31314 | "char8 __ovld __cnfn convert_char8(ulong8);\n" |
31315 | "char8 __ovld __cnfn convert_char8_sat(ulong8);\n" |
31316 | "char8 __ovld __cnfn convert_char8_rte(float8);\n" |
31317 | "char8 __ovld __cnfn convert_char8_sat_rte(float8);\n" |
31318 | "char8 __ovld __cnfn convert_char8_rtz(float8);\n" |
31319 | "char8 __ovld __cnfn convert_char8_sat_rtz(float8);\n" |
31320 | "char8 __ovld __cnfn convert_char8_rtp(float8);\n" |
31321 | "char8 __ovld __cnfn convert_char8_sat_rtp(float8);\n" |
31322 | "char8 __ovld __cnfn convert_char8_rtn(float8);\n" |
31323 | "char8 __ovld __cnfn convert_char8_sat_rtn(float8);\n" |
31324 | "char8 __ovld __cnfn convert_char8(float8);\n" |
31325 | "char8 __ovld __cnfn convert_char8_sat(float8);\n" |
31326 | "uchar8 __ovld __cnfn convert_uchar8_rte(char8);\n" |
31327 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(char8);\n" |
31328 | "uchar8 __ovld __cnfn convert_uchar8_rtz(char8);\n" |
31329 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(char8);\n" |
31330 | "uchar8 __ovld __cnfn convert_uchar8_rtp(char8);\n" |
31331 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(char8);\n" |
31332 | "uchar8 __ovld __cnfn convert_uchar8_rtn(char8);\n" |
31333 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(char8);\n" |
31334 | "uchar8 __ovld __cnfn convert_uchar8(char8);\n" |
31335 | "uchar8 __ovld __cnfn convert_uchar8_sat(char8);\n" |
31336 | "uchar8 __ovld __cnfn convert_uchar8_rte(uchar8);\n" |
31337 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(uchar8);\n" |
31338 | "uchar8 __ovld __cnfn convert_uchar8_rtz(uchar8);\n" |
31339 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(uchar8);\n" |
31340 | "uchar8 __ovld __cnfn convert_uchar8_rtp(uchar8);\n" |
31341 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uchar8);\n" |
31342 | "uchar8 __ovld __cnfn convert_uchar8_rtn(uchar8);\n" |
31343 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uchar8);\n" |
31344 | "uchar8 __ovld __cnfn convert_uchar8(uchar8);\n" |
31345 | "uchar8 __ovld __cnfn convert_uchar8_sat(uchar8);\n" |
31346 | "uchar8 __ovld __cnfn convert_uchar8_rte(short8);\n" |
31347 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(short8);\n" |
31348 | "uchar8 __ovld __cnfn convert_uchar8_rtz(short8);\n" |
31349 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(short8);\n" |
31350 | "uchar8 __ovld __cnfn convert_uchar8_rtp(short8);\n" |
31351 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(short8);\n" |
31352 | "uchar8 __ovld __cnfn convert_uchar8_rtn(short8);\n" |
31353 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(short8);\n" |
31354 | "uchar8 __ovld __cnfn convert_uchar8(short8);\n" |
31355 | "uchar8 __ovld __cnfn convert_uchar8_sat(short8);\n" |
31356 | "uchar8 __ovld __cnfn convert_uchar8_rte(ushort8);\n" |
31357 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(ushort8);\n" |
31358 | "uchar8 __ovld __cnfn convert_uchar8_rtz(ushort8);\n" |
31359 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ushort8);\n" |
31360 | "uchar8 __ovld __cnfn convert_uchar8_rtp(ushort8);\n" |
31361 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ushort8);\n" |
31362 | "uchar8 __ovld __cnfn convert_uchar8_rtn(ushort8);\n" |
31363 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ushort8);\n" |
31364 | "uchar8 __ovld __cnfn convert_uchar8(ushort8);\n" |
31365 | "uchar8 __ovld __cnfn convert_uchar8_sat(ushort8);\n" |
31366 | "uchar8 __ovld __cnfn convert_uchar8_rte(int8);\n" |
31367 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(int8);\n" |
31368 | "uchar8 __ovld __cnfn convert_uchar8_rtz(int8);\n" |
31369 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(int8);\n" |
31370 | "uchar8 __ovld __cnfn convert_uchar8_rtp(int8);\n" |
31371 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(int8);\n" |
31372 | "uchar8 __ovld __cnfn convert_uchar8_rtn(int8);\n" |
31373 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(int8);\n" |
31374 | "uchar8 __ovld __cnfn convert_uchar8(int8);\n" |
31375 | "uchar8 __ovld __cnfn convert_uchar8_sat(int8);\n" |
31376 | "uchar8 __ovld __cnfn convert_uchar8_rte(uint8);\n" |
31377 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(uint8);\n" |
31378 | "uchar8 __ovld __cnfn convert_uchar8_rtz(uint8);\n" |
31379 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(uint8);\n" |
31380 | "uchar8 __ovld __cnfn convert_uchar8_rtp(uint8);\n" |
31381 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uint8);\n" |
31382 | "uchar8 __ovld __cnfn convert_uchar8_rtn(uint8);\n" |
31383 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uint8);\n" |
31384 | "uchar8 __ovld __cnfn convert_uchar8(uint8);\n" |
31385 | "uchar8 __ovld __cnfn convert_uchar8_sat(uint8);\n" |
31386 | "uchar8 __ovld __cnfn convert_uchar8_rte(long8);\n" |
31387 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(long8);\n" |
31388 | "uchar8 __ovld __cnfn convert_uchar8_rtz(long8);\n" |
31389 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(long8);\n" |
31390 | "uchar8 __ovld __cnfn convert_uchar8_rtp(long8);\n" |
31391 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(long8);\n" |
31392 | "uchar8 __ovld __cnfn convert_uchar8_rtn(long8);\n" |
31393 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(long8);\n" |
31394 | "uchar8 __ovld __cnfn convert_uchar8(long8);\n" |
31395 | "uchar8 __ovld __cnfn convert_uchar8_sat(long8);\n" |
31396 | "uchar8 __ovld __cnfn convert_uchar8_rte(ulong8);\n" |
31397 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(ulong8);\n" |
31398 | "uchar8 __ovld __cnfn convert_uchar8_rtz(ulong8);\n" |
31399 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ulong8);\n" |
31400 | "uchar8 __ovld __cnfn convert_uchar8_rtp(ulong8);\n" |
31401 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ulong8);\n" |
31402 | "uchar8 __ovld __cnfn convert_uchar8_rtn(ulong8);\n" |
31403 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ulong8);\n" |
31404 | "uchar8 __ovld __cnfn convert_uchar8(ulong8);\n" |
31405 | "uchar8 __ovld __cnfn convert_uchar8_sat(ulong8);\n" |
31406 | "uchar8 __ovld __cnfn convert_uchar8_rte(float8);\n" |
31407 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(float8);\n" |
31408 | "uchar8 __ovld __cnfn convert_uchar8_rtz(float8);\n" |
31409 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(float8);\n" |
31410 | "uchar8 __ovld __cnfn convert_uchar8_rtp(float8);\n" |
31411 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(float8);\n" |
31412 | "uchar8 __ovld __cnfn convert_uchar8_rtn(float8);\n" |
31413 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(float8);\n" |
31414 | "uchar8 __ovld __cnfn convert_uchar8(float8);\n" |
31415 | "uchar8 __ovld __cnfn convert_uchar8_sat(float8);\n" |
31416 | "short8 __ovld __cnfn convert_short8_rte(char8);\n" |
31417 | "short8 __ovld __cnfn convert_short8_sat_rte(char8);\n" |
31418 | "short8 __ovld __cnfn convert_short8_rtz(char8);\n" |
31419 | "short8 __ovld __cnfn convert_short8_sat_rtz(char8);\n" |
31420 | "short8 __ovld __cnfn convert_short8_rtp(char8);\n" |
31421 | "short8 __ovld __cnfn convert_short8_sat_rtp(char8);\n" |
31422 | "short8 __ovld __cnfn convert_short8_rtn(char8);\n" |
31423 | "short8 __ovld __cnfn convert_short8_sat_rtn(char8);\n" |
31424 | "short8 __ovld __cnfn convert_short8(char8);\n" |
31425 | "short8 __ovld __cnfn convert_short8_sat(char8);\n" |
31426 | "short8 __ovld __cnfn convert_short8_rte(uchar8);\n" |
31427 | "short8 __ovld __cnfn convert_short8_sat_rte(uchar8);\n" |
31428 | "short8 __ovld __cnfn convert_short8_rtz(uchar8);\n" |
31429 | "short8 __ovld __cnfn convert_short8_sat_rtz(uchar8);\n" |
31430 | "short8 __ovld __cnfn convert_short8_rtp(uchar8);\n" |
31431 | "short8 __ovld __cnfn convert_short8_sat_rtp(uchar8);\n" |
31432 | "short8 __ovld __cnfn convert_short8_rtn(uchar8);\n" |
31433 | "short8 __ovld __cnfn convert_short8_sat_rtn(uchar8);\n" |
31434 | "short8 __ovld __cnfn convert_short8(uchar8);\n" |
31435 | "short8 __ovld __cnfn convert_short8_sat(uchar8);\n" |
31436 | "short8 __ovld __cnfn convert_short8_rte(short8);\n" |
31437 | "short8 __ovld __cnfn convert_short8_sat_rte(short8);\n" |
31438 | "short8 __ovld __cnfn convert_short8_rtz(short8);\n" |
31439 | "short8 __ovld __cnfn convert_short8_sat_rtz(short8);\n" |
31440 | "short8 __ovld __cnfn convert_short8_rtp(short8);\n" |
31441 | "short8 __ovld __cnfn convert_short8_sat_rtp(short8);\n" |
31442 | "short8 __ovld __cnfn convert_short8_rtn(short8);\n" |
31443 | "short8 __ovld __cnfn convert_short8_sat_rtn(short8);\n" |
31444 | "short8 __ovld __cnfn convert_short8(short8);\n" |
31445 | "short8 __ovld __cnfn convert_short8_sat(short8);\n" |
31446 | "short8 __ovld __cnfn convert_short8_rte(ushort8);\n" |
31447 | "short8 __ovld __cnfn convert_short8_sat_rte(ushort8);\n" |
31448 | "short8 __ovld __cnfn convert_short8_rtz(ushort8);\n" |
31449 | "short8 __ovld __cnfn convert_short8_sat_rtz(ushort8);\n" |
31450 | "short8 __ovld __cnfn convert_short8_rtp(ushort8);\n" |
31451 | "short8 __ovld __cnfn convert_short8_sat_rtp(ushort8);\n" |
31452 | "short8 __ovld __cnfn convert_short8_rtn(ushort8);\n" |
31453 | "short8 __ovld __cnfn convert_short8_sat_rtn(ushort8);\n" |
31454 | "short8 __ovld __cnfn convert_short8(ushort8);\n" |
31455 | "short8 __ovld __cnfn convert_short8_sat(ushort8);\n" |
31456 | "short8 __ovld __cnfn convert_short8_rte(int8);\n" |
31457 | "short8 __ovld __cnfn convert_short8_sat_rte(int8);\n" |
31458 | "short8 __ovld __cnfn convert_short8_rtz(int8);\n" |
31459 | "short8 __ovld __cnfn convert_short8_sat_rtz(int8);\n" |
31460 | "short8 __ovld __cnfn convert_short8_rtp(int8);\n" |
31461 | "short8 __ovld __cnfn convert_short8_sat_rtp(int8);\n" |
31462 | "short8 __ovld __cnfn convert_short8_rtn(int8);\n" |
31463 | "short8 __ovld __cnfn convert_short8_sat_rtn(int8);\n" |
31464 | "short8 __ovld __cnfn convert_short8(int8);\n" |
31465 | "short8 __ovld __cnfn convert_short8_sat(int8);\n" |
31466 | "short8 __ovld __cnfn convert_short8_rte(uint8);\n" |
31467 | "short8 __ovld __cnfn convert_short8_sat_rte(uint8);\n" |
31468 | "short8 __ovld __cnfn convert_short8_rtz(uint8);\n" |
31469 | "short8 __ovld __cnfn convert_short8_sat_rtz(uint8);\n" |
31470 | "short8 __ovld __cnfn convert_short8_rtp(uint8);\n" |
31471 | "short8 __ovld __cnfn convert_short8_sat_rtp(uint8);\n" |
31472 | "short8 __ovld __cnfn convert_short8_rtn(uint8);\n" |
31473 | "short8 __ovld __cnfn convert_short8_sat_rtn(uint8);\n" |
31474 | "short8 __ovld __cnfn convert_short8(uint8);\n" |
31475 | "short8 __ovld __cnfn convert_short8_sat(uint8);\n" |
31476 | "short8 __ovld __cnfn convert_short8_rte(long8);\n" |
31477 | "short8 __ovld __cnfn convert_short8_sat_rte(long8);\n" |
31478 | "short8 __ovld __cnfn convert_short8_rtz(long8);\n" |
31479 | "short8 __ovld __cnfn convert_short8_sat_rtz(long8);\n" |
31480 | "short8 __ovld __cnfn convert_short8_rtp(long8);\n" |
31481 | "short8 __ovld __cnfn convert_short8_sat_rtp(long8);\n" |
31482 | "short8 __ovld __cnfn convert_short8_rtn(long8);\n" |
31483 | "short8 __ovld __cnfn convert_short8_sat_rtn(long8);\n" |
31484 | "short8 __ovld __cnfn convert_short8(long8);\n" |
31485 | "short8 __ovld __cnfn convert_short8_sat(long8);\n" |
31486 | "short8 __ovld __cnfn convert_short8_rte(ulong8);\n" |
31487 | "short8 __ovld __cnfn convert_short8_sat_rte(ulong8);\n" |
31488 | "short8 __ovld __cnfn convert_short8_rtz(ulong8);\n" |
31489 | "short8 __ovld __cnfn convert_short8_sat_rtz(ulong8);\n" |
31490 | "short8 __ovld __cnfn convert_short8_rtp(ulong8);\n" |
31491 | "short8 __ovld __cnfn convert_short8_sat_rtp(ulong8);\n" |
31492 | "short8 __ovld __cnfn convert_short8_rtn(ulong8);\n" |
31493 | "short8 __ovld __cnfn convert_short8_sat_rtn(ulong8);\n" |
31494 | "short8 __ovld __cnfn convert_short8(ulong8);\n" |
31495 | "short8 __ovld __cnfn convert_short8_sat(ulong8);\n" |
31496 | "short8 __ovld __cnfn convert_short8_rte(float8);\n" |
31497 | "short8 __ovld __cnfn convert_short8_sat_rte(float8);\n" |
31498 | "short8 __ovld __cnfn convert_short8_rtz(float8);\n" |
31499 | "short8 __ovld __cnfn convert_short8_sat_rtz(float8);\n" |
31500 | "short8 __ovld __cnfn convert_short8_rtp(float8);\n" |
31501 | "short8 __ovld __cnfn convert_short8_sat_rtp(float8);\n" |
31502 | "short8 __ovld __cnfn convert_short8_rtn(float8);\n" |
31503 | "short8 __ovld __cnfn convert_short8_sat_rtn(float8);\n" |
31504 | "short8 __ovld __cnfn convert_short8(float8);\n" |
31505 | "short8 __ovld __cnfn convert_short8_sat(float8);\n" |
31506 | "ushort8 __ovld __cnfn convert_ushort8_rte(char8);\n" |
31507 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(char8);\n" |
31508 | "ushort8 __ovld __cnfn convert_ushort8_rtz(char8);\n" |
31509 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(char8);\n" |
31510 | "ushort8 __ovld __cnfn convert_ushort8_rtp(char8);\n" |
31511 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(char8);\n" |
31512 | "ushort8 __ovld __cnfn convert_ushort8_rtn(char8);\n" |
31513 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(char8);\n" |
31514 | "ushort8 __ovld __cnfn convert_ushort8(char8);\n" |
31515 | "ushort8 __ovld __cnfn convert_ushort8_sat(char8);\n" |
31516 | "ushort8 __ovld __cnfn convert_ushort8_rte(uchar8);\n" |
31517 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(uchar8);\n" |
31518 | "ushort8 __ovld __cnfn convert_ushort8_rtz(uchar8);\n" |
31519 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uchar8);\n" |
31520 | "ushort8 __ovld __cnfn convert_ushort8_rtp(uchar8);\n" |
31521 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(uchar8);\n" |
31522 | "ushort8 __ovld __cnfn convert_ushort8_rtn(uchar8);\n" |
31523 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uchar8);\n" |
31524 | "ushort8 __ovld __cnfn convert_ushort8(uchar8);\n" |
31525 | "ushort8 __ovld __cnfn convert_ushort8_sat(uchar8);\n" |
31526 | "ushort8 __ovld __cnfn convert_ushort8_rte(short8);\n" |
31527 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(short8);\n" |
31528 | "ushort8 __ovld __cnfn convert_ushort8_rtz(short8);\n" |
31529 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(short8);\n" |
31530 | "ushort8 __ovld __cnfn convert_ushort8_rtp(short8);\n" |
31531 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(short8);\n" |
31532 | "ushort8 __ovld __cnfn convert_ushort8_rtn(short8);\n" |
31533 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(short8);\n" |
31534 | "ushort8 __ovld __cnfn convert_ushort8(short8);\n" |
31535 | "ushort8 __ovld __cnfn convert_ushort8_sat(short8);\n" |
31536 | "ushort8 __ovld __cnfn convert_ushort8_rte(ushort8);\n" |
31537 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(ushort8);\n" |
31538 | "ushort8 __ovld __cnfn convert_ushort8_rtz(ushort8);\n" |
31539 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(ushort8);\n" |
31540 | "ushort8 __ovld __cnfn convert_ushort8_rtp(ushort8);\n" |
31541 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ushort8);\n" |
31542 | "ushort8 __ovld __cnfn convert_ushort8_rtn(ushort8);\n" |
31543 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ushort8);\n" |
31544 | "ushort8 __ovld __cnfn convert_ushort8(ushort8);\n" |
31545 | "ushort8 __ovld __cnfn convert_ushort8_sat(ushort8);\n" |
31546 | "ushort8 __ovld __cnfn convert_ushort8_rte(int8);\n" |
31547 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(int8);\n" |
31548 | "ushort8 __ovld __cnfn convert_ushort8_rtz(int8);\n" |
31549 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(int8);\n" |
31550 | "ushort8 __ovld __cnfn convert_ushort8_rtp(int8);\n" |
31551 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(int8);\n" |
31552 | "ushort8 __ovld __cnfn convert_ushort8_rtn(int8);\n" |
31553 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(int8);\n" |
31554 | "ushort8 __ovld __cnfn convert_ushort8(int8);\n" |
31555 | "ushort8 __ovld __cnfn convert_ushort8_sat(int8);\n" |
31556 | "ushort8 __ovld __cnfn convert_ushort8_rte(uint8);\n" |
31557 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(uint8);\n" |
31558 | "ushort8 __ovld __cnfn convert_ushort8_rtz(uint8);\n" |
31559 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uint8);\n" |
31560 | "ushort8 __ovld __cnfn convert_ushort8_rtp(uint8);\n" |
31561 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(uint8);\n" |
31562 | "ushort8 __ovld __cnfn convert_ushort8_rtn(uint8);\n" |
31563 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uint8);\n" |
31564 | "ushort8 __ovld __cnfn convert_ushort8(uint8);\n" |
31565 | "ushort8 __ovld __cnfn convert_ushort8_sat(uint8);\n" |
31566 | "ushort8 __ovld __cnfn convert_ushort8_rte(long8);\n" |
31567 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(long8);\n" |
31568 | "ushort8 __ovld __cnfn convert_ushort8_rtz(long8);\n" |
31569 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(long8);\n" |
31570 | "ushort8 __ovld __cnfn convert_ushort8_rtp(long8);\n" |
31571 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(long8);\n" |
31572 | "ushort8 __ovld __cnfn convert_ushort8_rtn(long8);\n" |
31573 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(long8);\n" |
31574 | "ushort8 __ovld __cnfn convert_ushort8(long8);\n" |
31575 | "ushort8 __ovld __cnfn convert_ushort8_sat(long8);\n" |
31576 | "ushort8 __ovld __cnfn convert_ushort8_rte(ulong8);\n" |
31577 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(ulong8);\n" |
31578 | "ushort8 __ovld __cnfn convert_ushort8_rtz(ulong8);\n" |
31579 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(ulong8);\n" |
31580 | "ushort8 __ovld __cnfn convert_ushort8_rtp(ulong8);\n" |
31581 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ulong8);\n" |
31582 | "ushort8 __ovld __cnfn convert_ushort8_rtn(ulong8);\n" |
31583 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ulong8);\n" |
31584 | "ushort8 __ovld __cnfn convert_ushort8(ulong8);\n" |
31585 | "ushort8 __ovld __cnfn convert_ushort8_sat(ulong8);\n" |
31586 | "ushort8 __ovld __cnfn convert_ushort8_rte(float8);\n" |
31587 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(float8);\n" |
31588 | "ushort8 __ovld __cnfn convert_ushort8_rtz(float8);\n" |
31589 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(float8);\n" |
31590 | "ushort8 __ovld __cnfn convert_ushort8_rtp(float8);\n" |
31591 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(float8);\n" |
31592 | "ushort8 __ovld __cnfn convert_ushort8_rtn(float8);\n" |
31593 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(float8);\n" |
31594 | "ushort8 __ovld __cnfn convert_ushort8(float8);\n" |
31595 | "ushort8 __ovld __cnfn convert_ushort8_sat(float8);\n" |
31596 | "int8 __ovld __cnfn convert_int8_rte(char8);\n" |
31597 | "int8 __ovld __cnfn convert_int8_sat_rte(char8);\n" |
31598 | "int8 __ovld __cnfn convert_int8_rtz(char8);\n" |
31599 | "int8 __ovld __cnfn convert_int8_sat_rtz(char8);\n" |
31600 | "int8 __ovld __cnfn convert_int8_rtp(char8);\n" |
31601 | "int8 __ovld __cnfn convert_int8_sat_rtp(char8);\n" |
31602 | "int8 __ovld __cnfn convert_int8_rtn(char8);\n" |
31603 | "int8 __ovld __cnfn convert_int8_sat_rtn(char8);\n" |
31604 | "int8 __ovld __cnfn convert_int8(char8);\n" |
31605 | "int8 __ovld __cnfn convert_int8_sat(char8);\n" |
31606 | "int8 __ovld __cnfn convert_int8_rte(uchar8);\n" |
31607 | "int8 __ovld __cnfn convert_int8_sat_rte(uchar8);\n" |
31608 | "int8 __ovld __cnfn convert_int8_rtz(uchar8);\n" |
31609 | "int8 __ovld __cnfn convert_int8_sat_rtz(uchar8);\n" |
31610 | "int8 __ovld __cnfn convert_int8_rtp(uchar8);\n" |
31611 | "int8 __ovld __cnfn convert_int8_sat_rtp(uchar8);\n" |
31612 | "int8 __ovld __cnfn convert_int8_rtn(uchar8);\n" |
31613 | "int8 __ovld __cnfn convert_int8_sat_rtn(uchar8);\n" |
31614 | "int8 __ovld __cnfn convert_int8(uchar8);\n" |
31615 | "int8 __ovld __cnfn convert_int8_sat(uchar8);\n" |
31616 | "int8 __ovld __cnfn convert_int8_rte(short8);\n" |
31617 | "int8 __ovld __cnfn convert_int8_sat_rte(short8);\n" |
31618 | "int8 __ovld __cnfn convert_int8_rtz(short8);\n" |
31619 | "int8 __ovld __cnfn convert_int8_sat_rtz(short8);\n" |
31620 | "int8 __ovld __cnfn convert_int8_rtp(short8);\n" |
31621 | "int8 __ovld __cnfn convert_int8_sat_rtp(short8);\n" |
31622 | "int8 __ovld __cnfn convert_int8_rtn(short8);\n" |
31623 | "int8 __ovld __cnfn convert_int8_sat_rtn(short8);\n" |
31624 | "int8 __ovld __cnfn convert_int8(short8);\n" |
31625 | "int8 __ovld __cnfn convert_int8_sat(short8);\n" |
31626 | "int8 __ovld __cnfn convert_int8_rte(ushort8);\n" |
31627 | "int8 __ovld __cnfn convert_int8_sat_rte(ushort8);\n" |
31628 | "int8 __ovld __cnfn convert_int8_rtz(ushort8);\n" |
31629 | "int8 __ovld __cnfn convert_int8_sat_rtz(ushort8);\n" |
31630 | "int8 __ovld __cnfn convert_int8_rtp(ushort8);\n" |
31631 | "int8 __ovld __cnfn convert_int8_sat_rtp(ushort8);\n" |
31632 | "int8 __ovld __cnfn convert_int8_rtn(ushort8);\n" |
31633 | "int8 __ovld __cnfn convert_int8_sat_rtn(ushort8);\n" |
31634 | "int8 __ovld __cnfn convert_int8(ushort8);\n" |
31635 | "int8 __ovld __cnfn convert_int8_sat(ushort8);\n" |
31636 | "int8 __ovld __cnfn convert_int8_rte(int8);\n" |
31637 | "int8 __ovld __cnfn convert_int8_sat_rte(int8);\n" |
31638 | "int8 __ovld __cnfn convert_int8_rtz(int8);\n" |
31639 | "int8 __ovld __cnfn convert_int8_sat_rtz(int8);\n" |
31640 | "int8 __ovld __cnfn convert_int8_rtp(int8);\n" |
31641 | "int8 __ovld __cnfn convert_int8_sat_rtp(int8);\n" |
31642 | "int8 __ovld __cnfn convert_int8_rtn(int8);\n" |
31643 | "int8 __ovld __cnfn convert_int8_sat_rtn(int8);\n" |
31644 | "int8 __ovld __cnfn convert_int8(int8);\n" |
31645 | "int8 __ovld __cnfn convert_int8_sat(int8);\n" |
31646 | "int8 __ovld __cnfn convert_int8_rte(uint8);\n" |
31647 | "int8 __ovld __cnfn convert_int8_sat_rte(uint8);\n" |
31648 | "int8 __ovld __cnfn convert_int8_rtz(uint8);\n" |
31649 | "int8 __ovld __cnfn convert_int8_sat_rtz(uint8);\n" |
31650 | "int8 __ovld __cnfn convert_int8_rtp(uint8);\n" |
31651 | "int8 __ovld __cnfn convert_int8_sat_rtp(uint8);\n" |
31652 | "int8 __ovld __cnfn convert_int8_rtn(uint8);\n" |
31653 | "int8 __ovld __cnfn convert_int8_sat_rtn(uint8);\n" |
31654 | "int8 __ovld __cnfn convert_int8(uint8);\n" |
31655 | "int8 __ovld __cnfn convert_int8_sat(uint8);\n" |
31656 | "int8 __ovld __cnfn convert_int8_rte(long8);\n" |
31657 | "int8 __ovld __cnfn convert_int8_sat_rte(long8);\n" |
31658 | "int8 __ovld __cnfn convert_int8_rtz(long8);\n" |
31659 | "int8 __ovld __cnfn convert_int8_sat_rtz(long8);\n" |
31660 | "int8 __ovld __cnfn convert_int8_rtp(long8);\n" |
31661 | "int8 __ovld __cnfn convert_int8_sat_rtp(long8);\n" |
31662 | "int8 __ovld __cnfn convert_int8_rtn(long8);\n" |
31663 | "int8 __ovld __cnfn convert_int8_sat_rtn(long8);\n" |
31664 | "int8 __ovld __cnfn convert_int8(long8);\n" |
31665 | "int8 __ovld __cnfn convert_int8_sat(long8);\n" |
31666 | "int8 __ovld __cnfn convert_int8_rte(ulong8);\n" |
31667 | "int8 __ovld __cnfn convert_int8_sat_rte(ulong8);\n" |
31668 | "int8 __ovld __cnfn convert_int8_rtz(ulong8);\n" |
31669 | "int8 __ovld __cnfn convert_int8_sat_rtz(ulong8);\n" |
31670 | "int8 __ovld __cnfn convert_int8_rtp(ulong8);\n" |
31671 | "int8 __ovld __cnfn convert_int8_sat_rtp(ulong8);\n" |
31672 | "int8 __ovld __cnfn convert_int8_rtn(ulong8);\n" |
31673 | "int8 __ovld __cnfn convert_int8_sat_rtn(ulong8);\n" |
31674 | "int8 __ovld __cnfn convert_int8(ulong8);\n" |
31675 | "int8 __ovld __cnfn convert_int8_sat(ulong8);\n" |
31676 | "int8 __ovld __cnfn convert_int8_rte(float8);\n" |
31677 | "int8 __ovld __cnfn convert_int8_sat_rte(float8);\n" |
31678 | "int8 __ovld __cnfn convert_int8_rtz(float8);\n" |
31679 | "int8 __ovld __cnfn convert_int8_sat_rtz(float8);\n" |
31680 | "int8 __ovld __cnfn convert_int8_rtp(float8);\n" |
31681 | "int8 __ovld __cnfn convert_int8_sat_rtp(float8);\n" |
31682 | "int8 __ovld __cnfn convert_int8_rtn(float8);\n" |
31683 | "int8 __ovld __cnfn convert_int8_sat_rtn(float8);\n" |
31684 | "int8 __ovld __cnfn convert_int8(float8);\n" |
31685 | "int8 __ovld __cnfn convert_int8_sat(float8);\n" |
31686 | "uint8 __ovld __cnfn convert_uint8_rte(char8);\n" |
31687 | "uint8 __ovld __cnfn convert_uint8_sat_rte(char8);\n" |
31688 | "uint8 __ovld __cnfn convert_uint8_rtz(char8);\n" |
31689 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(char8);\n" |
31690 | "uint8 __ovld __cnfn convert_uint8_rtp(char8);\n" |
31691 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(char8);\n" |
31692 | "uint8 __ovld __cnfn convert_uint8_rtn(char8);\n" |
31693 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(char8);\n" |
31694 | "uint8 __ovld __cnfn convert_uint8(char8);\n" |
31695 | "uint8 __ovld __cnfn convert_uint8_sat(char8);\n" |
31696 | "uint8 __ovld __cnfn convert_uint8_rte(uchar8);\n" |
31697 | "uint8 __ovld __cnfn convert_uint8_sat_rte(uchar8);\n" |
31698 | "uint8 __ovld __cnfn convert_uint8_rtz(uchar8);\n" |
31699 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(uchar8);\n" |
31700 | "uint8 __ovld __cnfn convert_uint8_rtp(uchar8);\n" |
31701 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(uchar8);\n" |
31702 | "uint8 __ovld __cnfn convert_uint8_rtn(uchar8);\n" |
31703 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(uchar8);\n" |
31704 | "uint8 __ovld __cnfn convert_uint8(uchar8);\n" |
31705 | "uint8 __ovld __cnfn convert_uint8_sat(uchar8);\n" |
31706 | "uint8 __ovld __cnfn convert_uint8_rte(short8);\n" |
31707 | "uint8 __ovld __cnfn convert_uint8_sat_rte(short8);\n" |
31708 | "uint8 __ovld __cnfn convert_uint8_rtz(short8);\n" |
31709 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(short8);\n" |
31710 | "uint8 __ovld __cnfn convert_uint8_rtp(short8);\n" |
31711 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(short8);\n" |
31712 | "uint8 __ovld __cnfn convert_uint8_rtn(short8);\n" |
31713 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(short8);\n" |
31714 | "uint8 __ovld __cnfn convert_uint8(short8);\n" |
31715 | "uint8 __ovld __cnfn convert_uint8_sat(short8);\n" |
31716 | "uint8 __ovld __cnfn convert_uint8_rte(ushort8);\n" |
31717 | "uint8 __ovld __cnfn convert_uint8_sat_rte(ushort8);\n" |
31718 | "uint8 __ovld __cnfn convert_uint8_rtz(ushort8);\n" |
31719 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(ushort8);\n" |
31720 | "uint8 __ovld __cnfn convert_uint8_rtp(ushort8);\n" |
31721 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(ushort8);\n" |
31722 | "uint8 __ovld __cnfn convert_uint8_rtn(ushort8);\n" |
31723 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(ushort8);\n" |
31724 | "uint8 __ovld __cnfn convert_uint8(ushort8);\n" |
31725 | "uint8 __ovld __cnfn convert_uint8_sat(ushort8);\n" |
31726 | "uint8 __ovld __cnfn convert_uint8_rte(int8);\n" |
31727 | "uint8 __ovld __cnfn convert_uint8_sat_rte(int8);\n" |
31728 | "uint8 __ovld __cnfn convert_uint8_rtz(int8);\n" |
31729 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(int8);\n" |
31730 | "uint8 __ovld __cnfn convert_uint8_rtp(int8);\n" |
31731 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(int8);\n" |
31732 | "uint8 __ovld __cnfn convert_uint8_rtn(int8);\n" |
31733 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(int8);\n" |
31734 | "uint8 __ovld __cnfn convert_uint8(int8);\n" |
31735 | "uint8 __ovld __cnfn convert_uint8_sat(int8);\n" |
31736 | "uint8 __ovld __cnfn convert_uint8_rte(uint8);\n" |
31737 | "uint8 __ovld __cnfn convert_uint8_sat_rte(uint8);\n" |
31738 | "uint8 __ovld __cnfn convert_uint8_rtz(uint8);\n" |
31739 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(uint8);\n" |
31740 | "uint8 __ovld __cnfn convert_uint8_rtp(uint8);\n" |
31741 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(uint8);\n" |
31742 | "uint8 __ovld __cnfn convert_uint8_rtn(uint8);\n" |
31743 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(uint8);\n" |
31744 | "uint8 __ovld __cnfn convert_uint8(uint8);\n" |
31745 | "uint8 __ovld __cnfn convert_uint8_sat(uint8);\n" |
31746 | "uint8 __ovld __cnfn convert_uint8_rte(long8);\n" |
31747 | "uint8 __ovld __cnfn convert_uint8_sat_rte(long8);\n" |
31748 | "uint8 __ovld __cnfn convert_uint8_rtz(long8);\n" |
31749 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(long8);\n" |
31750 | "uint8 __ovld __cnfn convert_uint8_rtp(long8);\n" |
31751 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(long8);\n" |
31752 | "uint8 __ovld __cnfn convert_uint8_rtn(long8);\n" |
31753 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(long8);\n" |
31754 | "uint8 __ovld __cnfn convert_uint8(long8);\n" |
31755 | "uint8 __ovld __cnfn convert_uint8_sat(long8);\n" |
31756 | "uint8 __ovld __cnfn convert_uint8_rte(ulong8);\n" |
31757 | "uint8 __ovld __cnfn convert_uint8_sat_rte(ulong8);\n" |
31758 | "uint8 __ovld __cnfn convert_uint8_rtz(ulong8);\n" |
31759 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(ulong8);\n" |
31760 | "uint8 __ovld __cnfn convert_uint8_rtp(ulong8);\n" |
31761 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(ulong8);\n" |
31762 | "uint8 __ovld __cnfn convert_uint8_rtn(ulong8);\n" |
31763 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(ulong8);\n" |
31764 | "uint8 __ovld __cnfn convert_uint8(ulong8);\n" |
31765 | "uint8 __ovld __cnfn convert_uint8_sat(ulong8);\n" |
31766 | "uint8 __ovld __cnfn convert_uint8_rte(float8);\n" |
31767 | "uint8 __ovld __cnfn convert_uint8_sat_rte(float8);\n" |
31768 | "uint8 __ovld __cnfn convert_uint8_rtz(float8);\n" |
31769 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(float8);\n" |
31770 | "uint8 __ovld __cnfn convert_uint8_rtp(float8);\n" |
31771 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(float8);\n" |
31772 | "uint8 __ovld __cnfn convert_uint8_rtn(float8);\n" |
31773 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(float8);\n" |
31774 | "uint8 __ovld __cnfn convert_uint8(float8);\n" |
31775 | "uint8 __ovld __cnfn convert_uint8_sat(float8);\n" |
31776 | "long8 __ovld __cnfn convert_long8_rte(char8);\n" |
31777 | "long8 __ovld __cnfn convert_long8_sat_rte(char8);\n" |
31778 | "long8 __ovld __cnfn convert_long8_rtz(char8);\n" |
31779 | "long8 __ovld __cnfn convert_long8_sat_rtz(char8);\n" |
31780 | "long8 __ovld __cnfn convert_long8_rtp(char8);\n" |
31781 | "long8 __ovld __cnfn convert_long8_sat_rtp(char8);\n" |
31782 | "long8 __ovld __cnfn convert_long8_rtn(char8);\n" |
31783 | "long8 __ovld __cnfn convert_long8_sat_rtn(char8);\n" |
31784 | "long8 __ovld __cnfn convert_long8(char8);\n" |
31785 | "long8 __ovld __cnfn convert_long8_sat(char8);\n" |
31786 | "long8 __ovld __cnfn convert_long8_rte(uchar8);\n" |
31787 | "long8 __ovld __cnfn convert_long8_sat_rte(uchar8);\n" |
31788 | "long8 __ovld __cnfn convert_long8_rtz(uchar8);\n" |
31789 | "long8 __ovld __cnfn convert_long8_sat_rtz(uchar8);\n" |
31790 | "long8 __ovld __cnfn convert_long8_rtp(uchar8);\n" |
31791 | "long8 __ovld __cnfn convert_long8_sat_rtp(uchar8);\n" |
31792 | "long8 __ovld __cnfn convert_long8_rtn(uchar8);\n" |
31793 | "long8 __ovld __cnfn convert_long8_sat_rtn(uchar8);\n" |
31794 | "long8 __ovld __cnfn convert_long8(uchar8);\n" |
31795 | "long8 __ovld __cnfn convert_long8_sat(uchar8);\n" |
31796 | "long8 __ovld __cnfn convert_long8_rte(short8);\n" |
31797 | "long8 __ovld __cnfn convert_long8_sat_rte(short8);\n" |
31798 | "long8 __ovld __cnfn convert_long8_rtz(short8);\n" |
31799 | "long8 __ovld __cnfn convert_long8_sat_rtz(short8);\n" |
31800 | "long8 __ovld __cnfn convert_long8_rtp(short8);\n" |
31801 | "long8 __ovld __cnfn convert_long8_sat_rtp(short8);\n" |
31802 | "long8 __ovld __cnfn convert_long8_rtn(short8);\n" |
31803 | "long8 __ovld __cnfn convert_long8_sat_rtn(short8);\n" |
31804 | "long8 __ovld __cnfn convert_long8(short8);\n" |
31805 | "long8 __ovld __cnfn convert_long8_sat(short8);\n" |
31806 | "long8 __ovld __cnfn convert_long8_rte(ushort8);\n" |
31807 | "long8 __ovld __cnfn convert_long8_sat_rte(ushort8);\n" |
31808 | "long8 __ovld __cnfn convert_long8_rtz(ushort8);\n" |
31809 | "long8 __ovld __cnfn convert_long8_sat_rtz(ushort8);\n" |
31810 | "long8 __ovld __cnfn convert_long8_rtp(ushort8);\n" |
31811 | "long8 __ovld __cnfn convert_long8_sat_rtp(ushort8);\n" |
31812 | "long8 __ovld __cnfn convert_long8_rtn(ushort8);\n" |
31813 | "long8 __ovld __cnfn convert_long8_sat_rtn(ushort8);\n" |
31814 | "long8 __ovld __cnfn convert_long8(ushort8);\n" |
31815 | "long8 __ovld __cnfn convert_long8_sat(ushort8);\n" |
31816 | "long8 __ovld __cnfn convert_long8_rte(int8);\n" |
31817 | "long8 __ovld __cnfn convert_long8_sat_rte(int8);\n" |
31818 | "long8 __ovld __cnfn convert_long8_rtz(int8);\n" |
31819 | "long8 __ovld __cnfn convert_long8_sat_rtz(int8);\n" |
31820 | "long8 __ovld __cnfn convert_long8_rtp(int8);\n" |
31821 | "long8 __ovld __cnfn convert_long8_sat_rtp(int8);\n" |
31822 | "long8 __ovld __cnfn convert_long8_rtn(int8);\n" |
31823 | "long8 __ovld __cnfn convert_long8_sat_rtn(int8);\n" |
31824 | "long8 __ovld __cnfn convert_long8(int8);\n" |
31825 | "long8 __ovld __cnfn convert_long8_sat(int8);\n" |
31826 | "long8 __ovld __cnfn convert_long8_rte(uint8);\n" |
31827 | "long8 __ovld __cnfn convert_long8_sat_rte(uint8);\n" |
31828 | "long8 __ovld __cnfn convert_long8_rtz(uint8);\n" |
31829 | "long8 __ovld __cnfn convert_long8_sat_rtz(uint8);\n" |
31830 | "long8 __ovld __cnfn convert_long8_rtp(uint8);\n" |
31831 | "long8 __ovld __cnfn convert_long8_sat_rtp(uint8);\n" |
31832 | "long8 __ovld __cnfn convert_long8_rtn(uint8);\n" |
31833 | "long8 __ovld __cnfn convert_long8_sat_rtn(uint8);\n" |
31834 | "long8 __ovld __cnfn convert_long8(uint8);\n" |
31835 | "long8 __ovld __cnfn convert_long8_sat(uint8);\n" |
31836 | "long8 __ovld __cnfn convert_long8_rte(long8);\n" |
31837 | "long8 __ovld __cnfn convert_long8_sat_rte(long8);\n" |
31838 | "long8 __ovld __cnfn convert_long8_rtz(long8);\n" |
31839 | "long8 __ovld __cnfn convert_long8_sat_rtz(long8);\n" |
31840 | "long8 __ovld __cnfn convert_long8_rtp(long8);\n" |
31841 | "long8 __ovld __cnfn convert_long8_sat_rtp(long8);\n" |
31842 | "long8 __ovld __cnfn convert_long8_rtn(long8);\n" |
31843 | "long8 __ovld __cnfn convert_long8_sat_rtn(long8);\n" |
31844 | "long8 __ovld __cnfn convert_long8(long8);\n" |
31845 | "long8 __ovld __cnfn convert_long8_sat(long8);\n" |
31846 | "long8 __ovld __cnfn convert_long8_rte(ulong8);\n" |
31847 | "long8 __ovld __cnfn convert_long8_sat_rte(ulong8);\n" |
31848 | "long8 __ovld __cnfn convert_long8_rtz(ulong8);\n" |
31849 | "long8 __ovld __cnfn convert_long8_sat_rtz(ulong8);\n" |
31850 | "long8 __ovld __cnfn convert_long8_rtp(ulong8);\n" |
31851 | "long8 __ovld __cnfn convert_long8_sat_rtp(ulong8);\n" |
31852 | "long8 __ovld __cnfn convert_long8_rtn(ulong8);\n" |
31853 | "long8 __ovld __cnfn convert_long8_sat_rtn(ulong8);\n" |
31854 | "long8 __ovld __cnfn convert_long8(ulong8);\n" |
31855 | "long8 __ovld __cnfn convert_long8_sat(ulong8);\n" |
31856 | "long8 __ovld __cnfn convert_long8_rte(float8);\n" |
31857 | "long8 __ovld __cnfn convert_long8_sat_rte(float8);\n" |
31858 | "long8 __ovld __cnfn convert_long8_rtz(float8);\n" |
31859 | "long8 __ovld __cnfn convert_long8_sat_rtz(float8);\n" |
31860 | "long8 __ovld __cnfn convert_long8_rtp(float8);\n" |
31861 | "long8 __ovld __cnfn convert_long8_sat_rtp(float8);\n" |
31862 | "long8 __ovld __cnfn convert_long8_rtn(float8);\n" |
31863 | "long8 __ovld __cnfn convert_long8_sat_rtn(float8);\n" |
31864 | "long8 __ovld __cnfn convert_long8(float8);\n" |
31865 | "long8 __ovld __cnfn convert_long8_sat(float8);\n" |
31866 | "ulong8 __ovld __cnfn convert_ulong8_rte(char8);\n" |
31867 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(char8);\n" |
31868 | "ulong8 __ovld __cnfn convert_ulong8_rtz(char8);\n" |
31869 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(char8);\n" |
31870 | "ulong8 __ovld __cnfn convert_ulong8_rtp(char8);\n" |
31871 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(char8);\n" |
31872 | "ulong8 __ovld __cnfn convert_ulong8_rtn(char8);\n" |
31873 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(char8);\n" |
31874 | "ulong8 __ovld __cnfn convert_ulong8(char8);\n" |
31875 | "ulong8 __ovld __cnfn convert_ulong8_sat(char8);\n" |
31876 | "ulong8 __ovld __cnfn convert_ulong8_rte(uchar8);\n" |
31877 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(uchar8);\n" |
31878 | "ulong8 __ovld __cnfn convert_ulong8_rtz(uchar8);\n" |
31879 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uchar8);\n" |
31880 | "ulong8 __ovld __cnfn convert_ulong8_rtp(uchar8);\n" |
31881 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uchar8);\n" |
31882 | "ulong8 __ovld __cnfn convert_ulong8_rtn(uchar8);\n" |
31883 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uchar8);\n" |
31884 | "ulong8 __ovld __cnfn convert_ulong8(uchar8);\n" |
31885 | "ulong8 __ovld __cnfn convert_ulong8_sat(uchar8);\n" |
31886 | "ulong8 __ovld __cnfn convert_ulong8_rte(short8);\n" |
31887 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(short8);\n" |
31888 | "ulong8 __ovld __cnfn convert_ulong8_rtz(short8);\n" |
31889 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(short8);\n" |
31890 | "ulong8 __ovld __cnfn convert_ulong8_rtp(short8);\n" |
31891 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(short8);\n" |
31892 | "ulong8 __ovld __cnfn convert_ulong8_rtn(short8);\n" |
31893 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(short8);\n" |
31894 | "ulong8 __ovld __cnfn convert_ulong8(short8);\n" |
31895 | "ulong8 __ovld __cnfn convert_ulong8_sat(short8);\n" |
31896 | "ulong8 __ovld __cnfn convert_ulong8_rte(ushort8);\n" |
31897 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(ushort8);\n" |
31898 | "ulong8 __ovld __cnfn convert_ulong8_rtz(ushort8);\n" |
31899 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ushort8);\n" |
31900 | "ulong8 __ovld __cnfn convert_ulong8_rtp(ushort8);\n" |
31901 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ushort8);\n" |
31902 | "ulong8 __ovld __cnfn convert_ulong8_rtn(ushort8);\n" |
31903 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ushort8);\n" |
31904 | "ulong8 __ovld __cnfn convert_ulong8(ushort8);\n" |
31905 | "ulong8 __ovld __cnfn convert_ulong8_sat(ushort8);\n" |
31906 | "ulong8 __ovld __cnfn convert_ulong8_rte(int8);\n" |
31907 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(int8);\n" |
31908 | "ulong8 __ovld __cnfn convert_ulong8_rtz(int8);\n" |
31909 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(int8);\n" |
31910 | "ulong8 __ovld __cnfn convert_ulong8_rtp(int8);\n" |
31911 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(int8);\n" |
31912 | "ulong8 __ovld __cnfn convert_ulong8_rtn(int8);\n" |
31913 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(int8);\n" |
31914 | "ulong8 __ovld __cnfn convert_ulong8(int8);\n" |
31915 | "ulong8 __ovld __cnfn convert_ulong8_sat(int8);\n" |
31916 | "ulong8 __ovld __cnfn convert_ulong8_rte(uint8);\n" |
31917 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(uint8);\n" |
31918 | "ulong8 __ovld __cnfn convert_ulong8_rtz(uint8);\n" |
31919 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uint8);\n" |
31920 | "ulong8 __ovld __cnfn convert_ulong8_rtp(uint8);\n" |
31921 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uint8);\n" |
31922 | "ulong8 __ovld __cnfn convert_ulong8_rtn(uint8);\n" |
31923 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uint8);\n" |
31924 | "ulong8 __ovld __cnfn convert_ulong8(uint8);\n" |
31925 | "ulong8 __ovld __cnfn convert_ulong8_sat(uint8);\n" |
31926 | "ulong8 __ovld __cnfn convert_ulong8_rte(long8);\n" |
31927 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(long8);\n" |
31928 | "ulong8 __ovld __cnfn convert_ulong8_rtz(long8);\n" |
31929 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(long8);\n" |
31930 | "ulong8 __ovld __cnfn convert_ulong8_rtp(long8);\n" |
31931 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(long8);\n" |
31932 | "ulong8 __ovld __cnfn convert_ulong8_rtn(long8);\n" |
31933 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(long8);\n" |
31934 | "ulong8 __ovld __cnfn convert_ulong8(long8);\n" |
31935 | "ulong8 __ovld __cnfn convert_ulong8_sat(long8);\n" |
31936 | "ulong8 __ovld __cnfn convert_ulong8_rte(ulong8);\n" |
31937 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(ulong8);\n" |
31938 | "ulong8 __ovld __cnfn convert_ulong8_rtz(ulong8);\n" |
31939 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ulong8);\n" |
31940 | "ulong8 __ovld __cnfn convert_ulong8_rtp(ulong8);\n" |
31941 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ulong8);\n" |
31942 | "ulong8 __ovld __cnfn convert_ulong8_rtn(ulong8);\n" |
31943 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ulong8);\n" |
31944 | "ulong8 __ovld __cnfn convert_ulong8(ulong8);\n" |
31945 | "ulong8 __ovld __cnfn convert_ulong8_sat(ulong8);\n" |
31946 | "ulong8 __ovld __cnfn convert_ulong8_rte(float8);\n" |
31947 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(float8);\n" |
31948 | "ulong8 __ovld __cnfn convert_ulong8_rtz(float8);\n" |
31949 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(float8);\n" |
31950 | "ulong8 __ovld __cnfn convert_ulong8_rtp(float8);\n" |
31951 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(float8);\n" |
31952 | "ulong8 __ovld __cnfn convert_ulong8_rtn(float8);\n" |
31953 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(float8);\n" |
31954 | "ulong8 __ovld __cnfn convert_ulong8(float8);\n" |
31955 | "ulong8 __ovld __cnfn convert_ulong8_sat(float8);\n" |
31956 | "float8 __ovld __cnfn convert_float8_rte(char8);\n" |
31957 | "float8 __ovld __cnfn convert_float8_rtz(char8);\n" |
31958 | "float8 __ovld __cnfn convert_float8_rtp(char8);\n" |
31959 | "float8 __ovld __cnfn convert_float8_rtn(char8);\n" |
31960 | "float8 __ovld __cnfn convert_float8(char8);\n" |
31961 | "float8 __ovld __cnfn convert_float8_rte(uchar8);\n" |
31962 | "float8 __ovld __cnfn convert_float8_rtz(uchar8);\n" |
31963 | "float8 __ovld __cnfn convert_float8_rtp(uchar8);\n" |
31964 | "float8 __ovld __cnfn convert_float8_rtn(uchar8);\n" |
31965 | "float8 __ovld __cnfn convert_float8(uchar8);\n" |
31966 | "float8 __ovld __cnfn convert_float8_rte(short8);\n" |
31967 | "float8 __ovld __cnfn convert_float8_rtz(short8);\n" |
31968 | "float8 __ovld __cnfn convert_float8_rtp(short8);\n" |
31969 | "float8 __ovld __cnfn convert_float8_rtn(short8);\n" |
31970 | "float8 __ovld __cnfn convert_float8(short8);\n" |
31971 | "float8 __ovld __cnfn convert_float8_rte(ushort8);\n" |
31972 | "float8 __ovld __cnfn convert_float8_rtz(ushort8);\n" |
31973 | "float8 __ovld __cnfn convert_float8_rtp(ushort8);\n" |
31974 | "float8 __ovld __cnfn convert_float8_rtn(ushort8);\n" |
31975 | "float8 __ovld __cnfn convert_float8(ushort8);\n" |
31976 | "float8 __ovld __cnfn convert_float8_rte(int8);\n" |
31977 | "float8 __ovld __cnfn convert_float8_rtz(int8);\n" |
31978 | "float8 __ovld __cnfn convert_float8_rtp(int8);\n" |
31979 | "float8 __ovld __cnfn convert_float8_rtn(int8);\n" |
31980 | "float8 __ovld __cnfn convert_float8(int8);\n" |
31981 | "float8 __ovld __cnfn convert_float8_rte(uint8);\n" |
31982 | "float8 __ovld __cnfn convert_float8_rtz(uint8);\n" |
31983 | "float8 __ovld __cnfn convert_float8_rtp(uint8);\n" |
31984 | "float8 __ovld __cnfn convert_float8_rtn(uint8);\n" |
31985 | "float8 __ovld __cnfn convert_float8(uint8);\n" |
31986 | "float8 __ovld __cnfn convert_float8_rte(long8);\n" |
31987 | "float8 __ovld __cnfn convert_float8_rtz(long8);\n" |
31988 | "float8 __ovld __cnfn convert_float8_rtp(long8);\n" |
31989 | "float8 __ovld __cnfn convert_float8_rtn(long8);\n" |
31990 | "float8 __ovld __cnfn convert_float8(long8);\n" |
31991 | "float8 __ovld __cnfn convert_float8_rte(ulong8);\n" |
31992 | "float8 __ovld __cnfn convert_float8_rtz(ulong8);\n" |
31993 | "float8 __ovld __cnfn convert_float8_rtp(ulong8);\n" |
31994 | "float8 __ovld __cnfn convert_float8_rtn(ulong8);\n" |
31995 | "float8 __ovld __cnfn convert_float8(ulong8);\n" |
31996 | "float8 __ovld __cnfn convert_float8_rte(float8);\n" |
31997 | "float8 __ovld __cnfn convert_float8_rtz(float8);\n" |
31998 | "float8 __ovld __cnfn convert_float8_rtp(float8);\n" |
31999 | "float8 __ovld __cnfn convert_float8_rtn(float8);\n" |
32000 | "float8 __ovld __cnfn convert_float8(float8);\n" |
32001 | "char16 __ovld __cnfn convert_char16_rte(char16);\n" |
32002 | "char16 __ovld __cnfn convert_char16_sat_rte(char16);\n" |
32003 | "char16 __ovld __cnfn convert_char16_rtz(char16);\n" |
32004 | "char16 __ovld __cnfn convert_char16_sat_rtz(char16);\n" |
32005 | "char16 __ovld __cnfn convert_char16_rtp(char16);\n" |
32006 | "char16 __ovld __cnfn convert_char16_sat_rtp(char16);\n" |
32007 | "char16 __ovld __cnfn convert_char16_rtn(char16);\n" |
32008 | "char16 __ovld __cnfn convert_char16_sat_rtn(char16);\n" |
32009 | "char16 __ovld __cnfn convert_char16(char16);\n" |
32010 | "char16 __ovld __cnfn convert_char16_sat(char16);\n" |
32011 | "char16 __ovld __cnfn convert_char16_rte(uchar16);\n" |
32012 | "char16 __ovld __cnfn convert_char16_sat_rte(uchar16);\n" |
32013 | "char16 __ovld __cnfn convert_char16_rtz(uchar16);\n" |
32014 | "char16 __ovld __cnfn convert_char16_sat_rtz(uchar16);\n" |
32015 | "char16 __ovld __cnfn convert_char16_rtp(uchar16);\n" |
32016 | "char16 __ovld __cnfn convert_char16_sat_rtp(uchar16);\n" |
32017 | "char16 __ovld __cnfn convert_char16_rtn(uchar16);\n" |
32018 | "char16 __ovld __cnfn convert_char16_sat_rtn(uchar16);\n" |
32019 | "char16 __ovld __cnfn convert_char16(uchar16);\n" |
32020 | "char16 __ovld __cnfn convert_char16_sat(uchar16);\n" |
32021 | "char16 __ovld __cnfn convert_char16_rte(short16);\n" |
32022 | "char16 __ovld __cnfn convert_char16_sat_rte(short16);\n" |
32023 | "char16 __ovld __cnfn convert_char16_rtz(short16);\n" |
32024 | "char16 __ovld __cnfn convert_char16_sat_rtz(short16);\n" |
32025 | "char16 __ovld __cnfn convert_char16_rtp(short16);\n" |
32026 | "char16 __ovld __cnfn convert_char16_sat_rtp(short16);\n" |
32027 | "char16 __ovld __cnfn convert_char16_rtn(short16);\n" |
32028 | "char16 __ovld __cnfn convert_char16_sat_rtn(short16);\n" |
32029 | "char16 __ovld __cnfn convert_char16(short16);\n" |
32030 | "char16 __ovld __cnfn convert_char16_sat(short16);\n" |
32031 | "char16 __ovld __cnfn convert_char16_rte(ushort16);\n" |
32032 | "char16 __ovld __cnfn convert_char16_sat_rte(ushort16);\n" |
32033 | "char16 __ovld __cnfn convert_char16_rtz(ushort16);\n" |
32034 | "char16 __ovld __cnfn convert_char16_sat_rtz(ushort16);\n" |
32035 | "char16 __ovld __cnfn convert_char16_rtp(ushort16);\n" |
32036 | "char16 __ovld __cnfn convert_char16_sat_rtp(ushort16);\n" |
32037 | "char16 __ovld __cnfn convert_char16_rtn(ushort16);\n" |
32038 | "char16 __ovld __cnfn convert_char16_sat_rtn(ushort16);\n" |
32039 | "char16 __ovld __cnfn convert_char16(ushort16);\n" |
32040 | "char16 __ovld __cnfn convert_char16_sat(ushort16);\n" |
32041 | "char16 __ovld __cnfn convert_char16_rte(int16);\n" |
32042 | "char16 __ovld __cnfn convert_char16_sat_rte(int16);\n" |
32043 | "char16 __ovld __cnfn convert_char16_rtz(int16);\n" |
32044 | "char16 __ovld __cnfn convert_char16_sat_rtz(int16);\n" |
32045 | "char16 __ovld __cnfn convert_char16_rtp(int16);\n" |
32046 | "char16 __ovld __cnfn convert_char16_sat_rtp(int16);\n" |
32047 | "char16 __ovld __cnfn convert_char16_rtn(int16);\n" |
32048 | "char16 __ovld __cnfn convert_char16_sat_rtn(int16);\n" |
32049 | "char16 __ovld __cnfn convert_char16(int16);\n" |
32050 | "char16 __ovld __cnfn convert_char16_sat(int16);\n" |
32051 | "char16 __ovld __cnfn convert_char16_rte(uint16);\n" |
32052 | "char16 __ovld __cnfn convert_char16_sat_rte(uint16);\n" |
32053 | "char16 __ovld __cnfn convert_char16_rtz(uint16);\n" |
32054 | "char16 __ovld __cnfn convert_char16_sat_rtz(uint16);\n" |
32055 | "char16 __ovld __cnfn convert_char16_rtp(uint16);\n" |
32056 | "char16 __ovld __cnfn convert_char16_sat_rtp(uint16);\n" |
32057 | "char16 __ovld __cnfn convert_char16_rtn(uint16);\n" |
32058 | "char16 __ovld __cnfn convert_char16_sat_rtn(uint16);\n" |
32059 | "char16 __ovld __cnfn convert_char16(uint16);\n" |
32060 | "char16 __ovld __cnfn convert_char16_sat(uint16);\n" |
32061 | "char16 __ovld __cnfn convert_char16_rte(long16);\n" |
32062 | "char16 __ovld __cnfn convert_char16_sat_rte(long16);\n" |
32063 | "char16 __ovld __cnfn convert_char16_rtz(long16);\n" |
32064 | "char16 __ovld __cnfn convert_char16_sat_rtz(long16);\n" |
32065 | "char16 __ovld __cnfn convert_char16_rtp(long16);\n" |
32066 | "char16 __ovld __cnfn convert_char16_sat_rtp(long16);\n" |
32067 | "char16 __ovld __cnfn convert_char16_rtn(long16);\n" |
32068 | "char16 __ovld __cnfn convert_char16_sat_rtn(long16);\n" |
32069 | "char16 __ovld __cnfn convert_char16(long16);\n" |
32070 | "char16 __ovld __cnfn convert_char16_sat(long16);\n" |
32071 | "char16 __ovld __cnfn convert_char16_rte(ulong16);\n" |
32072 | "char16 __ovld __cnfn convert_char16_sat_rte(ulong16);\n" |
32073 | "char16 __ovld __cnfn convert_char16_rtz(ulong16);\n" |
32074 | "char16 __ovld __cnfn convert_char16_sat_rtz(ulong16);\n" |
32075 | "char16 __ovld __cnfn convert_char16_rtp(ulong16);\n" |
32076 | "char16 __ovld __cnfn convert_char16_sat_rtp(ulong16);\n" |
32077 | "char16 __ovld __cnfn convert_char16_rtn(ulong16);\n" |
32078 | "char16 __ovld __cnfn convert_char16_sat_rtn(ulong16);\n" |
32079 | "char16 __ovld __cnfn convert_char16(ulong16);\n" |
32080 | "char16 __ovld __cnfn convert_char16_sat(ulong16);\n" |
32081 | "char16 __ovld __cnfn convert_char16_rte(float16);\n" |
32082 | "char16 __ovld __cnfn convert_char16_sat_rte(float16);\n" |
32083 | "char16 __ovld __cnfn convert_char16_rtz(float16);\n" |
32084 | "char16 __ovld __cnfn convert_char16_sat_rtz(float16);\n" |
32085 | "char16 __ovld __cnfn convert_char16_rtp(float16);\n" |
32086 | "char16 __ovld __cnfn convert_char16_sat_rtp(float16);\n" |
32087 | "char16 __ovld __cnfn convert_char16_rtn(float16);\n" |
32088 | "char16 __ovld __cnfn convert_char16_sat_rtn(float16);\n" |
32089 | "char16 __ovld __cnfn convert_char16(float16);\n" |
32090 | "char16 __ovld __cnfn convert_char16_sat(float16);\n" |
32091 | "uchar16 __ovld __cnfn convert_uchar16_rte(char16);\n" |
32092 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(char16);\n" |
32093 | "uchar16 __ovld __cnfn convert_uchar16_rtz(char16);\n" |
32094 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(char16);\n" |
32095 | "uchar16 __ovld __cnfn convert_uchar16_rtp(char16);\n" |
32096 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(char16);\n" |
32097 | "uchar16 __ovld __cnfn convert_uchar16_rtn(char16);\n" |
32098 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(char16);\n" |
32099 | "uchar16 __ovld __cnfn convert_uchar16(char16);\n" |
32100 | "uchar16 __ovld __cnfn convert_uchar16_sat(char16);\n" |
32101 | "uchar16 __ovld __cnfn convert_uchar16_rte(uchar16);\n" |
32102 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(uchar16);\n" |
32103 | "uchar16 __ovld __cnfn convert_uchar16_rtz(uchar16);\n" |
32104 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uchar16);\n" |
32105 | "uchar16 __ovld __cnfn convert_uchar16_rtp(uchar16);\n" |
32106 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uchar16);\n" |
32107 | "uchar16 __ovld __cnfn convert_uchar16_rtn(uchar16);\n" |
32108 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uchar16);\n" |
32109 | "uchar16 __ovld __cnfn convert_uchar16(uchar16);\n" |
32110 | "uchar16 __ovld __cnfn convert_uchar16_sat(uchar16);\n" |
32111 | "uchar16 __ovld __cnfn convert_uchar16_rte(short16);\n" |
32112 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(short16);\n" |
32113 | "uchar16 __ovld __cnfn convert_uchar16_rtz(short16);\n" |
32114 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(short16);\n" |
32115 | "uchar16 __ovld __cnfn convert_uchar16_rtp(short16);\n" |
32116 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(short16);\n" |
32117 | "uchar16 __ovld __cnfn convert_uchar16_rtn(short16);\n" |
32118 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(short16);\n" |
32119 | "uchar16 __ovld __cnfn convert_uchar16(short16);\n" |
32120 | "uchar16 __ovld __cnfn convert_uchar16_sat(short16);\n" |
32121 | "uchar16 __ovld __cnfn convert_uchar16_rte(ushort16);\n" |
32122 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(ushort16);\n" |
32123 | "uchar16 __ovld __cnfn convert_uchar16_rtz(ushort16);\n" |
32124 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ushort16);\n" |
32125 | "uchar16 __ovld __cnfn convert_uchar16_rtp(ushort16);\n" |
32126 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ushort16);\n" |
32127 | "uchar16 __ovld __cnfn convert_uchar16_rtn(ushort16);\n" |
32128 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ushort16);\n" |
32129 | "uchar16 __ovld __cnfn convert_uchar16(ushort16);\n" |
32130 | "uchar16 __ovld __cnfn convert_uchar16_sat(ushort16);\n" |
32131 | "uchar16 __ovld __cnfn convert_uchar16_rte(int16);\n" |
32132 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(int16);\n" |
32133 | "uchar16 __ovld __cnfn convert_uchar16_rtz(int16);\n" |
32134 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(int16);\n" |
32135 | "uchar16 __ovld __cnfn convert_uchar16_rtp(int16);\n" |
32136 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(int16);\n" |
32137 | "uchar16 __ovld __cnfn convert_uchar16_rtn(int16);\n" |
32138 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(int16);\n" |
32139 | "uchar16 __ovld __cnfn convert_uchar16(int16);\n" |
32140 | "uchar16 __ovld __cnfn convert_uchar16_sat(int16);\n" |
32141 | "uchar16 __ovld __cnfn convert_uchar16_rte(uint16);\n" |
32142 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(uint16);\n" |
32143 | "uchar16 __ovld __cnfn convert_uchar16_rtz(uint16);\n" |
32144 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uint16);\n" |
32145 | "uchar16 __ovld __cnfn convert_uchar16_rtp(uint16);\n" |
32146 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uint16);\n" |
32147 | "uchar16 __ovld __cnfn convert_uchar16_rtn(uint16);\n" |
32148 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uint16);\n" |
32149 | "uchar16 __ovld __cnfn convert_uchar16(uint16);\n" |
32150 | "uchar16 __ovld __cnfn convert_uchar16_sat(uint16);\n" |
32151 | "uchar16 __ovld __cnfn convert_uchar16_rte(long16);\n" |
32152 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(long16);\n" |
32153 | "uchar16 __ovld __cnfn convert_uchar16_rtz(long16);\n" |
32154 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(long16);\n" |
32155 | "uchar16 __ovld __cnfn convert_uchar16_rtp(long16);\n" |
32156 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(long16);\n" |
32157 | "uchar16 __ovld __cnfn convert_uchar16_rtn(long16);\n" |
32158 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(long16);\n" |
32159 | "uchar16 __ovld __cnfn convert_uchar16(long16);\n" |
32160 | "uchar16 __ovld __cnfn convert_uchar16_sat(long16);\n" |
32161 | "uchar16 __ovld __cnfn convert_uchar16_rte(ulong16);\n" |
32162 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(ulong16);\n" |
32163 | "uchar16 __ovld __cnfn convert_uchar16_rtz(ulong16);\n" |
32164 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ulong16);\n" |
32165 | "uchar16 __ovld __cnfn convert_uchar16_rtp(ulong16);\n" |
32166 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ulong16);\n" |
32167 | "uchar16 __ovld __cnfn convert_uchar16_rtn(ulong16);\n" |
32168 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ulong16);\n" |
32169 | "uchar16 __ovld __cnfn convert_uchar16(ulong16);\n" |
32170 | "uchar16 __ovld __cnfn convert_uchar16_sat(ulong16);\n" |
32171 | "uchar16 __ovld __cnfn convert_uchar16_rte(float16);\n" |
32172 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(float16);\n" |
32173 | "uchar16 __ovld __cnfn convert_uchar16_rtz(float16);\n" |
32174 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(float16);\n" |
32175 | "uchar16 __ovld __cnfn convert_uchar16_rtp(float16);\n" |
32176 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(float16);\n" |
32177 | "uchar16 __ovld __cnfn convert_uchar16_rtn(float16);\n" |
32178 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(float16);\n" |
32179 | "uchar16 __ovld __cnfn convert_uchar16(float16);\n" |
32180 | "uchar16 __ovld __cnfn convert_uchar16_sat(float16);\n" |
32181 | "short16 __ovld __cnfn convert_short16_rte(char16);\n" |
32182 | "short16 __ovld __cnfn convert_short16_sat_rte(char16);\n" |
32183 | "short16 __ovld __cnfn convert_short16_rtz(char16);\n" |
32184 | "short16 __ovld __cnfn convert_short16_sat_rtz(char16);\n" |
32185 | "short16 __ovld __cnfn convert_short16_rtp(char16);\n" |
32186 | "short16 __ovld __cnfn convert_short16_sat_rtp(char16);\n" |
32187 | "short16 __ovld __cnfn convert_short16_rtn(char16);\n" |
32188 | "short16 __ovld __cnfn convert_short16_sat_rtn(char16);\n" |
32189 | "short16 __ovld __cnfn convert_short16(char16);\n" |
32190 | "short16 __ovld __cnfn convert_short16_sat(char16);\n" |
32191 | "short16 __ovld __cnfn convert_short16_rte(uchar16);\n" |
32192 | "short16 __ovld __cnfn convert_short16_sat_rte(uchar16);\n" |
32193 | "short16 __ovld __cnfn convert_short16_rtz(uchar16);\n" |
32194 | "short16 __ovld __cnfn convert_short16_sat_rtz(uchar16);\n" |
32195 | "short16 __ovld __cnfn convert_short16_rtp(uchar16);\n" |
32196 | "short16 __ovld __cnfn convert_short16_sat_rtp(uchar16);\n" |
32197 | "short16 __ovld __cnfn convert_short16_rtn(uchar16);\n" |
32198 | "short16 __ovld __cnfn convert_short16_sat_rtn(uchar16);\n" |
32199 | "short16 __ovld __cnfn convert_short16(uchar16);\n" |
32200 | "short16 __ovld __cnfn convert_short16_sat(uchar16);\n" |
32201 | "short16 __ovld __cnfn convert_short16_rte(short16);\n" |
32202 | "short16 __ovld __cnfn convert_short16_sat_rte(short16);\n" |
32203 | "short16 __ovld __cnfn convert_short16_rtz(short16);\n" |
32204 | "short16 __ovld __cnfn convert_short16_sat_rtz(short16);\n" |
32205 | "short16 __ovld __cnfn convert_short16_rtp(short16);\n" |
32206 | "short16 __ovld __cnfn convert_short16_sat_rtp(short16);\n" |
32207 | "short16 __ovld __cnfn convert_short16_rtn(short16);\n" |
32208 | "short16 __ovld __cnfn convert_short16_sat_rtn(short16);\n" |
32209 | "short16 __ovld __cnfn convert_short16(short16);\n" |
32210 | "short16 __ovld __cnfn convert_short16_sat(short16);\n" |
32211 | "short16 __ovld __cnfn convert_short16_rte(ushort16);\n" |
32212 | "short16 __ovld __cnfn convert_short16_sat_rte(ushort16);\n" |
32213 | "short16 __ovld __cnfn convert_short16_rtz(ushort16);\n" |
32214 | "short16 __ovld __cnfn convert_short16_sat_rtz(ushort16);\n" |
32215 | "short16 __ovld __cnfn convert_short16_rtp(ushort16);\n" |
32216 | "short16 __ovld __cnfn convert_short16_sat_rtp(ushort16);\n" |
32217 | "short16 __ovld __cnfn convert_short16_rtn(ushort16);\n" |
32218 | "short16 __ovld __cnfn convert_short16_sat_rtn(ushort16);\n" |
32219 | "short16 __ovld __cnfn convert_short16(ushort16);\n" |
32220 | "short16 __ovld __cnfn convert_short16_sat(ushort16);\n" |
32221 | "short16 __ovld __cnfn convert_short16_rte(int16);\n" |
32222 | "short16 __ovld __cnfn convert_short16_sat_rte(int16);\n" |
32223 | "short16 __ovld __cnfn convert_short16_rtz(int16);\n" |
32224 | "short16 __ovld __cnfn convert_short16_sat_rtz(int16);\n" |
32225 | "short16 __ovld __cnfn convert_short16_rtp(int16);\n" |
32226 | "short16 __ovld __cnfn convert_short16_sat_rtp(int16);\n" |
32227 | "short16 __ovld __cnfn convert_short16_rtn(int16);\n" |
32228 | "short16 __ovld __cnfn convert_short16_sat_rtn(int16);\n" |
32229 | "short16 __ovld __cnfn convert_short16(int16);\n" |
32230 | "short16 __ovld __cnfn convert_short16_sat(int16);\n" |
32231 | "short16 __ovld __cnfn convert_short16_rte(uint16);\n" |
32232 | "short16 __ovld __cnfn convert_short16_sat_rte(uint16);\n" |
32233 | "short16 __ovld __cnfn convert_short16_rtz(uint16);\n" |
32234 | "short16 __ovld __cnfn convert_short16_sat_rtz(uint16);\n" |
32235 | "short16 __ovld __cnfn convert_short16_rtp(uint16);\n" |
32236 | "short16 __ovld __cnfn convert_short16_sat_rtp(uint16);\n" |
32237 | "short16 __ovld __cnfn convert_short16_rtn(uint16);\n" |
32238 | "short16 __ovld __cnfn convert_short16_sat_rtn(uint16);\n" |
32239 | "short16 __ovld __cnfn convert_short16(uint16);\n" |
32240 | "short16 __ovld __cnfn convert_short16_sat(uint16);\n" |
32241 | "short16 __ovld __cnfn convert_short16_rte(long16);\n" |
32242 | "short16 __ovld __cnfn convert_short16_sat_rte(long16);\n" |
32243 | "short16 __ovld __cnfn convert_short16_rtz(long16);\n" |
32244 | "short16 __ovld __cnfn convert_short16_sat_rtz(long16);\n" |
32245 | "short16 __ovld __cnfn convert_short16_rtp(long16);\n" |
32246 | "short16 __ovld __cnfn convert_short16_sat_rtp(long16);\n" |
32247 | "short16 __ovld __cnfn convert_short16_rtn(long16);\n" |
32248 | "short16 __ovld __cnfn convert_short16_sat_rtn(long16);\n" |
32249 | "short16 __ovld __cnfn convert_short16(long16);\n" |
32250 | "short16 __ovld __cnfn convert_short16_sat(long16);\n" |
32251 | "short16 __ovld __cnfn convert_short16_rte(ulong16);\n" |
32252 | "short16 __ovld __cnfn convert_short16_sat_rte(ulong16);\n" |
32253 | "short16 __ovld __cnfn convert_short16_rtz(ulong16);\n" |
32254 | "short16 __ovld __cnfn convert_short16_sat_rtz(ulong16);\n" |
32255 | "short16 __ovld __cnfn convert_short16_rtp(ulong16);\n" |
32256 | "short16 __ovld __cnfn convert_short16_sat_rtp(ulong16);\n" |
32257 | "short16 __ovld __cnfn convert_short16_rtn(ulong16);\n" |
32258 | "short16 __ovld __cnfn convert_short16_sat_rtn(ulong16);\n" |
32259 | "short16 __ovld __cnfn convert_short16(ulong16);\n" |
32260 | "short16 __ovld __cnfn convert_short16_sat(ulong16);\n" |
32261 | "short16 __ovld __cnfn convert_short16_rte(float16);\n" |
32262 | "short16 __ovld __cnfn convert_short16_sat_rte(float16);\n" |
32263 | "short16 __ovld __cnfn convert_short16_rtz(float16);\n" |
32264 | "short16 __ovld __cnfn convert_short16_sat_rtz(float16);\n" |
32265 | "short16 __ovld __cnfn convert_short16_rtp(float16);\n" |
32266 | "short16 __ovld __cnfn convert_short16_sat_rtp(float16);\n" |
32267 | "short16 __ovld __cnfn convert_short16_rtn(float16);\n" |
32268 | "short16 __ovld __cnfn convert_short16_sat_rtn(float16);\n" |
32269 | "short16 __ovld __cnfn convert_short16(float16);\n" |
32270 | "short16 __ovld __cnfn convert_short16_sat(float16);\n" |
32271 | "ushort16 __ovld __cnfn convert_ushort16_rte(char16);\n" |
32272 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(char16);\n" |
32273 | "ushort16 __ovld __cnfn convert_ushort16_rtz(char16);\n" |
32274 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(char16);\n" |
32275 | "ushort16 __ovld __cnfn convert_ushort16_rtp(char16);\n" |
32276 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(char16);\n" |
32277 | "ushort16 __ovld __cnfn convert_ushort16_rtn(char16);\n" |
32278 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(char16);\n" |
32279 | "ushort16 __ovld __cnfn convert_ushort16(char16);\n" |
32280 | "ushort16 __ovld __cnfn convert_ushort16_sat(char16);\n" |
32281 | "ushort16 __ovld __cnfn convert_ushort16_rte(uchar16);\n" |
32282 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(uchar16);\n" |
32283 | "ushort16 __ovld __cnfn convert_ushort16_rtz(uchar16);\n" |
32284 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uchar16);\n" |
32285 | "ushort16 __ovld __cnfn convert_ushort16_rtp(uchar16);\n" |
32286 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uchar16);\n" |
32287 | "ushort16 __ovld __cnfn convert_ushort16_rtn(uchar16);\n" |
32288 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uchar16);\n" |
32289 | "ushort16 __ovld __cnfn convert_ushort16(uchar16);\n" |
32290 | "ushort16 __ovld __cnfn convert_ushort16_sat(uchar16);\n" |
32291 | "ushort16 __ovld __cnfn convert_ushort16_rte(short16);\n" |
32292 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(short16);\n" |
32293 | "ushort16 __ovld __cnfn convert_ushort16_rtz(short16);\n" |
32294 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(short16);\n" |
32295 | "ushort16 __ovld __cnfn convert_ushort16_rtp(short16);\n" |
32296 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(short16);\n" |
32297 | "ushort16 __ovld __cnfn convert_ushort16_rtn(short16);\n" |
32298 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(short16);\n" |
32299 | "ushort16 __ovld __cnfn convert_ushort16(short16);\n" |
32300 | "ushort16 __ovld __cnfn convert_ushort16_sat(short16);\n" |
32301 | "ushort16 __ovld __cnfn convert_ushort16_rte(ushort16);\n" |
32302 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(ushort16);\n" |
32303 | "ushort16 __ovld __cnfn convert_ushort16_rtz(ushort16);\n" |
32304 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ushort16);\n" |
32305 | "ushort16 __ovld __cnfn convert_ushort16_rtp(ushort16);\n" |
32306 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ushort16);\n" |
32307 | "ushort16 __ovld __cnfn convert_ushort16_rtn(ushort16);\n" |
32308 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ushort16);\n" |
32309 | "ushort16 __ovld __cnfn convert_ushort16(ushort16);\n" |
32310 | "ushort16 __ovld __cnfn convert_ushort16_sat(ushort16);\n" |
32311 | "ushort16 __ovld __cnfn convert_ushort16_rte(int16);\n" |
32312 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(int16);\n" |
32313 | "ushort16 __ovld __cnfn convert_ushort16_rtz(int16);\n" |
32314 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(int16);\n" |
32315 | "ushort16 __ovld __cnfn convert_ushort16_rtp(int16);\n" |
32316 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(int16);\n" |
32317 | "ushort16 __ovld __cnfn convert_ushort16_rtn(int16);\n" |
32318 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(int16);\n" |
32319 | "ushort16 __ovld __cnfn convert_ushort16(int16);\n" |
32320 | "ushort16 __ovld __cnfn convert_ushort16_sat(int16);\n" |
32321 | "ushort16 __ovld __cnfn convert_ushort16_rte(uint16);\n" |
32322 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(uint16);\n" |
32323 | "ushort16 __ovld __cnfn convert_ushort16_rtz(uint16);\n" |
32324 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uint16);\n" |
32325 | "ushort16 __ovld __cnfn convert_ushort16_rtp(uint16);\n" |
32326 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uint16);\n" |
32327 | "ushort16 __ovld __cnfn convert_ushort16_rtn(uint16);\n" |
32328 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uint16);\n" |
32329 | "ushort16 __ovld __cnfn convert_ushort16(uint16);\n" |
32330 | "ushort16 __ovld __cnfn convert_ushort16_sat(uint16);\n" |
32331 | "ushort16 __ovld __cnfn convert_ushort16_rte(long16);\n" |
32332 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(long16);\n" |
32333 | "ushort16 __ovld __cnfn convert_ushort16_rtz(long16);\n" |
32334 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(long16);\n" |
32335 | "ushort16 __ovld __cnfn convert_ushort16_rtp(long16);\n" |
32336 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(long16);\n" |
32337 | "ushort16 __ovld __cnfn convert_ushort16_rtn(long16);\n" |
32338 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(long16);\n" |
32339 | "ushort16 __ovld __cnfn convert_ushort16(long16);\n" |
32340 | "ushort16 __ovld __cnfn convert_ushort16_sat(long16);\n" |
32341 | "ushort16 __ovld __cnfn convert_ushort16_rte(ulong16);\n" |
32342 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(ulong16);\n" |
32343 | "ushort16 __ovld __cnfn convert_ushort16_rtz(ulong16);\n" |
32344 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ulong16);\n" |
32345 | "ushort16 __ovld __cnfn convert_ushort16_rtp(ulong16);\n" |
32346 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ulong16);\n" |
32347 | "ushort16 __ovld __cnfn convert_ushort16_rtn(ulong16);\n" |
32348 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ulong16);\n" |
32349 | "ushort16 __ovld __cnfn convert_ushort16(ulong16);\n" |
32350 | "ushort16 __ovld __cnfn convert_ushort16_sat(ulong16);\n" |
32351 | "ushort16 __ovld __cnfn convert_ushort16_rte(float16);\n" |
32352 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(float16);\n" |
32353 | "ushort16 __ovld __cnfn convert_ushort16_rtz(float16);\n" |
32354 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(float16);\n" |
32355 | "ushort16 __ovld __cnfn convert_ushort16_rtp(float16);\n" |
32356 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(float16);\n" |
32357 | "ushort16 __ovld __cnfn convert_ushort16_rtn(float16);\n" |
32358 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(float16);\n" |
32359 | "ushort16 __ovld __cnfn convert_ushort16(float16);\n" |
32360 | "ushort16 __ovld __cnfn convert_ushort16_sat(float16);\n" |
32361 | "int16 __ovld __cnfn convert_int16_rte(char16);\n" |
32362 | "int16 __ovld __cnfn convert_int16_sat_rte(char16);\n" |
32363 | "int16 __ovld __cnfn convert_int16_rtz(char16);\n" |
32364 | "int16 __ovld __cnfn convert_int16_sat_rtz(char16);\n" |
32365 | "int16 __ovld __cnfn convert_int16_rtp(char16);\n" |
32366 | "int16 __ovld __cnfn convert_int16_sat_rtp(char16);\n" |
32367 | "int16 __ovld __cnfn convert_int16_rtn(char16);\n" |
32368 | "int16 __ovld __cnfn convert_int16_sat_rtn(char16);\n" |
32369 | "int16 __ovld __cnfn convert_int16(char16);\n" |
32370 | "int16 __ovld __cnfn convert_int16_sat(char16);\n" |
32371 | "int16 __ovld __cnfn convert_int16_rte(uchar16);\n" |
32372 | "int16 __ovld __cnfn convert_int16_sat_rte(uchar16);\n" |
32373 | "int16 __ovld __cnfn convert_int16_rtz(uchar16);\n" |
32374 | "int16 __ovld __cnfn convert_int16_sat_rtz(uchar16);\n" |
32375 | "int16 __ovld __cnfn convert_int16_rtp(uchar16);\n" |
32376 | "int16 __ovld __cnfn convert_int16_sat_rtp(uchar16);\n" |
32377 | "int16 __ovld __cnfn convert_int16_rtn(uchar16);\n" |
32378 | "int16 __ovld __cnfn convert_int16_sat_rtn(uchar16);\n" |
32379 | "int16 __ovld __cnfn convert_int16(uchar16);\n" |
32380 | "int16 __ovld __cnfn convert_int16_sat(uchar16);\n" |
32381 | "int16 __ovld __cnfn convert_int16_rte(short16);\n" |
32382 | "int16 __ovld __cnfn convert_int16_sat_rte(short16);\n" |
32383 | "int16 __ovld __cnfn convert_int16_rtz(short16);\n" |
32384 | "int16 __ovld __cnfn convert_int16_sat_rtz(short16);\n" |
32385 | "int16 __ovld __cnfn convert_int16_rtp(short16);\n" |
32386 | "int16 __ovld __cnfn convert_int16_sat_rtp(short16);\n" |
32387 | "int16 __ovld __cnfn convert_int16_rtn(short16);\n" |
32388 | "int16 __ovld __cnfn convert_int16_sat_rtn(short16);\n" |
32389 | "int16 __ovld __cnfn convert_int16(short16);\n" |
32390 | "int16 __ovld __cnfn convert_int16_sat(short16);\n" |
32391 | "int16 __ovld __cnfn convert_int16_rte(ushort16);\n" |
32392 | "int16 __ovld __cnfn convert_int16_sat_rte(ushort16);\n" |
32393 | "int16 __ovld __cnfn convert_int16_rtz(ushort16);\n" |
32394 | "int16 __ovld __cnfn convert_int16_sat_rtz(ushort16);\n" |
32395 | "int16 __ovld __cnfn convert_int16_rtp(ushort16);\n" |
32396 | "int16 __ovld __cnfn convert_int16_sat_rtp(ushort16);\n" |
32397 | "int16 __ovld __cnfn convert_int16_rtn(ushort16);\n" |
32398 | "int16 __ovld __cnfn convert_int16_sat_rtn(ushort16);\n" |
32399 | "int16 __ovld __cnfn convert_int16(ushort16);\n" |
32400 | "int16 __ovld __cnfn convert_int16_sat(ushort16);\n" |
32401 | "int16 __ovld __cnfn convert_int16_rte(int16);\n" |
32402 | "int16 __ovld __cnfn convert_int16_sat_rte(int16);\n" |
32403 | "int16 __ovld __cnfn convert_int16_rtz(int16);\n" |
32404 | "int16 __ovld __cnfn convert_int16_sat_rtz(int16);\n" |
32405 | "int16 __ovld __cnfn convert_int16_rtp(int16);\n" |
32406 | "int16 __ovld __cnfn convert_int16_sat_rtp(int16);\n" |
32407 | "int16 __ovld __cnfn convert_int16_rtn(int16);\n" |
32408 | "int16 __ovld __cnfn convert_int16_sat_rtn(int16);\n" |
32409 | "int16 __ovld __cnfn convert_int16(int16);\n" |
32410 | "int16 __ovld __cnfn convert_int16_sat(int16);\n" |
32411 | "int16 __ovld __cnfn convert_int16_rte(uint16);\n" |
32412 | "int16 __ovld __cnfn convert_int16_sat_rte(uint16);\n" |
32413 | "int16 __ovld __cnfn convert_int16_rtz(uint16);\n" |
32414 | "int16 __ovld __cnfn convert_int16_sat_rtz(uint16);\n" |
32415 | "int16 __ovld __cnfn convert_int16_rtp(uint16);\n" |
32416 | "int16 __ovld __cnfn convert_int16_sat_rtp(uint16);\n" |
32417 | "int16 __ovld __cnfn convert_int16_rtn(uint16);\n" |
32418 | "int16 __ovld __cnfn convert_int16_sat_rtn(uint16);\n" |
32419 | "int16 __ovld __cnfn convert_int16(uint16);\n" |
32420 | "int16 __ovld __cnfn convert_int16_sat(uint16);\n" |
32421 | "int16 __ovld __cnfn convert_int16_rte(long16);\n" |
32422 | "int16 __ovld __cnfn convert_int16_sat_rte(long16);\n" |
32423 | "int16 __ovld __cnfn convert_int16_rtz(long16);\n" |
32424 | "int16 __ovld __cnfn convert_int16_sat_rtz(long16);\n" |
32425 | "int16 __ovld __cnfn convert_int16_rtp(long16);\n" |
32426 | "int16 __ovld __cnfn convert_int16_sat_rtp(long16);\n" |
32427 | "int16 __ovld __cnfn convert_int16_rtn(long16);\n" |
32428 | "int16 __ovld __cnfn convert_int16_sat_rtn(long16);\n" |
32429 | "int16 __ovld __cnfn convert_int16(long16);\n" |
32430 | "int16 __ovld __cnfn convert_int16_sat(long16);\n" |
32431 | "int16 __ovld __cnfn convert_int16_rte(ulong16);\n" |
32432 | "int16 __ovld __cnfn convert_int16_sat_rte(ulong16);\n" |
32433 | "int16 __ovld __cnfn convert_int16_rtz(ulong16);\n" |
32434 | "int16 __ovld __cnfn convert_int16_sat_rtz(ulong16);\n" |
32435 | "int16 __ovld __cnfn convert_int16_rtp(ulong16);\n" |
32436 | "int16 __ovld __cnfn convert_int16_sat_rtp(ulong16);\n" |
32437 | "int16 __ovld __cnfn convert_int16_rtn(ulong16);\n" |
32438 | "int16 __ovld __cnfn convert_int16_sat_rtn(ulong16);\n" |
32439 | "int16 __ovld __cnfn convert_int16(ulong16);\n" |
32440 | "int16 __ovld __cnfn convert_int16_sat(ulong16);\n" |
32441 | "int16 __ovld __cnfn convert_int16_rte(float16);\n" |
32442 | "int16 __ovld __cnfn convert_int16_sat_rte(float16);\n" |
32443 | "int16 __ovld __cnfn convert_int16_rtz(float16);\n" |
32444 | "int16 __ovld __cnfn convert_int16_sat_rtz(float16);\n" |
32445 | "int16 __ovld __cnfn convert_int16_rtp(float16);\n" |
32446 | "int16 __ovld __cnfn convert_int16_sat_rtp(float16);\n" |
32447 | "int16 __ovld __cnfn convert_int16_rtn(float16);\n" |
32448 | "int16 __ovld __cnfn convert_int16_sat_rtn(float16);\n" |
32449 | "int16 __ovld __cnfn convert_int16(float16);\n" |
32450 | "int16 __ovld __cnfn convert_int16_sat(float16);\n" |
32451 | "uint16 __ovld __cnfn convert_uint16_rte(char16);\n" |
32452 | "uint16 __ovld __cnfn convert_uint16_sat_rte(char16);\n" |
32453 | "uint16 __ovld __cnfn convert_uint16_rtz(char16);\n" |
32454 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(char16);\n" |
32455 | "uint16 __ovld __cnfn convert_uint16_rtp(char16);\n" |
32456 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(char16);\n" |
32457 | "uint16 __ovld __cnfn convert_uint16_rtn(char16);\n" |
32458 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(char16);\n" |
32459 | "uint16 __ovld __cnfn convert_uint16(char16);\n" |
32460 | "uint16 __ovld __cnfn convert_uint16_sat(char16);\n" |
32461 | "uint16 __ovld __cnfn convert_uint16_rte(uchar16);\n" |
32462 | "uint16 __ovld __cnfn convert_uint16_sat_rte(uchar16);\n" |
32463 | "uint16 __ovld __cnfn convert_uint16_rtz(uchar16);\n" |
32464 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(uchar16);\n" |
32465 | "uint16 __ovld __cnfn convert_uint16_rtp(uchar16);\n" |
32466 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(uchar16);\n" |
32467 | "uint16 __ovld __cnfn convert_uint16_rtn(uchar16);\n" |
32468 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(uchar16);\n" |
32469 | "uint16 __ovld __cnfn convert_uint16(uchar16);\n" |
32470 | "uint16 __ovld __cnfn convert_uint16_sat(uchar16);\n" |
32471 | "uint16 __ovld __cnfn convert_uint16_rte(short16);\n" |
32472 | "uint16 __ovld __cnfn convert_uint16_sat_rte(short16);\n" |
32473 | "uint16 __ovld __cnfn convert_uint16_rtz(short16);\n" |
32474 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(short16);\n" |
32475 | "uint16 __ovld __cnfn convert_uint16_rtp(short16);\n" |
32476 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(short16);\n" |
32477 | "uint16 __ovld __cnfn convert_uint16_rtn(short16);\n" |
32478 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(short16);\n" |
32479 | "uint16 __ovld __cnfn convert_uint16(short16);\n" |
32480 | "uint16 __ovld __cnfn convert_uint16_sat(short16);\n" |
32481 | "uint16 __ovld __cnfn convert_uint16_rte(ushort16);\n" |
32482 | "uint16 __ovld __cnfn convert_uint16_sat_rte(ushort16);\n" |
32483 | "uint16 __ovld __cnfn convert_uint16_rtz(ushort16);\n" |
32484 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(ushort16);\n" |
32485 | "uint16 __ovld __cnfn convert_uint16_rtp(ushort16);\n" |
32486 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(ushort16);\n" |
32487 | "uint16 __ovld __cnfn convert_uint16_rtn(ushort16);\n" |
32488 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(ushort16);\n" |
32489 | "uint16 __ovld __cnfn convert_uint16(ushort16);\n" |
32490 | "uint16 __ovld __cnfn convert_uint16_sat(ushort16);\n" |
32491 | "uint16 __ovld __cnfn convert_uint16_rte(int16);\n" |
32492 | "uint16 __ovld __cnfn convert_uint16_sat_rte(int16);\n" |
32493 | "uint16 __ovld __cnfn convert_uint16_rtz(int16);\n" |
32494 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(int16);\n" |
32495 | "uint16 __ovld __cnfn convert_uint16_rtp(int16);\n" |
32496 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(int16);\n" |
32497 | "uint16 __ovld __cnfn convert_uint16_rtn(int16);\n" |
32498 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(int16);\n" |
32499 | "uint16 __ovld __cnfn convert_uint16(int16);\n" |
32500 | "uint16 __ovld __cnfn convert_uint16_sat(int16);\n" |
32501 | "uint16 __ovld __cnfn convert_uint16_rte(uint16);\n" |
32502 | "uint16 __ovld __cnfn convert_uint16_sat_rte(uint16);\n" |
32503 | "uint16 __ovld __cnfn convert_uint16_rtz(uint16);\n" |
32504 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(uint16);\n" |
32505 | "uint16 __ovld __cnfn convert_uint16_rtp(uint16);\n" |
32506 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(uint16);\n" |
32507 | "uint16 __ovld __cnfn convert_uint16_rtn(uint16);\n" |
32508 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(uint16);\n" |
32509 | "uint16 __ovld __cnfn convert_uint16(uint16);\n" |
32510 | "uint16 __ovld __cnfn convert_uint16_sat(uint16);\n" |
32511 | "uint16 __ovld __cnfn convert_uint16_rte(long16);\n" |
32512 | "uint16 __ovld __cnfn convert_uint16_sat_rte(long16);\n" |
32513 | "uint16 __ovld __cnfn convert_uint16_rtz(long16);\n" |
32514 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(long16);\n" |
32515 | "uint16 __ovld __cnfn convert_uint16_rtp(long16);\n" |
32516 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(long16);\n" |
32517 | "uint16 __ovld __cnfn convert_uint16_rtn(long16);\n" |
32518 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(long16);\n" |
32519 | "uint16 __ovld __cnfn convert_uint16(long16);\n" |
32520 | "uint16 __ovld __cnfn convert_uint16_sat(long16);\n" |
32521 | "uint16 __ovld __cnfn convert_uint16_rte(ulong16);\n" |
32522 | "uint16 __ovld __cnfn convert_uint16_sat_rte(ulong16);\n" |
32523 | "uint16 __ovld __cnfn convert_uint16_rtz(ulong16);\n" |
32524 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(ulong16);\n" |
32525 | "uint16 __ovld __cnfn convert_uint16_rtp(ulong16);\n" |
32526 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(ulong16);\n" |
32527 | "uint16 __ovld __cnfn convert_uint16_rtn(ulong16);\n" |
32528 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(ulong16);\n" |
32529 | "uint16 __ovld __cnfn convert_uint16(ulong16);\n" |
32530 | "uint16 __ovld __cnfn convert_uint16_sat(ulong16);\n" |
32531 | "uint16 __ovld __cnfn convert_uint16_rte(float16);\n" |
32532 | "uint16 __ovld __cnfn convert_uint16_sat_rte(float16);\n" |
32533 | "uint16 __ovld __cnfn convert_uint16_rtz(float16);\n" |
32534 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(float16);\n" |
32535 | "uint16 __ovld __cnfn convert_uint16_rtp(float16);\n" |
32536 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(float16);\n" |
32537 | "uint16 __ovld __cnfn convert_uint16_rtn(float16);\n" |
32538 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(float16);\n" |
32539 | "uint16 __ovld __cnfn convert_uint16(float16);\n" |
32540 | "uint16 __ovld __cnfn convert_uint16_sat(float16);\n" |
32541 | "long16 __ovld __cnfn convert_long16_rte(char16);\n" |
32542 | "long16 __ovld __cnfn convert_long16_sat_rte(char16);\n" |
32543 | "long16 __ovld __cnfn convert_long16_rtz(char16);\n" |
32544 | "long16 __ovld __cnfn convert_long16_sat_rtz(char16);\n" |
32545 | "long16 __ovld __cnfn convert_long16_rtp(char16);\n" |
32546 | "long16 __ovld __cnfn convert_long16_sat_rtp(char16);\n" |
32547 | "long16 __ovld __cnfn convert_long16_rtn(char16);\n" |
32548 | "long16 __ovld __cnfn convert_long16_sat_rtn(char16);\n" |
32549 | "long16 __ovld __cnfn convert_long16(char16);\n" |
32550 | "long16 __ovld __cnfn convert_long16_sat(char16);\n" |
32551 | "long16 __ovld __cnfn convert_long16_rte(uchar16);\n" |
32552 | "long16 __ovld __cnfn convert_long16_sat_rte(uchar16);\n" |
32553 | "long16 __ovld __cnfn convert_long16_rtz(uchar16);\n" |
32554 | "long16 __ovld __cnfn convert_long16_sat_rtz(uchar16);\n" |
32555 | "long16 __ovld __cnfn convert_long16_rtp(uchar16);\n" |
32556 | "long16 __ovld __cnfn convert_long16_sat_rtp(uchar16);\n" |
32557 | "long16 __ovld __cnfn convert_long16_rtn(uchar16);\n" |
32558 | "long16 __ovld __cnfn convert_long16_sat_rtn(uchar16);\n" |
32559 | "long16 __ovld __cnfn convert_long16(uchar16);\n" |
32560 | "long16 __ovld __cnfn convert_long16_sat(uchar16);\n" |
32561 | "long16 __ovld __cnfn convert_long16_rte(short16);\n" |
32562 | "long16 __ovld __cnfn convert_long16_sat_rte(short16);\n" |
32563 | "long16 __ovld __cnfn convert_long16_rtz(short16);\n" |
32564 | "long16 __ovld __cnfn convert_long16_sat_rtz(short16);\n" |
32565 | "long16 __ovld __cnfn convert_long16_rtp(short16);\n" |
32566 | "long16 __ovld __cnfn convert_long16_sat_rtp(short16);\n" |
32567 | "long16 __ovld __cnfn convert_long16_rtn(short16);\n" |
32568 | "long16 __ovld __cnfn convert_long16_sat_rtn(short16);\n" |
32569 | "long16 __ovld __cnfn convert_long16(short16);\n" |
32570 | "long16 __ovld __cnfn convert_long16_sat(short16);\n" |
32571 | "long16 __ovld __cnfn convert_long16_rte(ushort16);\n" |
32572 | "long16 __ovld __cnfn convert_long16_sat_rte(ushort16);\n" |
32573 | "long16 __ovld __cnfn convert_long16_rtz(ushort16);\n" |
32574 | "long16 __ovld __cnfn convert_long16_sat_rtz(ushort16);\n" |
32575 | "long16 __ovld __cnfn convert_long16_rtp(ushort16);\n" |
32576 | "long16 __ovld __cnfn convert_long16_sat_rtp(ushort16);\n" |
32577 | "long16 __ovld __cnfn convert_long16_rtn(ushort16);\n" |
32578 | "long16 __ovld __cnfn convert_long16_sat_rtn(ushort16);\n" |
32579 | "long16 __ovld __cnfn convert_long16(ushort16);\n" |
32580 | "long16 __ovld __cnfn convert_long16_sat(ushort16);\n" |
32581 | "long16 __ovld __cnfn convert_long16_rte(int16);\n" |
32582 | "long16 __ovld __cnfn convert_long16_sat_rte(int16);\n" |
32583 | "long16 __ovld __cnfn convert_long16_rtz(int16);\n" |
32584 | "long16 __ovld __cnfn convert_long16_sat_rtz(int16);\n" |
32585 | "long16 __ovld __cnfn convert_long16_rtp(int16);\n" |
32586 | "long16 __ovld __cnfn convert_long16_sat_rtp(int16);\n" |
32587 | "long16 __ovld __cnfn convert_long16_rtn(int16);\n" |
32588 | "long16 __ovld __cnfn convert_long16_sat_rtn(int16);\n" |
32589 | "long16 __ovld __cnfn convert_long16(int16);\n" |
32590 | "long16 __ovld __cnfn convert_long16_sat(int16);\n" |
32591 | "long16 __ovld __cnfn convert_long16_rte(uint16);\n" |
32592 | "long16 __ovld __cnfn convert_long16_sat_rte(uint16);\n" |
32593 | "long16 __ovld __cnfn convert_long16_rtz(uint16);\n" |
32594 | "long16 __ovld __cnfn convert_long16_sat_rtz(uint16);\n" |
32595 | "long16 __ovld __cnfn convert_long16_rtp(uint16);\n" |
32596 | "long16 __ovld __cnfn convert_long16_sat_rtp(uint16);\n" |
32597 | "long16 __ovld __cnfn convert_long16_rtn(uint16);\n" |
32598 | "long16 __ovld __cnfn convert_long16_sat_rtn(uint16);\n" |
32599 | "long16 __ovld __cnfn convert_long16(uint16);\n" |
32600 | "long16 __ovld __cnfn convert_long16_sat(uint16);\n" |
32601 | "long16 __ovld __cnfn convert_long16_rte(long16);\n" |
32602 | "long16 __ovld __cnfn convert_long16_sat_rte(long16);\n" |
32603 | "long16 __ovld __cnfn convert_long16_rtz(long16);\n" |
32604 | "long16 __ovld __cnfn convert_long16_sat_rtz(long16);\n" |
32605 | "long16 __ovld __cnfn convert_long16_rtp(long16);\n" |
32606 | "long16 __ovld __cnfn convert_long16_sat_rtp(long16);\n" |
32607 | "long16 __ovld __cnfn convert_long16_rtn(long16);\n" |
32608 | "long16 __ovld __cnfn convert_long16_sat_rtn(long16);\n" |
32609 | "long16 __ovld __cnfn convert_long16(long16);\n" |
32610 | "long16 __ovld __cnfn convert_long16_sat(long16);\n" |
32611 | "long16 __ovld __cnfn convert_long16_rte(ulong16);\n" |
32612 | "long16 __ovld __cnfn convert_long16_sat_rte(ulong16);\n" |
32613 | "long16 __ovld __cnfn convert_long16_rtz(ulong16);\n" |
32614 | "long16 __ovld __cnfn convert_long16_sat_rtz(ulong16);\n" |
32615 | "long16 __ovld __cnfn convert_long16_rtp(ulong16);\n" |
32616 | "long16 __ovld __cnfn convert_long16_sat_rtp(ulong16);\n" |
32617 | "long16 __ovld __cnfn convert_long16_rtn(ulong16);\n" |
32618 | "long16 __ovld __cnfn convert_long16_sat_rtn(ulong16);\n" |
32619 | "long16 __ovld __cnfn convert_long16(ulong16);\n" |
32620 | "long16 __ovld __cnfn convert_long16_sat(ulong16);\n" |
32621 | "long16 __ovld __cnfn convert_long16_rte(float16);\n" |
32622 | "long16 __ovld __cnfn convert_long16_sat_rte(float16);\n" |
32623 | "long16 __ovld __cnfn convert_long16_rtz(float16);\n" |
32624 | "long16 __ovld __cnfn convert_long16_sat_rtz(float16);\n" |
32625 | "long16 __ovld __cnfn convert_long16_rtp(float16);\n" |
32626 | "long16 __ovld __cnfn convert_long16_sat_rtp(float16);\n" |
32627 | "long16 __ovld __cnfn convert_long16_rtn(float16);\n" |
32628 | "long16 __ovld __cnfn convert_long16_sat_rtn(float16);\n" |
32629 | "long16 __ovld __cnfn convert_long16(float16);\n" |
32630 | "long16 __ovld __cnfn convert_long16_sat(float16);\n" |
32631 | "ulong16 __ovld __cnfn convert_ulong16_rte(char16);\n" |
32632 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(char16);\n" |
32633 | "ulong16 __ovld __cnfn convert_ulong16_rtz(char16);\n" |
32634 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(char16);\n" |
32635 | "ulong16 __ovld __cnfn convert_ulong16_rtp(char16);\n" |
32636 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(char16);\n" |
32637 | "ulong16 __ovld __cnfn convert_ulong16_rtn(char16);\n" |
32638 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(char16);\n" |
32639 | "ulong16 __ovld __cnfn convert_ulong16(char16);\n" |
32640 | "ulong16 __ovld __cnfn convert_ulong16_sat(char16);\n" |
32641 | "ulong16 __ovld __cnfn convert_ulong16_rte(uchar16);\n" |
32642 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(uchar16);\n" |
32643 | "ulong16 __ovld __cnfn convert_ulong16_rtz(uchar16);\n" |
32644 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uchar16);\n" |
32645 | "ulong16 __ovld __cnfn convert_ulong16_rtp(uchar16);\n" |
32646 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uchar16);\n" |
32647 | "ulong16 __ovld __cnfn convert_ulong16_rtn(uchar16);\n" |
32648 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(uchar16);\n" |
32649 | "ulong16 __ovld __cnfn convert_ulong16(uchar16);\n" |
32650 | "ulong16 __ovld __cnfn convert_ulong16_sat(uchar16);\n" |
32651 | "ulong16 __ovld __cnfn convert_ulong16_rte(short16);\n" |
32652 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(short16);\n" |
32653 | "ulong16 __ovld __cnfn convert_ulong16_rtz(short16);\n" |
32654 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(short16);\n" |
32655 | "ulong16 __ovld __cnfn convert_ulong16_rtp(short16);\n" |
32656 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(short16);\n" |
32657 | "ulong16 __ovld __cnfn convert_ulong16_rtn(short16);\n" |
32658 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(short16);\n" |
32659 | "ulong16 __ovld __cnfn convert_ulong16(short16);\n" |
32660 | "ulong16 __ovld __cnfn convert_ulong16_sat(short16);\n" |
32661 | "ulong16 __ovld __cnfn convert_ulong16_rte(ushort16);\n" |
32662 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(ushort16);\n" |
32663 | "ulong16 __ovld __cnfn convert_ulong16_rtz(ushort16);\n" |
32664 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ushort16);\n" |
32665 | "ulong16 __ovld __cnfn convert_ulong16_rtp(ushort16);\n" |
32666 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ushort16);\n" |
32667 | "ulong16 __ovld __cnfn convert_ulong16_rtn(ushort16);\n" |
32668 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ushort16);\n" |
32669 | "ulong16 __ovld __cnfn convert_ulong16(ushort16);\n" |
32670 | "ulong16 __ovld __cnfn convert_ulong16_sat(ushort16);\n" |
32671 | "ulong16 __ovld __cnfn convert_ulong16_rte(int16);\n" |
32672 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(int16);\n" |
32673 | "ulong16 __ovld __cnfn convert_ulong16_rtz(int16);\n" |
32674 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(int16);\n" |
32675 | "ulong16 __ovld __cnfn convert_ulong16_rtp(int16);\n" |
32676 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(int16);\n" |
32677 | "ulong16 __ovld __cnfn convert_ulong16_rtn(int16);\n" |
32678 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(int16);\n" |
32679 | "ulong16 __ovld __cnfn convert_ulong16(int16);\n" |
32680 | "ulong16 __ovld __cnfn convert_ulong16_sat(int16);\n" |
32681 | "ulong16 __ovld __cnfn convert_ulong16_rte(uint16);\n" |
32682 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(uint16);\n" |
32683 | "ulong16 __ovld __cnfn convert_ulong16_rtz(uint16);\n" |
32684 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uint16);\n" |
32685 | "ulong16 __ovld __cnfn convert_ulong16_rtp(uint16);\n" |
32686 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uint16);\n" |
32687 | "ulong16 __ovld __cnfn convert_ulong16_rtn(uint16);\n" |
32688 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(uint16);\n" |
32689 | "ulong16 __ovld __cnfn convert_ulong16(uint16);\n" |
32690 | "ulong16 __ovld __cnfn convert_ulong16_sat(uint16);\n" |
32691 | "ulong16 __ovld __cnfn convert_ulong16_rte(long16);\n" |
32692 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(long16);\n" |
32693 | "ulong16 __ovld __cnfn convert_ulong16_rtz(long16);\n" |
32694 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(long16);\n" |
32695 | "ulong16 __ovld __cnfn convert_ulong16_rtp(long16);\n" |
32696 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(long16);\n" |
32697 | "ulong16 __ovld __cnfn convert_ulong16_rtn(long16);\n" |
32698 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(long16);\n" |
32699 | "ulong16 __ovld __cnfn convert_ulong16(long16);\n" |
32700 | "ulong16 __ovld __cnfn convert_ulong16_sat(long16);\n" |
32701 | "ulong16 __ovld __cnfn convert_ulong16_rte(ulong16);\n" |
32702 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(ulong16);\n" |
32703 | "ulong16 __ovld __cnfn convert_ulong16_rtz(ulong16);\n" |
32704 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ulong16);\n" |
32705 | "ulong16 __ovld __cnfn convert_ulong16_rtp(ulong16);\n" |
32706 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ulong16);\n" |
32707 | "ulong16 __ovld __cnfn convert_ulong16_rtn(ulong16);\n" |
32708 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ulong16);\n" |
32709 | "ulong16 __ovld __cnfn convert_ulong16(ulong16);\n" |
32710 | "ulong16 __ovld __cnfn convert_ulong16_sat(ulong16);\n" |
32711 | "ulong16 __ovld __cnfn convert_ulong16_rte(float16);\n" |
32712 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(float16);\n" |
32713 | "ulong16 __ovld __cnfn convert_ulong16_rtz(float16);\n" |
32714 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(float16);\n" |
32715 | "ulong16 __ovld __cnfn convert_ulong16_rtp(float16);\n" |
32716 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(float16);\n" |
32717 | "ulong16 __ovld __cnfn convert_ulong16_rtn(float16);\n" |
32718 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(float16);\n" |
32719 | "ulong16 __ovld __cnfn convert_ulong16(float16);\n" |
32720 | "ulong16 __ovld __cnfn convert_ulong16_sat(float16);\n" |
32721 | "float16 __ovld __cnfn convert_float16_rte(char16);\n" |
32722 | "float16 __ovld __cnfn convert_float16_rtz(char16);\n" |
32723 | "float16 __ovld __cnfn convert_float16_rtp(char16);\n" |
32724 | "float16 __ovld __cnfn convert_float16_rtn(char16);\n" |
32725 | "float16 __ovld __cnfn convert_float16(char16);\n" |
32726 | "float16 __ovld __cnfn convert_float16_rte(uchar16);\n" |
32727 | "float16 __ovld __cnfn convert_float16_rtz(uchar16);\n" |
32728 | "float16 __ovld __cnfn convert_float16_rtp(uchar16);\n" |
32729 | "float16 __ovld __cnfn convert_float16_rtn(uchar16);\n" |
32730 | "float16 __ovld __cnfn convert_float16(uchar16);\n" |
32731 | "float16 __ovld __cnfn convert_float16_rte(short16);\n" |
32732 | "float16 __ovld __cnfn convert_float16_rtz(short16);\n" |
32733 | "float16 __ovld __cnfn convert_float16_rtp(short16);\n" |
32734 | "float16 __ovld __cnfn convert_float16_rtn(short16);\n" |
32735 | "float16 __ovld __cnfn convert_float16(short16);\n" |
32736 | "float16 __ovld __cnfn convert_float16_rte(ushort16);\n" |
32737 | "float16 __ovld __cnfn convert_float16_rtz(ushort16);\n" |
32738 | "float16 __ovld __cnfn convert_float16_rtp(ushort16);\n" |
32739 | "float16 __ovld __cnfn convert_float16_rtn(ushort16);\n" |
32740 | "float16 __ovld __cnfn convert_float16(ushort16);\n" |
32741 | "float16 __ovld __cnfn convert_float16_rte(int16);\n" |
32742 | "float16 __ovld __cnfn convert_float16_rtz(int16);\n" |
32743 | "float16 __ovld __cnfn convert_float16_rtp(int16);\n" |
32744 | "float16 __ovld __cnfn convert_float16_rtn(int16);\n" |
32745 | "float16 __ovld __cnfn convert_float16(int16);\n" |
32746 | "float16 __ovld __cnfn convert_float16_rte(uint16);\n" |
32747 | "float16 __ovld __cnfn convert_float16_rtz(uint16);\n" |
32748 | "float16 __ovld __cnfn convert_float16_rtp(uint16);\n" |
32749 | "float16 __ovld __cnfn convert_float16_rtn(uint16);\n" |
32750 | "float16 __ovld __cnfn convert_float16(uint16);\n" |
32751 | "float16 __ovld __cnfn convert_float16_rte(long16);\n" |
32752 | "float16 __ovld __cnfn convert_float16_rtz(long16);\n" |
32753 | "float16 __ovld __cnfn convert_float16_rtp(long16);\n" |
32754 | "float16 __ovld __cnfn convert_float16_rtn(long16);\n" |
32755 | "float16 __ovld __cnfn convert_float16(long16);\n" |
32756 | "float16 __ovld __cnfn convert_float16_rte(ulong16);\n" |
32757 | "float16 __ovld __cnfn convert_float16_rtz(ulong16);\n" |
32758 | "float16 __ovld __cnfn convert_float16_rtp(ulong16);\n" |
32759 | "float16 __ovld __cnfn convert_float16_rtn(ulong16);\n" |
32760 | "float16 __ovld __cnfn convert_float16(ulong16);\n" |
32761 | "float16 __ovld __cnfn convert_float16_rte(float16);\n" |
32762 | "float16 __ovld __cnfn convert_float16_rtz(float16);\n" |
32763 | "float16 __ovld __cnfn convert_float16_rtp(float16);\n" |
32764 | "float16 __ovld __cnfn convert_float16_rtn(float16);\n" |
32765 | "float16 __ovld __cnfn convert_float16(float16);\n" |
32766 | "\n" |
32767 | "// Conversions with double data type parameters or return value.\n" |
32768 | "\n" |
32769 | "#ifdef cl_khr_fp64\n" |
32770 | "char __ovld __cnfn convert_char(double);\n" |
32771 | "char __ovld __cnfn convert_char_rte(double);\n" |
32772 | "char __ovld __cnfn convert_char_rtn(double);\n" |
32773 | "char __ovld __cnfn convert_char_rtp(double);\n" |
32774 | "char __ovld __cnfn convert_char_rtz(double);\n" |
32775 | "char __ovld __cnfn convert_char_sat(double);\n" |
32776 | "char __ovld __cnfn convert_char_sat_rte(double);\n" |
32777 | "char __ovld __cnfn convert_char_sat_rtn(double);\n" |
32778 | "char __ovld __cnfn convert_char_sat_rtp(double);\n" |
32779 | "char __ovld __cnfn convert_char_sat_rtz(double);\n" |
32780 | "char2 __ovld __cnfn convert_char2(double2);\n" |
32781 | "char2 __ovld __cnfn convert_char2_rte(double2);\n" |
32782 | "char2 __ovld __cnfn convert_char2_rtn(double2);\n" |
32783 | "char2 __ovld __cnfn convert_char2_rtp(double2);\n" |
32784 | "char2 __ovld __cnfn convert_char2_rtz(double2);\n" |
32785 | "char2 __ovld __cnfn convert_char2_sat(double2);\n" |
32786 | "char2 __ovld __cnfn convert_char2_sat_rte(double2);\n" |
32787 | "char2 __ovld __cnfn convert_char2_sat_rtn(double2);\n" |
32788 | "char2 __ovld __cnfn convert_char2_sat_rtp(double2);\n" |
32789 | "char2 __ovld __cnfn convert_char2_sat_rtz(double2);\n" |
32790 | "char3 __ovld __cnfn convert_char3(double3);\n" |
32791 | "char3 __ovld __cnfn convert_char3_rte(double3);\n" |
32792 | "char3 __ovld __cnfn convert_char3_rtn(double3);\n" |
32793 | "char3 __ovld __cnfn convert_char3_rtp(double3);\n" |
32794 | "char3 __ovld __cnfn convert_char3_rtz(double3);\n" |
32795 | "char3 __ovld __cnfn convert_char3_sat(double3);\n" |
32796 | "char3 __ovld __cnfn convert_char3_sat_rte(double3);\n" |
32797 | "char3 __ovld __cnfn convert_char3_sat_rtn(double3);\n" |
32798 | "char3 __ovld __cnfn convert_char3_sat_rtp(double3);\n" |
32799 | "char3 __ovld __cnfn convert_char3_sat_rtz(double3);\n" |
32800 | "char4 __ovld __cnfn convert_char4(double4);\n" |
32801 | "char4 __ovld __cnfn convert_char4_rte(double4);\n" |
32802 | "char4 __ovld __cnfn convert_char4_rtn(double4);\n" |
32803 | "char4 __ovld __cnfn convert_char4_rtp(double4);\n" |
32804 | "char4 __ovld __cnfn convert_char4_rtz(double4);\n" |
32805 | "char4 __ovld __cnfn convert_char4_sat(double4);\n" |
32806 | "char4 __ovld __cnfn convert_char4_sat_rte(double4);\n" |
32807 | "char4 __ovld __cnfn convert_char4_sat_rtn(double4);\n" |
32808 | "char4 __ovld __cnfn convert_char4_sat_rtp(double4);\n" |
32809 | "char4 __ovld __cnfn convert_char4_sat_rtz(double4);\n" |
32810 | "char8 __ovld __cnfn convert_char8(double8);\n" |
32811 | "char8 __ovld __cnfn convert_char8_rte(double8);\n" |
32812 | "char8 __ovld __cnfn convert_char8_rtn(double8);\n" |
32813 | "char8 __ovld __cnfn convert_char8_rtp(double8);\n" |
32814 | "char8 __ovld __cnfn convert_char8_rtz(double8);\n" |
32815 | "char8 __ovld __cnfn convert_char8_sat(double8);\n" |
32816 | "char8 __ovld __cnfn convert_char8_sat_rte(double8);\n" |
32817 | "char8 __ovld __cnfn convert_char8_sat_rtn(double8);\n" |
32818 | "char8 __ovld __cnfn convert_char8_sat_rtp(double8);\n" |
32819 | "char8 __ovld __cnfn convert_char8_sat_rtz(double8);\n" |
32820 | "char16 __ovld __cnfn convert_char16(double16);\n" |
32821 | "char16 __ovld __cnfn convert_char16_rte(double16);\n" |
32822 | "char16 __ovld __cnfn convert_char16_rtn(double16);\n" |
32823 | "char16 __ovld __cnfn convert_char16_rtp(double16);\n" |
32824 | "char16 __ovld __cnfn convert_char16_rtz(double16);\n" |
32825 | "char16 __ovld __cnfn convert_char16_sat(double16);\n" |
32826 | "char16 __ovld __cnfn convert_char16_sat_rte(double16);\n" |
32827 | "char16 __ovld __cnfn convert_char16_sat_rtn(double16);\n" |
32828 | "char16 __ovld __cnfn convert_char16_sat_rtp(double16);\n" |
32829 | "char16 __ovld __cnfn convert_char16_sat_rtz(double16);\n" |
32830 | "\n" |
32831 | "uchar __ovld __cnfn convert_uchar(double);\n" |
32832 | "uchar __ovld __cnfn convert_uchar_rte(double);\n" |
32833 | "uchar __ovld __cnfn convert_uchar_rtn(double);\n" |
32834 | "uchar __ovld __cnfn convert_uchar_rtp(double);\n" |
32835 | "uchar __ovld __cnfn convert_uchar_rtz(double);\n" |
32836 | "uchar __ovld __cnfn convert_uchar_sat(double);\n" |
32837 | "uchar __ovld __cnfn convert_uchar_sat_rte(double);\n" |
32838 | "uchar __ovld __cnfn convert_uchar_sat_rtn(double);\n" |
32839 | "uchar __ovld __cnfn convert_uchar_sat_rtp(double);\n" |
32840 | "uchar __ovld __cnfn convert_uchar_sat_rtz(double);\n" |
32841 | "uchar2 __ovld __cnfn convert_uchar2(double2);\n" |
32842 | "uchar2 __ovld __cnfn convert_uchar2_rte(double2);\n" |
32843 | "uchar2 __ovld __cnfn convert_uchar2_rtn(double2);\n" |
32844 | "uchar2 __ovld __cnfn convert_uchar2_rtp(double2);\n" |
32845 | "uchar2 __ovld __cnfn convert_uchar2_rtz(double2);\n" |
32846 | "uchar2 __ovld __cnfn convert_uchar2_sat(double2);\n" |
32847 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(double2);\n" |
32848 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(double2);\n" |
32849 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(double2);\n" |
32850 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(double2);\n" |
32851 | "uchar3 __ovld __cnfn convert_uchar3(double3);\n" |
32852 | "uchar3 __ovld __cnfn convert_uchar3_rte(double3);\n" |
32853 | "uchar3 __ovld __cnfn convert_uchar3_rtn(double3);\n" |
32854 | "uchar3 __ovld __cnfn convert_uchar3_rtp(double3);\n" |
32855 | "uchar3 __ovld __cnfn convert_uchar3_rtz(double3);\n" |
32856 | "uchar3 __ovld __cnfn convert_uchar3_sat(double3);\n" |
32857 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(double3);\n" |
32858 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(double3);\n" |
32859 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(double3);\n" |
32860 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(double3);\n" |
32861 | "uchar4 __ovld __cnfn convert_uchar4(double4);\n" |
32862 | "uchar4 __ovld __cnfn convert_uchar4_rte(double4);\n" |
32863 | "uchar4 __ovld __cnfn convert_uchar4_rtn(double4);\n" |
32864 | "uchar4 __ovld __cnfn convert_uchar4_rtp(double4);\n" |
32865 | "uchar4 __ovld __cnfn convert_uchar4_rtz(double4);\n" |
32866 | "uchar4 __ovld __cnfn convert_uchar4_sat(double4);\n" |
32867 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(double4);\n" |
32868 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(double4);\n" |
32869 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(double4);\n" |
32870 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(double4);\n" |
32871 | "uchar8 __ovld __cnfn convert_uchar8(double8);\n" |
32872 | "uchar8 __ovld __cnfn convert_uchar8_rte(double8);\n" |
32873 | "uchar8 __ovld __cnfn convert_uchar8_rtn(double8);\n" |
32874 | "uchar8 __ovld __cnfn convert_uchar8_rtp(double8);\n" |
32875 | "uchar8 __ovld __cnfn convert_uchar8_rtz(double8);\n" |
32876 | "uchar8 __ovld __cnfn convert_uchar8_sat(double8);\n" |
32877 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(double8);\n" |
32878 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(double8);\n" |
32879 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(double8);\n" |
32880 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(double8);\n" |
32881 | "uchar16 __ovld __cnfn convert_uchar16(double16);\n" |
32882 | "uchar16 __ovld __cnfn convert_uchar16_rte(double16);\n" |
32883 | "uchar16 __ovld __cnfn convert_uchar16_rtn(double16);\n" |
32884 | "uchar16 __ovld __cnfn convert_uchar16_rtp(double16);\n" |
32885 | "uchar16 __ovld __cnfn convert_uchar16_rtz(double16);\n" |
32886 | "uchar16 __ovld __cnfn convert_uchar16_sat(double16);\n" |
32887 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(double16);\n" |
32888 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(double16);\n" |
32889 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(double16);\n" |
32890 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(double16);\n" |
32891 | "\n" |
32892 | "short __ovld __cnfn convert_short(double);\n" |
32893 | "short __ovld __cnfn convert_short_rte(double);\n" |
32894 | "short __ovld __cnfn convert_short_rtn(double);\n" |
32895 | "short __ovld __cnfn convert_short_rtp(double);\n" |
32896 | "short __ovld __cnfn convert_short_rtz(double);\n" |
32897 | "short __ovld __cnfn convert_short_sat(double);\n" |
32898 | "short __ovld __cnfn convert_short_sat_rte(double);\n" |
32899 | "short __ovld __cnfn convert_short_sat_rtn(double);\n" |
32900 | "short __ovld __cnfn convert_short_sat_rtp(double);\n" |
32901 | "short __ovld __cnfn convert_short_sat_rtz(double);\n" |
32902 | "short2 __ovld __cnfn convert_short2(double2);\n" |
32903 | "short2 __ovld __cnfn convert_short2_rte(double2);\n" |
32904 | "short2 __ovld __cnfn convert_short2_rtn(double2);\n" |
32905 | "short2 __ovld __cnfn convert_short2_rtp(double2);\n" |
32906 | "short2 __ovld __cnfn convert_short2_rtz(double2);\n" |
32907 | "short2 __ovld __cnfn convert_short2_sat(double2);\n" |
32908 | "short2 __ovld __cnfn convert_short2_sat_rte(double2);\n" |
32909 | "short2 __ovld __cnfn convert_short2_sat_rtn(double2);\n" |
32910 | "short2 __ovld __cnfn convert_short2_sat_rtp(double2);\n" |
32911 | "short2 __ovld __cnfn convert_short2_sat_rtz(double2);\n" |
32912 | "short3 __ovld __cnfn convert_short3(double3);\n" |
32913 | "short3 __ovld __cnfn convert_short3_rte(double3);\n" |
32914 | "short3 __ovld __cnfn convert_short3_rtn(double3);\n" |
32915 | "short3 __ovld __cnfn convert_short3_rtp(double3);\n" |
32916 | "short3 __ovld __cnfn convert_short3_rtz(double3);\n" |
32917 | "short3 __ovld __cnfn convert_short3_sat(double3);\n" |
32918 | "short3 __ovld __cnfn convert_short3_sat_rte(double3);\n" |
32919 | "short3 __ovld __cnfn convert_short3_sat_rtn(double3);\n" |
32920 | "short3 __ovld __cnfn convert_short3_sat_rtp(double3);\n" |
32921 | "short3 __ovld __cnfn convert_short3_sat_rtz(double3);\n" |
32922 | "short4 __ovld __cnfn convert_short4(double4);\n" |
32923 | "short4 __ovld __cnfn convert_short4_rte(double4);\n" |
32924 | "short4 __ovld __cnfn convert_short4_rtn(double4);\n" |
32925 | "short4 __ovld __cnfn convert_short4_rtp(double4);\n" |
32926 | "short4 __ovld __cnfn convert_short4_rtz(double4);\n" |
32927 | "short4 __ovld __cnfn convert_short4_sat(double4);\n" |
32928 | "short4 __ovld __cnfn convert_short4_sat_rte(double4);\n" |
32929 | "short4 __ovld __cnfn convert_short4_sat_rtn(double4);\n" |
32930 | "short4 __ovld __cnfn convert_short4_sat_rtp(double4);\n" |
32931 | "short4 __ovld __cnfn convert_short4_sat_rtz(double4);\n" |
32932 | "short8 __ovld __cnfn convert_short8(double8);\n" |
32933 | "short8 __ovld __cnfn convert_short8_rte(double8);\n" |
32934 | "short8 __ovld __cnfn convert_short8_rtn(double8);\n" |
32935 | "short8 __ovld __cnfn convert_short8_rtp(double8);\n" |
32936 | "short8 __ovld __cnfn convert_short8_rtz(double8);\n" |
32937 | "short8 __ovld __cnfn convert_short8_sat(double8);\n" |
32938 | "short8 __ovld __cnfn convert_short8_sat_rte(double8);\n" |
32939 | "short8 __ovld __cnfn convert_short8_sat_rtn(double8);\n" |
32940 | "short8 __ovld __cnfn convert_short8_sat_rtp(double8);\n" |
32941 | "short8 __ovld __cnfn convert_short8_sat_rtz(double8);\n" |
32942 | "short16 __ovld __cnfn convert_short16(double16);\n" |
32943 | "short16 __ovld __cnfn convert_short16_rte(double16);\n" |
32944 | "short16 __ovld __cnfn convert_short16_rtn(double16);\n" |
32945 | "short16 __ovld __cnfn convert_short16_rtp(double16);\n" |
32946 | "short16 __ovld __cnfn convert_short16_rtz(double16);\n" |
32947 | "short16 __ovld __cnfn convert_short16_sat(double16);\n" |
32948 | "short16 __ovld __cnfn convert_short16_sat_rte(double16);\n" |
32949 | "short16 __ovld __cnfn convert_short16_sat_rtn(double16);\n" |
32950 | "short16 __ovld __cnfn convert_short16_sat_rtp(double16);\n" |
32951 | "short16 __ovld __cnfn convert_short16_sat_rtz(double16);\n" |
32952 | "\n" |
32953 | "ushort __ovld __cnfn convert_ushort(double);\n" |
32954 | "ushort __ovld __cnfn convert_ushort_rte(double);\n" |
32955 | "ushort __ovld __cnfn convert_ushort_rtn(double);\n" |
32956 | "ushort __ovld __cnfn convert_ushort_rtp(double);\n" |
32957 | "ushort __ovld __cnfn convert_ushort_rtz(double);\n" |
32958 | "ushort __ovld __cnfn convert_ushort_sat(double);\n" |
32959 | "ushort __ovld __cnfn convert_ushort_sat_rte(double);\n" |
32960 | "ushort __ovld __cnfn convert_ushort_sat_rtn(double);\n" |
32961 | "ushort __ovld __cnfn convert_ushort_sat_rtp(double);\n" |
32962 | "ushort __ovld __cnfn convert_ushort_sat_rtz(double);\n" |
32963 | "ushort2 __ovld __cnfn convert_ushort2(double2);\n" |
32964 | "ushort2 __ovld __cnfn convert_ushort2_rte(double2);\n" |
32965 | "ushort2 __ovld __cnfn convert_ushort2_rtn(double2);\n" |
32966 | "ushort2 __ovld __cnfn convert_ushort2_rtp(double2);\n" |
32967 | "ushort2 __ovld __cnfn convert_ushort2_rtz(double2);\n" |
32968 | "ushort2 __ovld __cnfn convert_ushort2_sat(double2);\n" |
32969 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(double2);\n" |
32970 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(double2);\n" |
32971 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(double2);\n" |
32972 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(double2);\n" |
32973 | "ushort3 __ovld __cnfn convert_ushort3(double3);\n" |
32974 | "ushort3 __ovld __cnfn convert_ushort3_rte(double3);\n" |
32975 | "ushort3 __ovld __cnfn convert_ushort3_rtn(double3);\n" |
32976 | "ushort3 __ovld __cnfn convert_ushort3_rtp(double3);\n" |
32977 | "ushort3 __ovld __cnfn convert_ushort3_rtz(double3);\n" |
32978 | "ushort3 __ovld __cnfn convert_ushort3_sat(double3);\n" |
32979 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(double3);\n" |
32980 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(double3);\n" |
32981 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(double3);\n" |
32982 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(double3);\n" |
32983 | "ushort4 __ovld __cnfn convert_ushort4(double4);\n" |
32984 | "ushort4 __ovld __cnfn convert_ushort4_rte(double4);\n" |
32985 | "ushort4 __ovld __cnfn convert_ushort4_rtn(double4);\n" |
32986 | "ushort4 __ovld __cnfn convert_ushort4_rtp(double4);\n" |
32987 | "ushort4 __ovld __cnfn convert_ushort4_rtz(double4);\n" |
32988 | "ushort4 __ovld __cnfn convert_ushort4_sat(double4);\n" |
32989 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(double4);\n" |
32990 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(double4);\n" |
32991 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(double4);\n" |
32992 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(double4);\n" |
32993 | "ushort8 __ovld __cnfn convert_ushort8(double8);\n" |
32994 | "ushort8 __ovld __cnfn convert_ushort8_rte(double8);\n" |
32995 | "ushort8 __ovld __cnfn convert_ushort8_rtn(double8);\n" |
32996 | "ushort8 __ovld __cnfn convert_ushort8_rtp(double8);\n" |
32997 | "ushort8 __ovld __cnfn convert_ushort8_rtz(double8);\n" |
32998 | "ushort8 __ovld __cnfn convert_ushort8_sat(double8);\n" |
32999 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(double8);\n" |
33000 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(double8);\n" |
33001 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(double8);\n" |
33002 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(double8);\n" |
33003 | "ushort16 __ovld __cnfn convert_ushort16(double16);\n" |
33004 | "ushort16 __ovld __cnfn convert_ushort16_rte(double16);\n" |
33005 | "ushort16 __ovld __cnfn convert_ushort16_rtn(double16);\n" |
33006 | "ushort16 __ovld __cnfn convert_ushort16_rtp(double16);\n" |
33007 | "ushort16 __ovld __cnfn convert_ushort16_rtz(double16);\n" |
33008 | "ushort16 __ovld __cnfn convert_ushort16_sat(double16);\n" |
33009 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(double16);\n" |
33010 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(double16);\n" |
33011 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(double16);\n" |
33012 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(double16);\n" |
33013 | "\n" |
33014 | "int __ovld __cnfn convert_int(double);\n" |
33015 | "int __ovld __cnfn convert_int_rte(double);\n" |
33016 | "int __ovld __cnfn convert_int_rtn(double);\n" |
33017 | "int __ovld __cnfn convert_int_rtp(double);\n" |
33018 | "int __ovld __cnfn convert_int_rtz(double);\n" |
33019 | "int __ovld __cnfn convert_int_sat(double);\n" |
33020 | "int __ovld __cnfn convert_int_sat_rte(double);\n" |
33021 | "int __ovld __cnfn convert_int_sat_rtn(double);\n" |
33022 | "int __ovld __cnfn convert_int_sat_rtp(double);\n" |
33023 | "int __ovld __cnfn convert_int_sat_rtz(double);\n" |
33024 | "int2 __ovld __cnfn convert_int2(double2);\n" |
33025 | "int2 __ovld __cnfn convert_int2_rte(double2);\n" |
33026 | "int2 __ovld __cnfn convert_int2_rtn(double2);\n" |
33027 | "int2 __ovld __cnfn convert_int2_rtp(double2);\n" |
33028 | "int2 __ovld __cnfn convert_int2_rtz(double2);\n" |
33029 | "int2 __ovld __cnfn convert_int2_sat(double2);\n" |
33030 | "int2 __ovld __cnfn convert_int2_sat_rte(double2);\n" |
33031 | "int2 __ovld __cnfn convert_int2_sat_rtn(double2);\n" |
33032 | "int2 __ovld __cnfn convert_int2_sat_rtp(double2);\n" |
33033 | "int2 __ovld __cnfn convert_int2_sat_rtz(double2);\n" |
33034 | "int3 __ovld __cnfn convert_int3(double3);\n" |
33035 | "int3 __ovld __cnfn convert_int3_rte(double3);\n" |
33036 | "int3 __ovld __cnfn convert_int3_rtn(double3);\n" |
33037 | "int3 __ovld __cnfn convert_int3_rtp(double3);\n" |
33038 | "int3 __ovld __cnfn convert_int3_rtz(double3);\n" |
33039 | "int3 __ovld __cnfn convert_int3_sat(double3);\n" |
33040 | "int3 __ovld __cnfn convert_int3_sat_rte(double3);\n" |
33041 | "int3 __ovld __cnfn convert_int3_sat_rtn(double3);\n" |
33042 | "int3 __ovld __cnfn convert_int3_sat_rtp(double3);\n" |
33043 | "int3 __ovld __cnfn convert_int3_sat_rtz(double3);\n" |
33044 | "int4 __ovld __cnfn convert_int4(double4);\n" |
33045 | "int4 __ovld __cnfn convert_int4_rte(double4);\n" |
33046 | "int4 __ovld __cnfn convert_int4_rtn(double4);\n" |
33047 | "int4 __ovld __cnfn convert_int4_rtp(double4);\n" |
33048 | "int4 __ovld __cnfn convert_int4_rtz(double4);\n" |
33049 | "int4 __ovld __cnfn convert_int4_sat(double4);\n" |
33050 | "int4 __ovld __cnfn convert_int4_sat_rte(double4);\n" |
33051 | "int4 __ovld __cnfn convert_int4_sat_rtn(double4);\n" |
33052 | "int4 __ovld __cnfn convert_int4_sat_rtp(double4);\n" |
33053 | "int4 __ovld __cnfn convert_int4_sat_rtz(double4);\n" |
33054 | "int8 __ovld __cnfn convert_int8(double8);\n" |
33055 | "int8 __ovld __cnfn convert_int8_rte(double8);\n" |
33056 | "int8 __ovld __cnfn convert_int8_rtn(double8);\n" |
33057 | "int8 __ovld __cnfn convert_int8_rtp(double8);\n" |
33058 | "int8 __ovld __cnfn convert_int8_rtz(double8);\n" |
33059 | "int8 __ovld __cnfn convert_int8_sat(double8);\n" |
33060 | "int8 __ovld __cnfn convert_int8_sat_rte(double8);\n" |
33061 | "int8 __ovld __cnfn convert_int8_sat_rtn(double8);\n" |
33062 | "int8 __ovld __cnfn convert_int8_sat_rtp(double8);\n" |
33063 | "int8 __ovld __cnfn convert_int8_sat_rtz(double8);\n" |
33064 | "int16 __ovld __cnfn convert_int16(double16);\n" |
33065 | "int16 __ovld __cnfn convert_int16_rte(double16);\n" |
33066 | "int16 __ovld __cnfn convert_int16_rtn(double16);\n" |
33067 | "int16 __ovld __cnfn convert_int16_rtp(double16);\n" |
33068 | "int16 __ovld __cnfn convert_int16_rtz(double16);\n" |
33069 | "int16 __ovld __cnfn convert_int16_sat(double16);\n" |
33070 | "int16 __ovld __cnfn convert_int16_sat_rte(double16);\n" |
33071 | "int16 __ovld __cnfn convert_int16_sat_rtn(double16);\n" |
33072 | "int16 __ovld __cnfn convert_int16_sat_rtp(double16);\n" |
33073 | "int16 __ovld __cnfn convert_int16_sat_rtz(double16);\n" |
33074 | "\n" |
33075 | "uint __ovld __cnfn convert_uint(double);\n" |
33076 | "uint __ovld __cnfn convert_uint_rte(double);\n" |
33077 | "uint __ovld __cnfn convert_uint_rtn(double);\n" |
33078 | "uint __ovld __cnfn convert_uint_rtp(double);\n" |
33079 | "uint __ovld __cnfn convert_uint_rtz(double);\n" |
33080 | "uint __ovld __cnfn convert_uint_sat(double);\n" |
33081 | "uint __ovld __cnfn convert_uint_sat_rte(double);\n" |
33082 | "uint __ovld __cnfn convert_uint_sat_rtn(double);\n" |
33083 | "uint __ovld __cnfn convert_uint_sat_rtp(double);\n" |
33084 | "uint __ovld __cnfn convert_uint_sat_rtz(double);\n" |
33085 | "uint2 __ovld __cnfn convert_uint2(double2);\n" |
33086 | "uint2 __ovld __cnfn convert_uint2_rte(double2);\n" |
33087 | "uint2 __ovld __cnfn convert_uint2_rtn(double2);\n" |
33088 | "uint2 __ovld __cnfn convert_uint2_rtp(double2);\n" |
33089 | "uint2 __ovld __cnfn convert_uint2_rtz(double2);\n" |
33090 | "uint2 __ovld __cnfn convert_uint2_sat(double2);\n" |
33091 | "uint2 __ovld __cnfn convert_uint2_sat_rte(double2);\n" |
33092 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(double2);\n" |
33093 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(double2);\n" |
33094 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(double2);\n" |
33095 | "uint3 __ovld __cnfn convert_uint3(double3);\n" |
33096 | "uint3 __ovld __cnfn convert_uint3_rte(double3);\n" |
33097 | "uint3 __ovld __cnfn convert_uint3_rtn(double3);\n" |
33098 | "uint3 __ovld __cnfn convert_uint3_rtp(double3);\n" |
33099 | "uint3 __ovld __cnfn convert_uint3_rtz(double3);\n" |
33100 | "uint3 __ovld __cnfn convert_uint3_sat(double3);\n" |
33101 | "uint3 __ovld __cnfn convert_uint3_sat_rte(double3);\n" |
33102 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(double3);\n" |
33103 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(double3);\n" |
33104 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(double3);\n" |
33105 | "uint4 __ovld __cnfn convert_uint4(double4);\n" |
33106 | "uint4 __ovld __cnfn convert_uint4_rte(double4);\n" |
33107 | "uint4 __ovld __cnfn convert_uint4_rtn(double4);\n" |
33108 | "uint4 __ovld __cnfn convert_uint4_rtp(double4);\n" |
33109 | "uint4 __ovld __cnfn convert_uint4_rtz(double4);\n" |
33110 | "uint4 __ovld __cnfn convert_uint4_sat(double4);\n" |
33111 | "uint4 __ovld __cnfn convert_uint4_sat_rte(double4);\n" |
33112 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(double4);\n" |
33113 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(double4);\n" |
33114 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(double4);\n" |
33115 | "uint8 __ovld __cnfn convert_uint8(double8);\n" |
33116 | "uint8 __ovld __cnfn convert_uint8_rte(double8);\n" |
33117 | "uint8 __ovld __cnfn convert_uint8_rtn(double8);\n" |
33118 | "uint8 __ovld __cnfn convert_uint8_rtp(double8);\n" |
33119 | "uint8 __ovld __cnfn convert_uint8_rtz(double8);\n" |
33120 | "uint8 __ovld __cnfn convert_uint8_sat(double8);\n" |
33121 | "uint8 __ovld __cnfn convert_uint8_sat_rte(double8);\n" |
33122 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(double8);\n" |
33123 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(double8);\n" |
33124 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(double8);\n" |
33125 | "uint16 __ovld __cnfn convert_uint16(double16);\n" |
33126 | "uint16 __ovld __cnfn convert_uint16_rte(double16);\n" |
33127 | "uint16 __ovld __cnfn convert_uint16_rtn(double16);\n" |
33128 | "uint16 __ovld __cnfn convert_uint16_rtp(double16);\n" |
33129 | "uint16 __ovld __cnfn convert_uint16_rtz(double16);\n" |
33130 | "uint16 __ovld __cnfn convert_uint16_sat(double16);\n" |
33131 | "uint16 __ovld __cnfn convert_uint16_sat_rte(double16);\n" |
33132 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(double16);\n" |
33133 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(double16);\n" |
33134 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(double16);\n" |
33135 | "\n" |
33136 | "long __ovld __cnfn convert_long(double);\n" |
33137 | "long __ovld __cnfn convert_long_rte(double);\n" |
33138 | "long __ovld __cnfn convert_long_rtn(double);\n" |
33139 | "long __ovld __cnfn convert_long_rtp(double);\n" |
33140 | "long __ovld __cnfn convert_long_rtz(double);\n" |
33141 | "long __ovld __cnfn convert_long_sat(double);\n" |
33142 | "long __ovld __cnfn convert_long_sat_rte(double);\n" |
33143 | "long __ovld __cnfn convert_long_sat_rtn(double);\n" |
33144 | "long __ovld __cnfn convert_long_sat_rtp(double);\n" |
33145 | "long __ovld __cnfn convert_long_sat_rtz(double);\n" |
33146 | "long2 __ovld __cnfn convert_long2(double2);\n" |
33147 | "long2 __ovld __cnfn convert_long2_rte(double2);\n" |
33148 | "long2 __ovld __cnfn convert_long2_rtn(double2);\n" |
33149 | "long2 __ovld __cnfn convert_long2_rtp(double2);\n" |
33150 | "long2 __ovld __cnfn convert_long2_rtz(double2);\n" |
33151 | "long2 __ovld __cnfn convert_long2_sat(double2);\n" |
33152 | "long2 __ovld __cnfn convert_long2_sat_rte(double2);\n" |
33153 | "long2 __ovld __cnfn convert_long2_sat_rtn(double2);\n" |
33154 | "long2 __ovld __cnfn convert_long2_sat_rtp(double2);\n" |
33155 | "long2 __ovld __cnfn convert_long2_sat_rtz(double2);\n" |
33156 | "long3 __ovld __cnfn convert_long3(double3);\n" |
33157 | "long3 __ovld __cnfn convert_long3_rte(double3);\n" |
33158 | "long3 __ovld __cnfn convert_long3_rtn(double3);\n" |
33159 | "long3 __ovld __cnfn convert_long3_rtp(double3);\n" |
33160 | "long3 __ovld __cnfn convert_long3_rtz(double3);\n" |
33161 | "long3 __ovld __cnfn convert_long3_sat(double3);\n" |
33162 | "long3 __ovld __cnfn convert_long3_sat_rte(double3);\n" |
33163 | "long3 __ovld __cnfn convert_long3_sat_rtn(double3);\n" |
33164 | "long3 __ovld __cnfn convert_long3_sat_rtp(double3);\n" |
33165 | "long3 __ovld __cnfn convert_long3_sat_rtz(double3);\n" |
33166 | "long4 __ovld __cnfn convert_long4(double4);\n" |
33167 | "long4 __ovld __cnfn convert_long4_rte(double4);\n" |
33168 | "long4 __ovld __cnfn convert_long4_rtn(double4);\n" |
33169 | "long4 __ovld __cnfn convert_long4_rtp(double4);\n" |
33170 | "long4 __ovld __cnfn convert_long4_rtz(double4);\n" |
33171 | "long4 __ovld __cnfn convert_long4_sat(double4);\n" |
33172 | "long4 __ovld __cnfn convert_long4_sat_rte(double4);\n" |
33173 | "long4 __ovld __cnfn convert_long4_sat_rtn(double4);\n" |
33174 | "long4 __ovld __cnfn convert_long4_sat_rtp(double4);\n" |
33175 | "long4 __ovld __cnfn convert_long4_sat_rtz(double4);\n" |
33176 | "long8 __ovld __cnfn convert_long8(double8);\n" |
33177 | "long8 __ovld __cnfn convert_long8_rte(double8);\n" |
33178 | "long8 __ovld __cnfn convert_long8_rtn(double8);\n" |
33179 | "long8 __ovld __cnfn convert_long8_rtp(double8);\n" |
33180 | "long8 __ovld __cnfn convert_long8_rtz(double8);\n" |
33181 | "long8 __ovld __cnfn convert_long8_sat(double8);\n" |
33182 | "long8 __ovld __cnfn convert_long8_sat_rte(double8);\n" |
33183 | "long8 __ovld __cnfn convert_long8_sat_rtn(double8);\n" |
33184 | "long8 __ovld __cnfn convert_long8_sat_rtp(double8);\n" |
33185 | "long8 __ovld __cnfn convert_long8_sat_rtz(double8);\n" |
33186 | "long16 __ovld __cnfn convert_long16(double16);\n" |
33187 | "long16 __ovld __cnfn convert_long16_rte(double16);\n" |
33188 | "long16 __ovld __cnfn convert_long16_rtn(double16);\n" |
33189 | "long16 __ovld __cnfn convert_long16_rtp(double16);\n" |
33190 | "long16 __ovld __cnfn convert_long16_rtz(double16);\n" |
33191 | "long16 __ovld __cnfn convert_long16_sat(double16);\n" |
33192 | "long16 __ovld __cnfn convert_long16_sat_rte(double16);\n" |
33193 | "long16 __ovld __cnfn convert_long16_sat_rtn(double16);\n" |
33194 | "long16 __ovld __cnfn convert_long16_sat_rtp(double16);\n" |
33195 | "long16 __ovld __cnfn convert_long16_sat_rtz(double16);\n" |
33196 | "\n" |
33197 | "ulong __ovld __cnfn convert_ulong(double);\n" |
33198 | "ulong __ovld __cnfn convert_ulong_rte(double);\n" |
33199 | "ulong __ovld __cnfn convert_ulong_rtn(double);\n" |
33200 | "ulong __ovld __cnfn convert_ulong_rtp(double);\n" |
33201 | "ulong __ovld __cnfn convert_ulong_rtz(double);\n" |
33202 | "ulong __ovld __cnfn convert_ulong_sat(double);\n" |
33203 | "ulong __ovld __cnfn convert_ulong_sat_rte(double);\n" |
33204 | "ulong __ovld __cnfn convert_ulong_sat_rtn(double);\n" |
33205 | "ulong __ovld __cnfn convert_ulong_sat_rtp(double);\n" |
33206 | "ulong __ovld __cnfn convert_ulong_sat_rtz(double);\n" |
33207 | "ulong2 __ovld __cnfn convert_ulong2(double2);\n" |
33208 | "ulong2 __ovld __cnfn convert_ulong2_rte(double2);\n" |
33209 | "ulong2 __ovld __cnfn convert_ulong2_rtn(double2);\n" |
33210 | "ulong2 __ovld __cnfn convert_ulong2_rtp(double2);\n" |
33211 | "ulong2 __ovld __cnfn convert_ulong2_rtz(double2);\n" |
33212 | "ulong2 __ovld __cnfn convert_ulong2_sat(double2);\n" |
33213 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(double2);\n" |
33214 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(double2);\n" |
33215 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(double2);\n" |
33216 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(double2);\n" |
33217 | "ulong3 __ovld __cnfn convert_ulong3(double3);\n" |
33218 | "ulong3 __ovld __cnfn convert_ulong3_rte(double3);\n" |
33219 | "ulong3 __ovld __cnfn convert_ulong3_rtn(double3);\n" |
33220 | "ulong3 __ovld __cnfn convert_ulong3_rtp(double3);\n" |
33221 | "ulong3 __ovld __cnfn convert_ulong3_rtz(double3);\n" |
33222 | "ulong3 __ovld __cnfn convert_ulong3_sat(double3);\n" |
33223 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(double3);\n" |
33224 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(double3);\n" |
33225 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(double3);\n" |
33226 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(double3);\n" |
33227 | "ulong4 __ovld __cnfn convert_ulong4(double4);\n" |
33228 | "ulong4 __ovld __cnfn convert_ulong4_rte(double4);\n" |
33229 | "ulong4 __ovld __cnfn convert_ulong4_rtn(double4);\n" |
33230 | "ulong4 __ovld __cnfn convert_ulong4_rtp(double4);\n" |
33231 | "ulong4 __ovld __cnfn convert_ulong4_rtz(double4);\n" |
33232 | "ulong4 __ovld __cnfn convert_ulong4_sat(double4);\n" |
33233 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(double4);\n" |
33234 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(double4);\n" |
33235 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(double4);\n" |
33236 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(double4);\n" |
33237 | "ulong8 __ovld __cnfn convert_ulong8(double8);\n" |
33238 | "ulong8 __ovld __cnfn convert_ulong8_rte(double8);\n" |
33239 | "ulong8 __ovld __cnfn convert_ulong8_rtn(double8);\n" |
33240 | "ulong8 __ovld __cnfn convert_ulong8_rtp(double8);\n" |
33241 | "ulong8 __ovld __cnfn convert_ulong8_rtz(double8);\n" |
33242 | "ulong8 __ovld __cnfn convert_ulong8_sat(double8);\n" |
33243 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(double8);\n" |
33244 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(double8);\n" |
33245 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(double8);\n" |
33246 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(double8);\n" |
33247 | "ulong16 __ovld __cnfn convert_ulong16(double16);\n" |
33248 | "ulong16 __ovld __cnfn convert_ulong16_rte(double16);\n" |
33249 | "ulong16 __ovld __cnfn convert_ulong16_rtn(double16);\n" |
33250 | "ulong16 __ovld __cnfn convert_ulong16_rtp(double16);\n" |
33251 | "ulong16 __ovld __cnfn convert_ulong16_rtz(double16);\n" |
33252 | "ulong16 __ovld __cnfn convert_ulong16_sat(double16);\n" |
33253 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(double16);\n" |
33254 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(double16);\n" |
33255 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(double16);\n" |
33256 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(double16);\n" |
33257 | "\n" |
33258 | "float __ovld __cnfn convert_float(double);\n" |
33259 | "float __ovld __cnfn convert_float_rte(double);\n" |
33260 | "float __ovld __cnfn convert_float_rtn(double);\n" |
33261 | "float __ovld __cnfn convert_float_rtp(double);\n" |
33262 | "float __ovld __cnfn convert_float_rtz(double);\n" |
33263 | "float2 __ovld __cnfn convert_float2(double2);\n" |
33264 | "float2 __ovld __cnfn convert_float2_rte(double2);\n" |
33265 | "float2 __ovld __cnfn convert_float2_rtn(double2);\n" |
33266 | "float2 __ovld __cnfn convert_float2_rtp(double2);\n" |
33267 | "float2 __ovld __cnfn convert_float2_rtz(double2);\n" |
33268 | "float3 __ovld __cnfn convert_float3(double3);\n" |
33269 | "float3 __ovld __cnfn convert_float3_rte(double3);\n" |
33270 | "float3 __ovld __cnfn convert_float3_rtn(double3);\n" |
33271 | "float3 __ovld __cnfn convert_float3_rtp(double3);\n" |
33272 | "float3 __ovld __cnfn convert_float3_rtz(double3);\n" |
33273 | "float4 __ovld __cnfn convert_float4(double4);\n" |
33274 | "float4 __ovld __cnfn convert_float4_rte(double4);\n" |
33275 | "float4 __ovld __cnfn convert_float4_rtn(double4);\n" |
33276 | "float4 __ovld __cnfn convert_float4_rtp(double4);\n" |
33277 | "float4 __ovld __cnfn convert_float4_rtz(double4);\n" |
33278 | "float8 __ovld __cnfn convert_float8(double8);\n" |
33279 | "float8 __ovld __cnfn convert_float8_rte(double8);\n" |
33280 | "float8 __ovld __cnfn convert_float8_rtn(double8);\n" |
33281 | "float8 __ovld __cnfn convert_float8_rtp(double8);\n" |
33282 | "float8 __ovld __cnfn convert_float8_rtz(double8);\n" |
33283 | "float16 __ovld __cnfn convert_float16(double16);\n" |
33284 | "float16 __ovld __cnfn convert_float16_rte(double16);\n" |
33285 | "float16 __ovld __cnfn convert_float16_rtn(double16);\n" |
33286 | "float16 __ovld __cnfn convert_float16_rtp(double16);\n" |
33287 | "float16 __ovld __cnfn convert_float16_rtz(double16);\n" |
33288 | "\n" |
33289 | "double __ovld __cnfn convert_double(char);\n" |
33290 | "double __ovld __cnfn convert_double(double);\n" |
33291 | "double __ovld __cnfn convert_double(float);\n" |
33292 | "double __ovld __cnfn convert_double(int);\n" |
33293 | "double __ovld __cnfn convert_double(long);\n" |
33294 | "double __ovld __cnfn convert_double(short);\n" |
33295 | "double __ovld __cnfn convert_double(uchar);\n" |
33296 | "double __ovld __cnfn convert_double(uint);\n" |
33297 | "double __ovld __cnfn convert_double(ulong);\n" |
33298 | "double __ovld __cnfn convert_double(ushort);\n" |
33299 | "double __ovld __cnfn convert_double_rte(char);\n" |
33300 | "double __ovld __cnfn convert_double_rte(double);\n" |
33301 | "double __ovld __cnfn convert_double_rte(float);\n" |
33302 | "double __ovld __cnfn convert_double_rte(int);\n" |
33303 | "double __ovld __cnfn convert_double_rte(long);\n" |
33304 | "double __ovld __cnfn convert_double_rte(short);\n" |
33305 | "double __ovld __cnfn convert_double_rte(uchar);\n" |
33306 | "double __ovld __cnfn convert_double_rte(uint);\n" |
33307 | "double __ovld __cnfn convert_double_rte(ulong);\n" |
33308 | "double __ovld __cnfn convert_double_rte(ushort);\n" |
33309 | "double __ovld __cnfn convert_double_rtn(char);\n" |
33310 | "double __ovld __cnfn convert_double_rtn(double);\n" |
33311 | "double __ovld __cnfn convert_double_rtn(float);\n" |
33312 | "double __ovld __cnfn convert_double_rtn(int);\n" |
33313 | "double __ovld __cnfn convert_double_rtn(long);\n" |
33314 | "double __ovld __cnfn convert_double_rtn(short);\n" |
33315 | "double __ovld __cnfn convert_double_rtn(uchar);\n" |
33316 | "double __ovld __cnfn convert_double_rtn(uint);\n" |
33317 | "double __ovld __cnfn convert_double_rtn(ulong);\n" |
33318 | "double __ovld __cnfn convert_double_rtn(ushort);\n" |
33319 | "double __ovld __cnfn convert_double_rtp(char);\n" |
33320 | "double __ovld __cnfn convert_double_rtp(double);\n" |
33321 | "double __ovld __cnfn convert_double_rtp(float);\n" |
33322 | "double __ovld __cnfn convert_double_rtp(int);\n" |
33323 | "double __ovld __cnfn convert_double_rtp(long);\n" |
33324 | "double __ovld __cnfn convert_double_rtp(short);\n" |
33325 | "double __ovld __cnfn convert_double_rtp(uchar);\n" |
33326 | "double __ovld __cnfn convert_double_rtp(uint);\n" |
33327 | "double __ovld __cnfn convert_double_rtp(ulong);\n" |
33328 | "double __ovld __cnfn convert_double_rtp(ushort);\n" |
33329 | "double __ovld __cnfn convert_double_rtz(char);\n" |
33330 | "double __ovld __cnfn convert_double_rtz(double);\n" |
33331 | "double __ovld __cnfn convert_double_rtz(float);\n" |
33332 | "double __ovld __cnfn convert_double_rtz(int);\n" |
33333 | "double __ovld __cnfn convert_double_rtz(long);\n" |
33334 | "double __ovld __cnfn convert_double_rtz(short);\n" |
33335 | "double __ovld __cnfn convert_double_rtz(uchar);\n" |
33336 | "double __ovld __cnfn convert_double_rtz(uint);\n" |
33337 | "double __ovld __cnfn convert_double_rtz(ulong);\n" |
33338 | "double __ovld __cnfn convert_double_rtz(ushort);\n" |
33339 | "double2 __ovld __cnfn convert_double2(char2);\n" |
33340 | "double2 __ovld __cnfn convert_double2(double2);\n" |
33341 | "double2 __ovld __cnfn convert_double2(float2);\n" |
33342 | "double2 __ovld __cnfn convert_double2(int2);\n" |
33343 | "double2 __ovld __cnfn convert_double2(long2);\n" |
33344 | "double2 __ovld __cnfn convert_double2(short2);\n" |
33345 | "double2 __ovld __cnfn convert_double2(uchar2);\n" |
33346 | "double2 __ovld __cnfn convert_double2(uint2);\n" |
33347 | "double2 __ovld __cnfn convert_double2(ulong2);\n" |
33348 | "double2 __ovld __cnfn convert_double2(ushort2);\n" |
33349 | "double2 __ovld __cnfn convert_double2_rte(char2);\n" |
33350 | "double2 __ovld __cnfn convert_double2_rte(double2);\n" |
33351 | "double2 __ovld __cnfn convert_double2_rte(float2);\n" |
33352 | "double2 __ovld __cnfn convert_double2_rte(int2);\n" |
33353 | "double2 __ovld __cnfn convert_double2_rte(long2);\n" |
33354 | "double2 __ovld __cnfn convert_double2_rte(short2);\n" |
33355 | "double2 __ovld __cnfn convert_double2_rte(uchar2);\n" |
33356 | "double2 __ovld __cnfn convert_double2_rte(uint2);\n" |
33357 | "double2 __ovld __cnfn convert_double2_rte(ulong2);\n" |
33358 | "double2 __ovld __cnfn convert_double2_rte(ushort2);\n" |
33359 | "double2 __ovld __cnfn convert_double2_rtn(char2);\n" |
33360 | "double2 __ovld __cnfn convert_double2_rtn(double2);\n" |
33361 | "double2 __ovld __cnfn convert_double2_rtn(float2);\n" |
33362 | "double2 __ovld __cnfn convert_double2_rtn(int2);\n" |
33363 | "double2 __ovld __cnfn convert_double2_rtn(long2);\n" |
33364 | "double2 __ovld __cnfn convert_double2_rtn(short2);\n" |
33365 | "double2 __ovld __cnfn convert_double2_rtn(uchar2);\n" |
33366 | "double2 __ovld __cnfn convert_double2_rtn(uint2);\n" |
33367 | "double2 __ovld __cnfn convert_double2_rtn(ulong2);\n" |
33368 | "double2 __ovld __cnfn convert_double2_rtn(ushort2);\n" |
33369 | "double2 __ovld __cnfn convert_double2_rtp(char2);\n" |
33370 | "double2 __ovld __cnfn convert_double2_rtp(double2);\n" |
33371 | "double2 __ovld __cnfn convert_double2_rtp(float2);\n" |
33372 | "double2 __ovld __cnfn convert_double2_rtp(int2);\n" |
33373 | "double2 __ovld __cnfn convert_double2_rtp(long2);\n" |
33374 | "double2 __ovld __cnfn convert_double2_rtp(short2);\n" |
33375 | "double2 __ovld __cnfn convert_double2_rtp(uchar2);\n" |
33376 | "double2 __ovld __cnfn convert_double2_rtp(uint2);\n" |
33377 | "double2 __ovld __cnfn convert_double2_rtp(ulong2);\n" |
33378 | "double2 __ovld __cnfn convert_double2_rtp(ushort2);\n" |
33379 | "double2 __ovld __cnfn convert_double2_rtz(char2);\n" |
33380 | "double2 __ovld __cnfn convert_double2_rtz(double2);\n" |
33381 | "double2 __ovld __cnfn convert_double2_rtz(float2);\n" |
33382 | "double2 __ovld __cnfn convert_double2_rtz(int2);\n" |
33383 | "double2 __ovld __cnfn convert_double2_rtz(long2);\n" |
33384 | "double2 __ovld __cnfn convert_double2_rtz(short2);\n" |
33385 | "double2 __ovld __cnfn convert_double2_rtz(uchar2);\n" |
33386 | "double2 __ovld __cnfn convert_double2_rtz(uint2);\n" |
33387 | "double2 __ovld __cnfn convert_double2_rtz(ulong2);\n" |
33388 | "double2 __ovld __cnfn convert_double2_rtz(ushort2);\n" |
33389 | "double3 __ovld __cnfn convert_double3(char3);\n" |
33390 | "double3 __ovld __cnfn convert_double3(double3);\n" |
33391 | "double3 __ovld __cnfn convert_double3(float3);\n" |
33392 | "double3 __ovld __cnfn convert_double3(int3);\n" |
33393 | "double3 __ovld __cnfn convert_double3(long3);\n" |
33394 | "double3 __ovld __cnfn convert_double3(short3);\n" |
33395 | "double3 __ovld __cnfn convert_double3(uchar3);\n" |
33396 | "double3 __ovld __cnfn convert_double3(uint3);\n" |
33397 | "double3 __ovld __cnfn convert_double3(ulong3);\n" |
33398 | "double3 __ovld __cnfn convert_double3(ushort3);\n" |
33399 | "double3 __ovld __cnfn convert_double3_rte(char3);\n" |
33400 | "double3 __ovld __cnfn convert_double3_rte(double3);\n" |
33401 | "double3 __ovld __cnfn convert_double3_rte(float3);\n" |
33402 | "double3 __ovld __cnfn convert_double3_rte(int3);\n" |
33403 | "double3 __ovld __cnfn convert_double3_rte(long3);\n" |
33404 | "double3 __ovld __cnfn convert_double3_rte(short3);\n" |
33405 | "double3 __ovld __cnfn convert_double3_rte(uchar3);\n" |
33406 | "double3 __ovld __cnfn convert_double3_rte(uint3);\n" |
33407 | "double3 __ovld __cnfn convert_double3_rte(ulong3);\n" |
33408 | "double3 __ovld __cnfn convert_double3_rte(ushort3);\n" |
33409 | "double3 __ovld __cnfn convert_double3_rtn(char3);\n" |
33410 | "double3 __ovld __cnfn convert_double3_rtn(double3);\n" |
33411 | "double3 __ovld __cnfn convert_double3_rtn(float3);\n" |
33412 | "double3 __ovld __cnfn convert_double3_rtn(int3);\n" |
33413 | "double3 __ovld __cnfn convert_double3_rtn(long3);\n" |
33414 | "double3 __ovld __cnfn convert_double3_rtn(short3);\n" |
33415 | "double3 __ovld __cnfn convert_double3_rtn(uchar3);\n" |
33416 | "double3 __ovld __cnfn convert_double3_rtn(uint3);\n" |
33417 | "double3 __ovld __cnfn convert_double3_rtn(ulong3);\n" |
33418 | "double3 __ovld __cnfn convert_double3_rtn(ushort3);\n" |
33419 | "double3 __ovld __cnfn convert_double3_rtp(char3);\n" |
33420 | "double3 __ovld __cnfn convert_double3_rtp(double3);\n" |
33421 | "double3 __ovld __cnfn convert_double3_rtp(float3);\n" |
33422 | "double3 __ovld __cnfn convert_double3_rtp(int3);\n" |
33423 | "double3 __ovld __cnfn convert_double3_rtp(long3);\n" |
33424 | "double3 __ovld __cnfn convert_double3_rtp(short3);\n" |
33425 | "double3 __ovld __cnfn convert_double3_rtp(uchar3);\n" |
33426 | "double3 __ovld __cnfn convert_double3_rtp(uint3);\n" |
33427 | "double3 __ovld __cnfn convert_double3_rtp(ulong3);\n" |
33428 | "double3 __ovld __cnfn convert_double3_rtp(ushort3);\n" |
33429 | "double3 __ovld __cnfn convert_double3_rtz(char3);\n" |
33430 | "double3 __ovld __cnfn convert_double3_rtz(double3);\n" |
33431 | "double3 __ovld __cnfn convert_double3_rtz(float3);\n" |
33432 | "double3 __ovld __cnfn convert_double3_rtz(int3);\n" |
33433 | "double3 __ovld __cnfn convert_double3_rtz(long3);\n" |
33434 | "double3 __ovld __cnfn convert_double3_rtz(short3);\n" |
33435 | "double3 __ovld __cnfn convert_double3_rtz(uchar3);\n" |
33436 | "double3 __ovld __cnfn convert_double3_rtz(uint3);\n" |
33437 | "double3 __ovld __cnfn convert_double3_rtz(ulong3);\n" |
33438 | "double3 __ovld __cnfn convert_double3_rtz(ushort3);\n" |
33439 | "double4 __ovld __cnfn convert_double4(char4);\n" |
33440 | "double4 __ovld __cnfn convert_double4(double4);\n" |
33441 | "double4 __ovld __cnfn convert_double4(float4);\n" |
33442 | "double4 __ovld __cnfn convert_double4(int4);\n" |
33443 | "double4 __ovld __cnfn convert_double4(long4);\n" |
33444 | "double4 __ovld __cnfn convert_double4(short4);\n" |
33445 | "double4 __ovld __cnfn convert_double4(uchar4);\n" |
33446 | "double4 __ovld __cnfn convert_double4(uint4);\n" |
33447 | "double4 __ovld __cnfn convert_double4(ulong4);\n" |
33448 | "double4 __ovld __cnfn convert_double4(ushort4);\n" |
33449 | "double4 __ovld __cnfn convert_double4_rte(char4);\n" |
33450 | "double4 __ovld __cnfn convert_double4_rte(double4);\n" |
33451 | "double4 __ovld __cnfn convert_double4_rte(float4);\n" |
33452 | "double4 __ovld __cnfn convert_double4_rte(int4);\n" |
33453 | "double4 __ovld __cnfn convert_double4_rte(long4);\n" |
33454 | "double4 __ovld __cnfn convert_double4_rte(short4);\n" |
33455 | "double4 __ovld __cnfn convert_double4_rte(uchar4);\n" |
33456 | "double4 __ovld __cnfn convert_double4_rte(uint4);\n" |
33457 | "double4 __ovld __cnfn convert_double4_rte(ulong4);\n" |
33458 | "double4 __ovld __cnfn convert_double4_rte(ushort4);\n" |
33459 | "double4 __ovld __cnfn convert_double4_rtn(char4);\n" |
33460 | "double4 __ovld __cnfn convert_double4_rtn(double4);\n" |
33461 | "double4 __ovld __cnfn convert_double4_rtn(float4);\n" |
33462 | "double4 __ovld __cnfn convert_double4_rtn(int4);\n" |
33463 | "double4 __ovld __cnfn convert_double4_rtn(long4);\n" |
33464 | "double4 __ovld __cnfn convert_double4_rtn(short4);\n" |
33465 | "double4 __ovld __cnfn convert_double4_rtn(uchar4);\n" |
33466 | "double4 __ovld __cnfn convert_double4_rtn(uint4);\n" |
33467 | "double4 __ovld __cnfn convert_double4_rtn(ulong4);\n" |
33468 | "double4 __ovld __cnfn convert_double4_rtn(ushort4);\n" |
33469 | "double4 __ovld __cnfn convert_double4_rtp(char4);\n" |
33470 | "double4 __ovld __cnfn convert_double4_rtp(double4);\n" |
33471 | "double4 __ovld __cnfn convert_double4_rtp(float4);\n" |
33472 | "double4 __ovld __cnfn convert_double4_rtp(int4);\n" |
33473 | "double4 __ovld __cnfn convert_double4_rtp(long4);\n" |
33474 | "double4 __ovld __cnfn convert_double4_rtp(short4);\n" |
33475 | "double4 __ovld __cnfn convert_double4_rtp(uchar4);\n" |
33476 | "double4 __ovld __cnfn convert_double4_rtp(uint4);\n" |
33477 | "double4 __ovld __cnfn convert_double4_rtp(ulong4);\n" |
33478 | "double4 __ovld __cnfn convert_double4_rtp(ushort4);\n" |
33479 | "double4 __ovld __cnfn convert_double4_rtz(char4);\n" |
33480 | "double4 __ovld __cnfn convert_double4_rtz(double4);\n" |
33481 | "double4 __ovld __cnfn convert_double4_rtz(float4);\n" |
33482 | "double4 __ovld __cnfn convert_double4_rtz(int4);\n" |
33483 | "double4 __ovld __cnfn convert_double4_rtz(long4);\n" |
33484 | "double4 __ovld __cnfn convert_double4_rtz(short4);\n" |
33485 | "double4 __ovld __cnfn convert_double4_rtz(uchar4);\n" |
33486 | "double4 __ovld __cnfn convert_double4_rtz(uint4);\n" |
33487 | "double4 __ovld __cnfn convert_double4_rtz(ulong4);\n" |
33488 | "double4 __ovld __cnfn convert_double4_rtz(ushort4);\n" |
33489 | "double8 __ovld __cnfn convert_double8(char8);\n" |
33490 | "double8 __ovld __cnfn convert_double8(double8);\n" |
33491 | "double8 __ovld __cnfn convert_double8(float8);\n" |
33492 | "double8 __ovld __cnfn convert_double8(int8);\n" |
33493 | "double8 __ovld __cnfn convert_double8(long8);\n" |
33494 | "double8 __ovld __cnfn convert_double8(short8);\n" |
33495 | "double8 __ovld __cnfn convert_double8(uchar8);\n" |
33496 | "double8 __ovld __cnfn convert_double8(uint8);\n" |
33497 | "double8 __ovld __cnfn convert_double8(ulong8);\n" |
33498 | "double8 __ovld __cnfn convert_double8(ushort8);\n" |
33499 | "double8 __ovld __cnfn convert_double8_rte(char8);\n" |
33500 | "double8 __ovld __cnfn convert_double8_rte(double8);\n" |
33501 | "double8 __ovld __cnfn convert_double8_rte(float8);\n" |
33502 | "double8 __ovld __cnfn convert_double8_rte(int8);\n" |
33503 | "double8 __ovld __cnfn convert_double8_rte(long8);\n" |
33504 | "double8 __ovld __cnfn convert_double8_rte(short8);\n" |
33505 | "double8 __ovld __cnfn convert_double8_rte(uchar8);\n" |
33506 | "double8 __ovld __cnfn convert_double8_rte(uint8);\n" |
33507 | "double8 __ovld __cnfn convert_double8_rte(ulong8);\n" |
33508 | "double8 __ovld __cnfn convert_double8_rte(ushort8);\n" |
33509 | "double8 __ovld __cnfn convert_double8_rtn(char8);\n" |
33510 | "double8 __ovld __cnfn convert_double8_rtn(double8);\n" |
33511 | "double8 __ovld __cnfn convert_double8_rtn(float8);\n" |
33512 | "double8 __ovld __cnfn convert_double8_rtn(int8);\n" |
33513 | "double8 __ovld __cnfn convert_double8_rtn(long8);\n" |
33514 | "double8 __ovld __cnfn convert_double8_rtn(short8);\n" |
33515 | "double8 __ovld __cnfn convert_double8_rtn(uchar8);\n" |
33516 | "double8 __ovld __cnfn convert_double8_rtn(uint8);\n" |
33517 | "double8 __ovld __cnfn convert_double8_rtn(ulong8);\n" |
33518 | "double8 __ovld __cnfn convert_double8_rtn(ushort8);\n" |
33519 | "double8 __ovld __cnfn convert_double8_rtp(char8);\n" |
33520 | "double8 __ovld __cnfn convert_double8_rtp(double8);\n" |
33521 | "double8 __ovld __cnfn convert_double8_rtp(float8);\n" |
33522 | "double8 __ovld __cnfn convert_double8_rtp(int8);\n" |
33523 | "double8 __ovld __cnfn convert_double8_rtp(long8);\n" |
33524 | "double8 __ovld __cnfn convert_double8_rtp(short8);\n" |
33525 | "double8 __ovld __cnfn convert_double8_rtp(uchar8);\n" |
33526 | "double8 __ovld __cnfn convert_double8_rtp(uint8);\n" |
33527 | "double8 __ovld __cnfn convert_double8_rtp(ulong8);\n" |
33528 | "double8 __ovld __cnfn convert_double8_rtp(ushort8);\n" |
33529 | "double8 __ovld __cnfn convert_double8_rtz(char8);\n" |
33530 | "double8 __ovld __cnfn convert_double8_rtz(double8);\n" |
33531 | "double8 __ovld __cnfn convert_double8_rtz(float8);\n" |
33532 | "double8 __ovld __cnfn convert_double8_rtz(int8);\n" |
33533 | "double8 __ovld __cnfn convert_double8_rtz(long8);\n" |
33534 | "double8 __ovld __cnfn convert_double8_rtz(short8);\n" |
33535 | "double8 __ovld __cnfn convert_double8_rtz(uchar8);\n" |
33536 | "double8 __ovld __cnfn convert_double8_rtz(uint8);\n" |
33537 | "double8 __ovld __cnfn convert_double8_rtz(ulong8);\n" |
33538 | "double8 __ovld __cnfn convert_double8_rtz(ushort8);\n" |
33539 | "double16 __ovld __cnfn convert_double16(char16);\n" |
33540 | "double16 __ovld __cnfn convert_double16(double16);\n" |
33541 | "double16 __ovld __cnfn convert_double16(float16);\n" |
33542 | "double16 __ovld __cnfn convert_double16(int16);\n" |
33543 | "double16 __ovld __cnfn convert_double16(long16);\n" |
33544 | "double16 __ovld __cnfn convert_double16(short16);\n" |
33545 | "double16 __ovld __cnfn convert_double16(uchar16);\n" |
33546 | "double16 __ovld __cnfn convert_double16(uint16);\n" |
33547 | "double16 __ovld __cnfn convert_double16(ulong16);\n" |
33548 | "double16 __ovld __cnfn convert_double16(ushort16);\n" |
33549 | "double16 __ovld __cnfn convert_double16_rte(char16);\n" |
33550 | "double16 __ovld __cnfn convert_double16_rte(double16);\n" |
33551 | "double16 __ovld __cnfn convert_double16_rte(float16);\n" |
33552 | "double16 __ovld __cnfn convert_double16_rte(int16);\n" |
33553 | "double16 __ovld __cnfn convert_double16_rte(long16);\n" |
33554 | "double16 __ovld __cnfn convert_double16_rte(short16);\n" |
33555 | "double16 __ovld __cnfn convert_double16_rte(uchar16);\n" |
33556 | "double16 __ovld __cnfn convert_double16_rte(uint16);\n" |
33557 | "double16 __ovld __cnfn convert_double16_rte(ulong16);\n" |
33558 | "double16 __ovld __cnfn convert_double16_rte(ushort16);\n" |
33559 | "double16 __ovld __cnfn convert_double16_rtn(char16);\n" |
33560 | "double16 __ovld __cnfn convert_double16_rtn(double16);\n" |
33561 | "double16 __ovld __cnfn convert_double16_rtn(float16);\n" |
33562 | "double16 __ovld __cnfn convert_double16_rtn(int16);\n" |
33563 | "double16 __ovld __cnfn convert_double16_rtn(long16);\n" |
33564 | "double16 __ovld __cnfn convert_double16_rtn(short16);\n" |
33565 | "double16 __ovld __cnfn convert_double16_rtn(uchar16);\n" |
33566 | "double16 __ovld __cnfn convert_double16_rtn(uint16);\n" |
33567 | "double16 __ovld __cnfn convert_double16_rtn(ulong16);\n" |
33568 | "double16 __ovld __cnfn convert_double16_rtn(ushort16);\n" |
33569 | "double16 __ovld __cnfn convert_double16_rtp(char16);\n" |
33570 | "double16 __ovld __cnfn convert_double16_rtp(double16);\n" |
33571 | "double16 __ovld __cnfn convert_double16_rtp(float16);\n" |
33572 | "double16 __ovld __cnfn convert_double16_rtp(int16);\n" |
33573 | "double16 __ovld __cnfn convert_double16_rtp(long16);\n" |
33574 | "double16 __ovld __cnfn convert_double16_rtp(short16);\n" |
33575 | "double16 __ovld __cnfn convert_double16_rtp(uchar16);\n" |
33576 | "double16 __ovld __cnfn convert_double16_rtp(uint16);\n" |
33577 | "double16 __ovld __cnfn convert_double16_rtp(ulong16);\n" |
33578 | "double16 __ovld __cnfn convert_double16_rtp(ushort16);\n" |
33579 | "double16 __ovld __cnfn convert_double16_rtz(char16);\n" |
33580 | "double16 __ovld __cnfn convert_double16_rtz(double16);\n" |
33581 | "double16 __ovld __cnfn convert_double16_rtz(float16);\n" |
33582 | "double16 __ovld __cnfn convert_double16_rtz(int16);\n" |
33583 | "double16 __ovld __cnfn convert_double16_rtz(long16);\n" |
33584 | "double16 __ovld __cnfn convert_double16_rtz(short16);\n" |
33585 | "double16 __ovld __cnfn convert_double16_rtz(uchar16);\n" |
33586 | "double16 __ovld __cnfn convert_double16_rtz(uint16);\n" |
33587 | "double16 __ovld __cnfn convert_double16_rtz(ulong16);\n" |
33588 | "double16 __ovld __cnfn convert_double16_rtz(ushort16);\n" |
33589 | "#endif //cl_khr_fp64\n" |
33590 | "\n" |
33591 | "#ifdef cl_khr_fp16\n" |
33592 | "// Convert half types to non-double types.\n" |
33593 | "uchar __ovld __cnfn convert_uchar(half);\n" |
33594 | "uchar __ovld __cnfn convert_uchar_rte(half);\n" |
33595 | "uchar __ovld __cnfn convert_uchar_rtp(half);\n" |
33596 | "uchar __ovld __cnfn convert_uchar_rtn(half);\n" |
33597 | "uchar __ovld __cnfn convert_uchar_rtz(half);\n" |
33598 | "uchar __ovld __cnfn convert_uchar_sat(half);\n" |
33599 | "uchar __ovld __cnfn convert_uchar_sat_rte(half);\n" |
33600 | "uchar __ovld __cnfn convert_uchar_sat_rtp(half);\n" |
33601 | "uchar __ovld __cnfn convert_uchar_sat_rtn(half);\n" |
33602 | "uchar __ovld __cnfn convert_uchar_sat_rtz(half);\n" |
33603 | "uchar2 __ovld __cnfn convert_uchar2(half2);\n" |
33604 | "uchar2 __ovld __cnfn convert_uchar2_rte(half2);\n" |
33605 | "uchar2 __ovld __cnfn convert_uchar2_rtp(half2);\n" |
33606 | "uchar2 __ovld __cnfn convert_uchar2_rtn(half2);\n" |
33607 | "uchar2 __ovld __cnfn convert_uchar2_rtz(half2);\n" |
33608 | "uchar2 __ovld __cnfn convert_uchar2_sat(half2);\n" |
33609 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(half2);\n" |
33610 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(half2);\n" |
33611 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(half2);\n" |
33612 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(half2);\n" |
33613 | "uchar3 __ovld __cnfn convert_uchar3(half3);\n" |
33614 | "uchar3 __ovld __cnfn convert_uchar3_rte(half3);\n" |
33615 | "uchar3 __ovld __cnfn convert_uchar3_rtp(half3);\n" |
33616 | "uchar3 __ovld __cnfn convert_uchar3_rtn(half3);\n" |
33617 | "uchar3 __ovld __cnfn convert_uchar3_rtz(half3);\n" |
33618 | "uchar3 __ovld __cnfn convert_uchar3_sat(half3);\n" |
33619 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(half3);\n" |
33620 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(half3);\n" |
33621 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(half3);\n" |
33622 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(half3);\n" |
33623 | "uchar4 __ovld __cnfn convert_uchar4(half4);\n" |
33624 | "uchar4 __ovld __cnfn convert_uchar4_rte(half4);\n" |
33625 | "uchar4 __ovld __cnfn convert_uchar4_rtp(half4);\n" |
33626 | "uchar4 __ovld __cnfn convert_uchar4_rtn(half4);\n" |
33627 | "uchar4 __ovld __cnfn convert_uchar4_rtz(half4);\n" |
33628 | "uchar4 __ovld __cnfn convert_uchar4_sat(half4);\n" |
33629 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(half4);\n" |
33630 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(half4);\n" |
33631 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(half4);\n" |
33632 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(half4);\n" |
33633 | "uchar8 __ovld __cnfn convert_uchar8(half8);\n" |
33634 | "uchar8 __ovld __cnfn convert_uchar8_rte(half8);\n" |
33635 | "uchar8 __ovld __cnfn convert_uchar8_rtp(half8);\n" |
33636 | "uchar8 __ovld __cnfn convert_uchar8_rtn(half8);\n" |
33637 | "uchar8 __ovld __cnfn convert_uchar8_rtz(half8);\n" |
33638 | "uchar8 __ovld __cnfn convert_uchar8_sat(half8);\n" |
33639 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(half8);\n" |
33640 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(half8);\n" |
33641 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(half8);\n" |
33642 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(half8);\n" |
33643 | "uchar16 __ovld __cnfn convert_uchar16(half16);\n" |
33644 | "uchar16 __ovld __cnfn convert_uchar16_rte(half16);\n" |
33645 | "uchar16 __ovld __cnfn convert_uchar16_rtp(half16);\n" |
33646 | "uchar16 __ovld __cnfn convert_uchar16_rtn(half16);\n" |
33647 | "uchar16 __ovld __cnfn convert_uchar16_rtz(half16);\n" |
33648 | "uchar16 __ovld __cnfn convert_uchar16_sat(half16);\n" |
33649 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(half16);\n" |
33650 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(half16);\n" |
33651 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(half16);\n" |
33652 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(half16);\n" |
33653 | "ushort __ovld __cnfn convert_ushort(half);\n" |
33654 | "ushort __ovld __cnfn convert_ushort_rte(half);\n" |
33655 | "ushort __ovld __cnfn convert_ushort_rtp(half);\n" |
33656 | "ushort __ovld __cnfn convert_ushort_rtn(half);\n" |
33657 | "ushort __ovld __cnfn convert_ushort_rtz(half);\n" |
33658 | "ushort __ovld __cnfn convert_ushort_sat(half);\n" |
33659 | "ushort __ovld __cnfn convert_ushort_sat_rte(half);\n" |
33660 | "ushort __ovld __cnfn convert_ushort_sat_rtp(half);\n" |
33661 | "ushort __ovld __cnfn convert_ushort_sat_rtn(half);\n" |
33662 | "ushort __ovld __cnfn convert_ushort_sat_rtz(half);\n" |
33663 | "ushort2 __ovld __cnfn convert_ushort2(half2);\n" |
33664 | "ushort2 __ovld __cnfn convert_ushort2_rte(half2);\n" |
33665 | "ushort2 __ovld __cnfn convert_ushort2_rtp(half2);\n" |
33666 | "ushort2 __ovld __cnfn convert_ushort2_rtn(half2);\n" |
33667 | "ushort2 __ovld __cnfn convert_ushort2_rtz(half2);\n" |
33668 | "ushort2 __ovld __cnfn convert_ushort2_sat(half2);\n" |
33669 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(half2);\n" |
33670 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(half2);\n" |
33671 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(half2);\n" |
33672 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(half2);\n" |
33673 | "ushort3 __ovld __cnfn convert_ushort3(half3);\n" |
33674 | "ushort3 __ovld __cnfn convert_ushort3_rte(half3);\n" |
33675 | "ushort3 __ovld __cnfn convert_ushort3_rtp(half3);\n" |
33676 | "ushort3 __ovld __cnfn convert_ushort3_rtn(half3);\n" |
33677 | "ushort3 __ovld __cnfn convert_ushort3_rtz(half3);\n" |
33678 | "ushort3 __ovld __cnfn convert_ushort3_sat(half3);\n" |
33679 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(half3);\n" |
33680 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(half3);\n" |
33681 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(half3);\n" |
33682 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(half3);\n" |
33683 | "ushort4 __ovld __cnfn convert_ushort4(half4);\n" |
33684 | "ushort4 __ovld __cnfn convert_ushort4_rte(half4);\n" |
33685 | "ushort4 __ovld __cnfn convert_ushort4_rtp(half4);\n" |
33686 | "ushort4 __ovld __cnfn convert_ushort4_rtn(half4);\n" |
33687 | "ushort4 __ovld __cnfn convert_ushort4_rtz(half4);\n" |
33688 | "ushort4 __ovld __cnfn convert_ushort4_sat(half4);\n" |
33689 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(half4);\n" |
33690 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(half4);\n" |
33691 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(half4);\n" |
33692 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(half4);\n" |
33693 | "ushort8 __ovld __cnfn convert_ushort8(half8);\n" |
33694 | "ushort8 __ovld __cnfn convert_ushort8_rte(half8);\n" |
33695 | "ushort8 __ovld __cnfn convert_ushort8_rtp(half8);\n" |
33696 | "ushort8 __ovld __cnfn convert_ushort8_rtn(half8);\n" |
33697 | "ushort8 __ovld __cnfn convert_ushort8_rtz(half8);\n" |
33698 | "ushort8 __ovld __cnfn convert_ushort8_sat(half8);\n" |
33699 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(half8);\n" |
33700 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(half8);\n" |
33701 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(half8);\n" |
33702 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(half8);\n" |
33703 | "ushort16 __ovld __cnfn convert_ushort16(half16);\n" |
33704 | "ushort16 __ovld __cnfn convert_ushort16_rte(half16);\n" |
33705 | "ushort16 __ovld __cnfn convert_ushort16_rtp(half16);\n" |
33706 | "ushort16 __ovld __cnfn convert_ushort16_rtn(half16);\n" |
33707 | "ushort16 __ovld __cnfn convert_ushort16_rtz(half16);\n" |
33708 | "ushort16 __ovld __cnfn convert_ushort16_sat(half16);\n" |
33709 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(half16);\n" |
33710 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(half16);\n" |
33711 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(half16);\n" |
33712 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(half16);\n" |
33713 | "uint __ovld __cnfn convert_uint(half);\n" |
33714 | "uint __ovld __cnfn convert_uint_rte(half);\n" |
33715 | "uint __ovld __cnfn convert_uint_rtp(half);\n" |
33716 | "uint __ovld __cnfn convert_uint_rtn(half);\n" |
33717 | "uint __ovld __cnfn convert_uint_rtz(half);\n" |
33718 | "uint __ovld __cnfn convert_uint_sat(half);\n" |
33719 | "uint __ovld __cnfn convert_uint_sat_rte(half);\n" |
33720 | "uint __ovld __cnfn convert_uint_sat_rtp(half);\n" |
33721 | "uint __ovld __cnfn convert_uint_sat_rtn(half);\n" |
33722 | "uint __ovld __cnfn convert_uint_sat_rtz(half);\n" |
33723 | "uint2 __ovld __cnfn convert_uint2(half2);\n" |
33724 | "uint2 __ovld __cnfn convert_uint2_rte(half2);\n" |
33725 | "uint2 __ovld __cnfn convert_uint2_rtp(half2);\n" |
33726 | "uint2 __ovld __cnfn convert_uint2_rtn(half2);\n" |
33727 | "uint2 __ovld __cnfn convert_uint2_rtz(half2);\n" |
33728 | "uint2 __ovld __cnfn convert_uint2_sat(half2);\n" |
33729 | "uint2 __ovld __cnfn convert_uint2_sat_rte(half2);\n" |
33730 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(half2);\n" |
33731 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(half2);\n" |
33732 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(half2);\n" |
33733 | "uint3 __ovld __cnfn convert_uint3(half3);\n" |
33734 | "uint3 __ovld __cnfn convert_uint3_rte(half3);\n" |
33735 | "uint3 __ovld __cnfn convert_uint3_rtp(half3);\n" |
33736 | "uint3 __ovld __cnfn convert_uint3_rtn(half3);\n" |
33737 | "uint3 __ovld __cnfn convert_uint3_rtz(half3);\n" |
33738 | "uint3 __ovld __cnfn convert_uint3_sat(half3);\n" |
33739 | "uint3 __ovld __cnfn convert_uint3_sat_rte(half3);\n" |
33740 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(half3);\n" |
33741 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(half3);\n" |
33742 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(half3);\n" |
33743 | "uint4 __ovld __cnfn convert_uint4(half4);\n" |
33744 | "uint4 __ovld __cnfn convert_uint4_rte(half4);\n" |
33745 | "uint4 __ovld __cnfn convert_uint4_rtp(half4);\n" |
33746 | "uint4 __ovld __cnfn convert_uint4_rtn(half4);\n" |
33747 | "uint4 __ovld __cnfn convert_uint4_rtz(half4);\n" |
33748 | "uint4 __ovld __cnfn convert_uint4_sat(half4);\n" |
33749 | "uint4 __ovld __cnfn convert_uint4_sat_rte(half4);\n" |
33750 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(half4);\n" |
33751 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(half4);\n" |
33752 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(half4);\n" |
33753 | "uint8 __ovld __cnfn convert_uint8(half8);\n" |
33754 | "uint8 __ovld __cnfn convert_uint8_rte(half8);\n" |
33755 | "uint8 __ovld __cnfn convert_uint8_rtp(half8);\n" |
33756 | "uint8 __ovld __cnfn convert_uint8_rtn(half8);\n" |
33757 | "uint8 __ovld __cnfn convert_uint8_rtz(half8);\n" |
33758 | "uint8 __ovld __cnfn convert_uint8_sat(half8);\n" |
33759 | "uint8 __ovld __cnfn convert_uint8_sat_rte(half8);\n" |
33760 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(half8);\n" |
33761 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(half8);\n" |
33762 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(half8);\n" |
33763 | "uint16 __ovld __cnfn convert_uint16(half16);\n" |
33764 | "uint16 __ovld __cnfn convert_uint16_rte(half16);\n" |
33765 | "uint16 __ovld __cnfn convert_uint16_rtp(half16);\n" |
33766 | "uint16 __ovld __cnfn convert_uint16_rtn(half16);\n" |
33767 | "uint16 __ovld __cnfn convert_uint16_rtz(half16);\n" |
33768 | "uint16 __ovld __cnfn convert_uint16_sat(half16);\n" |
33769 | "uint16 __ovld __cnfn convert_uint16_sat_rte(half16);\n" |
33770 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(half16);\n" |
33771 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(half16);\n" |
33772 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(half16);\n" |
33773 | "ulong __ovld __cnfn convert_ulong(half);\n" |
33774 | "ulong __ovld __cnfn convert_ulong_rte(half);\n" |
33775 | "ulong __ovld __cnfn convert_ulong_rtp(half);\n" |
33776 | "ulong __ovld __cnfn convert_ulong_rtn(half);\n" |
33777 | "ulong __ovld __cnfn convert_ulong_rtz(half);\n" |
33778 | "ulong __ovld __cnfn convert_ulong_sat(half);\n" |
33779 | "ulong __ovld __cnfn convert_ulong_sat_rte(half);\n" |
33780 | "ulong __ovld __cnfn convert_ulong_sat_rtp(half);\n" |
33781 | "ulong __ovld __cnfn convert_ulong_sat_rtn(half);\n" |
33782 | "ulong __ovld __cnfn convert_ulong_sat_rtz(half);\n" |
33783 | "ulong2 __ovld __cnfn convert_ulong2(half2);\n" |
33784 | "ulong2 __ovld __cnfn convert_ulong2_rte(half2);\n" |
33785 | "ulong2 __ovld __cnfn convert_ulong2_rtp(half2);\n" |
33786 | "ulong2 __ovld __cnfn convert_ulong2_rtn(half2);\n" |
33787 | "ulong2 __ovld __cnfn convert_ulong2_rtz(half2);\n" |
33788 | "ulong2 __ovld __cnfn convert_ulong2_sat(half2);\n" |
33789 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(half2);\n" |
33790 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(half2);\n" |
33791 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(half2);\n" |
33792 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(half2);\n" |
33793 | "ulong3 __ovld __cnfn convert_ulong3(half3);\n" |
33794 | "ulong3 __ovld __cnfn convert_ulong3_rte(half3);\n" |
33795 | "ulong3 __ovld __cnfn convert_ulong3_rtp(half3);\n" |
33796 | "ulong3 __ovld __cnfn convert_ulong3_rtn(half3);\n" |
33797 | "ulong3 __ovld __cnfn convert_ulong3_rtz(half3);\n" |
33798 | "ulong3 __ovld __cnfn convert_ulong3_sat(half3);\n" |
33799 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(half3);\n" |
33800 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(half3);\n" |
33801 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(half3);\n" |
33802 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(half3);\n" |
33803 | "ulong4 __ovld __cnfn convert_ulong4(half4);\n" |
33804 | "ulong4 __ovld __cnfn convert_ulong4_rte(half4);\n" |
33805 | "ulong4 __ovld __cnfn convert_ulong4_rtp(half4);\n" |
33806 | "ulong4 __ovld __cnfn convert_ulong4_rtn(half4);\n" |
33807 | "ulong4 __ovld __cnfn convert_ulong4_rtz(half4);\n" |
33808 | "ulong4 __ovld __cnfn convert_ulong4_sat(half4);\n" |
33809 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(half4);\n" |
33810 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(half4);\n" |
33811 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(half4);\n" |
33812 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(half4);\n" |
33813 | "ulong8 __ovld __cnfn convert_ulong8(half8);\n" |
33814 | "ulong8 __ovld __cnfn convert_ulong8_rte(half8);\n" |
33815 | "ulong8 __ovld __cnfn convert_ulong8_rtp(half8);\n" |
33816 | "ulong8 __ovld __cnfn convert_ulong8_rtn(half8);\n" |
33817 | "ulong8 __ovld __cnfn convert_ulong8_rtz(half8);\n" |
33818 | "ulong8 __ovld __cnfn convert_ulong8_sat(half8);\n" |
33819 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(half8);\n" |
33820 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(half8);\n" |
33821 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(half8);\n" |
33822 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(half8);\n" |
33823 | "ulong16 __ovld __cnfn convert_ulong16(half16);\n" |
33824 | "ulong16 __ovld __cnfn convert_ulong16_rte(half16);\n" |
33825 | "ulong16 __ovld __cnfn convert_ulong16_rtp(half16);\n" |
33826 | "ulong16 __ovld __cnfn convert_ulong16_rtn(half16);\n" |
33827 | "ulong16 __ovld __cnfn convert_ulong16_rtz(half16);\n" |
33828 | "ulong16 __ovld __cnfn convert_ulong16_sat(half16);\n" |
33829 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(half16);\n" |
33830 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(half16);\n" |
33831 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(half16);\n" |
33832 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(half16);\n" |
33833 | "char __ovld __cnfn convert_char(half);\n" |
33834 | "char __ovld __cnfn convert_char_rte(half);\n" |
33835 | "char __ovld __cnfn convert_char_rtp(half);\n" |
33836 | "char __ovld __cnfn convert_char_rtn(half);\n" |
33837 | "char __ovld __cnfn convert_char_rtz(half);\n" |
33838 | "char __ovld __cnfn convert_char_sat(half);\n" |
33839 | "char __ovld __cnfn convert_char_sat_rte(half);\n" |
33840 | "char __ovld __cnfn convert_char_sat_rtp(half);\n" |
33841 | "char __ovld __cnfn convert_char_sat_rtn(half);\n" |
33842 | "char __ovld __cnfn convert_char_sat_rtz(half);\n" |
33843 | "char2 __ovld __cnfn convert_char2(half2);\n" |
33844 | "char2 __ovld __cnfn convert_char2_rte(half2);\n" |
33845 | "char2 __ovld __cnfn convert_char2_rtp(half2);\n" |
33846 | "char2 __ovld __cnfn convert_char2_rtn(half2);\n" |
33847 | "char2 __ovld __cnfn convert_char2_rtz(half2);\n" |
33848 | "char2 __ovld __cnfn convert_char2_sat(half2);\n" |
33849 | "char2 __ovld __cnfn convert_char2_sat_rte(half2);\n" |
33850 | "char2 __ovld __cnfn convert_char2_sat_rtp(half2);\n" |
33851 | "char2 __ovld __cnfn convert_char2_sat_rtn(half2);\n" |
33852 | "char2 __ovld __cnfn convert_char2_sat_rtz(half2);\n" |
33853 | "char3 __ovld __cnfn convert_char3(half3);\n" |
33854 | "char3 __ovld __cnfn convert_char3_rte(half3);\n" |
33855 | "char3 __ovld __cnfn convert_char3_rtp(half3);\n" |
33856 | "char3 __ovld __cnfn convert_char3_rtn(half3);\n" |
33857 | "char3 __ovld __cnfn convert_char3_rtz(half3);\n" |
33858 | "char3 __ovld __cnfn convert_char3_sat(half3);\n" |
33859 | "char3 __ovld __cnfn convert_char3_sat_rte(half3);\n" |
33860 | "char3 __ovld __cnfn convert_char3_sat_rtp(half3);\n" |
33861 | "char3 __ovld __cnfn convert_char3_sat_rtn(half3);\n" |
33862 | "char3 __ovld __cnfn convert_char3_sat_rtz(half3);\n" |
33863 | "char4 __ovld __cnfn convert_char4(half4);\n" |
33864 | "char4 __ovld __cnfn convert_char4_rte(half4);\n" |
33865 | "char4 __ovld __cnfn convert_char4_rtp(half4);\n" |
33866 | "char4 __ovld __cnfn convert_char4_rtn(half4);\n" |
33867 | "char4 __ovld __cnfn convert_char4_rtz(half4);\n" |
33868 | "char4 __ovld __cnfn convert_char4_sat(half4);\n" |
33869 | "char4 __ovld __cnfn convert_char4_sat_rte(half4);\n" |
33870 | "char4 __ovld __cnfn convert_char4_sat_rtp(half4);\n" |
33871 | "char4 __ovld __cnfn convert_char4_sat_rtn(half4);\n" |
33872 | "char4 __ovld __cnfn convert_char4_sat_rtz(half4);\n" |
33873 | "char8 __ovld __cnfn convert_char8(half8);\n" |
33874 | "char8 __ovld __cnfn convert_char8_rte(half8);\n" |
33875 | "char8 __ovld __cnfn convert_char8_rtp(half8);\n" |
33876 | "char8 __ovld __cnfn convert_char8_rtn(half8);\n" |
33877 | "char8 __ovld __cnfn convert_char8_rtz(half8);\n" |
33878 | "char8 __ovld __cnfn convert_char8_sat(half8);\n" |
33879 | "char8 __ovld __cnfn convert_char8_sat_rte(half8);\n" |
33880 | "char8 __ovld __cnfn convert_char8_sat_rtp(half8);\n" |
33881 | "char8 __ovld __cnfn convert_char8_sat_rtn(half8);\n" |
33882 | "char8 __ovld __cnfn convert_char8_sat_rtz(half8);\n" |
33883 | "char16 __ovld __cnfn convert_char16(half16);\n" |
33884 | "char16 __ovld __cnfn convert_char16_rte(half16);\n" |
33885 | "char16 __ovld __cnfn convert_char16_rtp(half16);\n" |
33886 | "char16 __ovld __cnfn convert_char16_rtn(half16);\n" |
33887 | "char16 __ovld __cnfn convert_char16_rtz(half16);\n" |
33888 | "char16 __ovld __cnfn convert_char16_sat(half16);\n" |
33889 | "char16 __ovld __cnfn convert_char16_sat_rte(half16);\n" |
33890 | "char16 __ovld __cnfn convert_char16_sat_rtp(half16);\n" |
33891 | "char16 __ovld __cnfn convert_char16_sat_rtn(half16);\n" |
33892 | "char16 __ovld __cnfn convert_char16_sat_rtz(half16);\n" |
33893 | "short __ovld __cnfn convert_short(half);\n" |
33894 | "short __ovld __cnfn convert_short_rte(half);\n" |
33895 | "short __ovld __cnfn convert_short_rtp(half);\n" |
33896 | "short __ovld __cnfn convert_short_rtn(half);\n" |
33897 | "short __ovld __cnfn convert_short_rtz(half);\n" |
33898 | "short __ovld __cnfn convert_short_sat(half);\n" |
33899 | "short __ovld __cnfn convert_short_sat_rte(half);\n" |
33900 | "short __ovld __cnfn convert_short_sat_rtp(half);\n" |
33901 | "short __ovld __cnfn convert_short_sat_rtn(half);\n" |
33902 | "short __ovld __cnfn convert_short_sat_rtz(half);\n" |
33903 | "short2 __ovld __cnfn convert_short2(half2);\n" |
33904 | "short2 __ovld __cnfn convert_short2_rte(half2);\n" |
33905 | "short2 __ovld __cnfn convert_short2_rtp(half2);\n" |
33906 | "short2 __ovld __cnfn convert_short2_rtn(half2);\n" |
33907 | "short2 __ovld __cnfn convert_short2_rtz(half2);\n" |
33908 | "short2 __ovld __cnfn convert_short2_sat(half2);\n" |
33909 | "short2 __ovld __cnfn convert_short2_sat_rte(half2);\n" |
33910 | "short2 __ovld __cnfn convert_short2_sat_rtp(half2);\n" |
33911 | "short2 __ovld __cnfn convert_short2_sat_rtn(half2);\n" |
33912 | "short2 __ovld __cnfn convert_short2_sat_rtz(half2);\n" |
33913 | "short3 __ovld __cnfn convert_short3(half3);\n" |
33914 | "short3 __ovld __cnfn convert_short3_rte(half3);\n" |
33915 | "short3 __ovld __cnfn convert_short3_rtp(half3);\n" |
33916 | "short3 __ovld __cnfn convert_short3_rtn(half3);\n" |
33917 | "short3 __ovld __cnfn convert_short3_rtz(half3);\n" |
33918 | "short3 __ovld __cnfn convert_short3_sat(half3);\n" |
33919 | "short3 __ovld __cnfn convert_short3_sat_rte(half3);\n" |
33920 | "short3 __ovld __cnfn convert_short3_sat_rtp(half3);\n" |
33921 | "short3 __ovld __cnfn convert_short3_sat_rtn(half3);\n" |
33922 | "short3 __ovld __cnfn convert_short3_sat_rtz(half3);\n" |
33923 | "short4 __ovld __cnfn convert_short4(half4);\n" |
33924 | "short4 __ovld __cnfn convert_short4_rte(half4);\n" |
33925 | "short4 __ovld __cnfn convert_short4_rtp(half4);\n" |
33926 | "short4 __ovld __cnfn convert_short4_rtn(half4);\n" |
33927 | "short4 __ovld __cnfn convert_short4_rtz(half4);\n" |
33928 | "short4 __ovld __cnfn convert_short4_sat(half4);\n" |
33929 | "short4 __ovld __cnfn convert_short4_sat_rte(half4);\n" |
33930 | "short4 __ovld __cnfn convert_short4_sat_rtp(half4);\n" |
33931 | "short4 __ovld __cnfn convert_short4_sat_rtn(half4);\n" |
33932 | "short4 __ovld __cnfn convert_short4_sat_rtz(half4);\n" |
33933 | "short8 __ovld __cnfn convert_short8(half8);\n" |
33934 | "short8 __ovld __cnfn convert_short8_rte(half8);\n" |
33935 | "short8 __ovld __cnfn convert_short8_rtp(half8);\n" |
33936 | "short8 __ovld __cnfn convert_short8_rtn(half8);\n" |
33937 | "short8 __ovld __cnfn convert_short8_rtz(half8);\n" |
33938 | "short8 __ovld __cnfn convert_short8_sat(half8);\n" |
33939 | "short8 __ovld __cnfn convert_short8_sat_rte(half8);\n" |
33940 | "short8 __ovld __cnfn convert_short8_sat_rtp(half8);\n" |
33941 | "short8 __ovld __cnfn convert_short8_sat_rtn(half8);\n" |
33942 | "short8 __ovld __cnfn convert_short8_sat_rtz(half8);\n" |
33943 | "short16 __ovld __cnfn convert_short16(half16);\n" |
33944 | "short16 __ovld __cnfn convert_short16_rte(half16);\n" |
33945 | "short16 __ovld __cnfn convert_short16_rtp(half16);\n" |
33946 | "short16 __ovld __cnfn convert_short16_rtn(half16);\n" |
33947 | "short16 __ovld __cnfn convert_short16_rtz(half16);\n" |
33948 | "short16 __ovld __cnfn convert_short16_sat(half16);\n" |
33949 | "short16 __ovld __cnfn convert_short16_sat_rte(half16);\n" |
33950 | "short16 __ovld __cnfn convert_short16_sat_rtp(half16);\n" |
33951 | "short16 __ovld __cnfn convert_short16_sat_rtn(half16);\n" |
33952 | "short16 __ovld __cnfn convert_short16_sat_rtz(half16);\n" |
33953 | "int __ovld __cnfn convert_int(half);\n" |
33954 | "int __ovld __cnfn convert_int_rte(half);\n" |
33955 | "int __ovld __cnfn convert_int_rtp(half);\n" |
33956 | "int __ovld __cnfn convert_int_rtn(half);\n" |
33957 | "int __ovld __cnfn convert_int_rtz(half);\n" |
33958 | "int __ovld __cnfn convert_int_sat(half);\n" |
33959 | "int __ovld __cnfn convert_int_sat_rte(half);\n" |
33960 | "int __ovld __cnfn convert_int_sat_rtp(half);\n" |
33961 | "int __ovld __cnfn convert_int_sat_rtn(half);\n" |
33962 | "int __ovld __cnfn convert_int_sat_rtz(half);\n" |
33963 | "int2 __ovld __cnfn convert_int2(half2);\n" |
33964 | "int2 __ovld __cnfn convert_int2_rte(half2);\n" |
33965 | "int2 __ovld __cnfn convert_int2_rtp(half2);\n" |
33966 | "int2 __ovld __cnfn convert_int2_rtn(half2);\n" |
33967 | "int2 __ovld __cnfn convert_int2_rtz(half2);\n" |
33968 | "int2 __ovld __cnfn convert_int2_sat(half2);\n" |
33969 | "int2 __ovld __cnfn convert_int2_sat_rte(half2);\n" |
33970 | "int2 __ovld __cnfn convert_int2_sat_rtp(half2);\n" |
33971 | "int2 __ovld __cnfn convert_int2_sat_rtn(half2);\n" |
33972 | "int2 __ovld __cnfn convert_int2_sat_rtz(half2);\n" |
33973 | "int3 __ovld __cnfn convert_int3(half3);\n" |
33974 | "int3 __ovld __cnfn convert_int3_rte(half3);\n" |
33975 | "int3 __ovld __cnfn convert_int3_rtp(half3);\n" |
33976 | "int3 __ovld __cnfn convert_int3_rtn(half3);\n" |
33977 | "int3 __ovld __cnfn convert_int3_rtz(half3);\n" |
33978 | "int3 __ovld __cnfn convert_int3_sat(half3);\n" |
33979 | "int3 __ovld __cnfn convert_int3_sat_rte(half3);\n" |
33980 | "int3 __ovld __cnfn convert_int3_sat_rtp(half3);\n" |
33981 | "int3 __ovld __cnfn convert_int3_sat_rtn(half3);\n" |
33982 | "int3 __ovld __cnfn convert_int3_sat_rtz(half3);\n" |
33983 | "int4 __ovld __cnfn convert_int4(half4);\n" |
33984 | "int4 __ovld __cnfn convert_int4_rte(half4);\n" |
33985 | "int4 __ovld __cnfn convert_int4_rtp(half4);\n" |
33986 | "int4 __ovld __cnfn convert_int4_rtn(half4);\n" |
33987 | "int4 __ovld __cnfn convert_int4_rtz(half4);\n" |
33988 | "int4 __ovld __cnfn convert_int4_sat(half4);\n" |
33989 | "int4 __ovld __cnfn convert_int4_sat_rte(half4);\n" |
33990 | "int4 __ovld __cnfn convert_int4_sat_rtp(half4);\n" |
33991 | "int4 __ovld __cnfn convert_int4_sat_rtn(half4);\n" |
33992 | "int4 __ovld __cnfn convert_int4_sat_rtz(half4);\n" |
33993 | "int8 __ovld __cnfn convert_int8(half8);\n" |
33994 | "int8 __ovld __cnfn convert_int8_rte(half8);\n" |
33995 | "int8 __ovld __cnfn convert_int8_rtp(half8);\n" |
33996 | "int8 __ovld __cnfn convert_int8_rtn(half8);\n" |
33997 | "int8 __ovld __cnfn convert_int8_rtz(half8);\n" |
33998 | "int8 __ovld __cnfn convert_int8_sat(half8);\n" |
33999 | "int8 __ovld __cnfn convert_int8_sat_rte(half8);\n" |
34000 | "int8 __ovld __cnfn convert_int8_sat_rtp(half8);\n" |
34001 | "int8 __ovld __cnfn convert_int8_sat_rtn(half8);\n" |
34002 | "int8 __ovld __cnfn convert_int8_sat_rtz(half8);\n" |
34003 | "int16 __ovld __cnfn convert_int16(half16);\n" |
34004 | "int16 __ovld __cnfn convert_int16_rte(half16);\n" |
34005 | "int16 __ovld __cnfn convert_int16_rtp(half16);\n" |
34006 | "int16 __ovld __cnfn convert_int16_rtn(half16);\n" |
34007 | "int16 __ovld __cnfn convert_int16_rtz(half16);\n" |
34008 | "int16 __ovld __cnfn convert_int16_sat(half16);\n" |
34009 | "int16 __ovld __cnfn convert_int16_sat_rte(half16);\n" |
34010 | "int16 __ovld __cnfn convert_int16_sat_rtp(half16);\n" |
34011 | "int16 __ovld __cnfn convert_int16_sat_rtn(half16);\n" |
34012 | "int16 __ovld __cnfn convert_int16_sat_rtz(half16);\n" |
34013 | "long __ovld __cnfn convert_long(half);\n" |
34014 | "long __ovld __cnfn convert_long_rte(half);\n" |
34015 | "long __ovld __cnfn convert_long_rtp(half);\n" |
34016 | "long __ovld __cnfn convert_long_rtn(half);\n" |
34017 | "long __ovld __cnfn convert_long_rtz(half);\n" |
34018 | "long __ovld __cnfn convert_long_sat(half);\n" |
34019 | "long __ovld __cnfn convert_long_sat_rte(half);\n" |
34020 | "long __ovld __cnfn convert_long_sat_rtp(half);\n" |
34021 | "long __ovld __cnfn convert_long_sat_rtn(half);\n" |
34022 | "long __ovld __cnfn convert_long_sat_rtz(half);\n" |
34023 | "long2 __ovld __cnfn convert_long2(half2);\n" |
34024 | "long2 __ovld __cnfn convert_long2_rte(half2);\n" |
34025 | "long2 __ovld __cnfn convert_long2_rtp(half2);\n" |
34026 | "long2 __ovld __cnfn convert_long2_rtn(half2);\n" |
34027 | "long2 __ovld __cnfn convert_long2_rtz(half2);\n" |
34028 | "long2 __ovld __cnfn convert_long2_sat(half2);\n" |
34029 | "long2 __ovld __cnfn convert_long2_sat_rte(half2);\n" |
34030 | "long2 __ovld __cnfn convert_long2_sat_rtp(half2);\n" |
34031 | "long2 __ovld __cnfn convert_long2_sat_rtn(half2);\n" |
34032 | "long2 __ovld __cnfn convert_long2_sat_rtz(half2);\n" |
34033 | "long3 __ovld __cnfn convert_long3(half3);\n" |
34034 | "long3 __ovld __cnfn convert_long3_rte(half3);\n" |
34035 | "long3 __ovld __cnfn convert_long3_rtp(half3);\n" |
34036 | "long3 __ovld __cnfn convert_long3_rtn(half3);\n" |
34037 | "long3 __ovld __cnfn convert_long3_rtz(half3);\n" |
34038 | "long3 __ovld __cnfn convert_long3_sat(half3);\n" |
34039 | "long3 __ovld __cnfn convert_long3_sat_rte(half3);\n" |
34040 | "long3 __ovld __cnfn convert_long3_sat_rtp(half3);\n" |
34041 | "long3 __ovld __cnfn convert_long3_sat_rtn(half3);\n" |
34042 | "long3 __ovld __cnfn convert_long3_sat_rtz(half3);\n" |
34043 | "long4 __ovld __cnfn convert_long4(half4);\n" |
34044 | "long4 __ovld __cnfn convert_long4_rte(half4);\n" |
34045 | "long4 __ovld __cnfn convert_long4_rtp(half4);\n" |
34046 | "long4 __ovld __cnfn convert_long4_rtn(half4);\n" |
34047 | "long4 __ovld __cnfn convert_long4_rtz(half4);\n" |
34048 | "long4 __ovld __cnfn convert_long4_sat(half4);\n" |
34049 | "long4 __ovld __cnfn convert_long4_sat_rte(half4);\n" |
34050 | "long4 __ovld __cnfn convert_long4_sat_rtp(half4);\n" |
34051 | "long4 __ovld __cnfn convert_long4_sat_rtn(half4);\n" |
34052 | "long4 __ovld __cnfn convert_long4_sat_rtz(half4);\n" |
34053 | "long8 __ovld __cnfn convert_long8(half8);\n" |
34054 | "long8 __ovld __cnfn convert_long8_rte(half8);\n" |
34055 | "long8 __ovld __cnfn convert_long8_rtp(half8);\n" |
34056 | "long8 __ovld __cnfn convert_long8_rtn(half8);\n" |
34057 | "long8 __ovld __cnfn convert_long8_rtz(half8);\n" |
34058 | "long8 __ovld __cnfn convert_long8_sat(half8);\n" |
34059 | "long8 __ovld __cnfn convert_long8_sat_rte(half8);\n" |
34060 | "long8 __ovld __cnfn convert_long8_sat_rtp(half8);\n" |
34061 | "long8 __ovld __cnfn convert_long8_sat_rtn(half8);\n" |
34062 | "long8 __ovld __cnfn convert_long8_sat_rtz(half8);\n" |
34063 | "long16 __ovld __cnfn convert_long16(half16);\n" |
34064 | "long16 __ovld __cnfn convert_long16_rte(half16);\n" |
34065 | "long16 __ovld __cnfn convert_long16_rtp(half16);\n" |
34066 | "long16 __ovld __cnfn convert_long16_rtn(half16);\n" |
34067 | "long16 __ovld __cnfn convert_long16_rtz(half16);\n" |
34068 | "long16 __ovld __cnfn convert_long16_sat(half16);\n" |
34069 | "long16 __ovld __cnfn convert_long16_sat_rte(half16);\n" |
34070 | "long16 __ovld __cnfn convert_long16_sat_rtp(half16);\n" |
34071 | "long16 __ovld __cnfn convert_long16_sat_rtn(half16);\n" |
34072 | "long16 __ovld __cnfn convert_long16_sat_rtz(half16);\n" |
34073 | "float __ovld __cnfn convert_float(half);\n" |
34074 | "float __ovld __cnfn convert_float_rte(half);\n" |
34075 | "float __ovld __cnfn convert_float_rtp(half);\n" |
34076 | "float __ovld __cnfn convert_float_rtn(half);\n" |
34077 | "float __ovld __cnfn convert_float_rtz(half);\n" |
34078 | "float2 __ovld __cnfn convert_float2(half2);\n" |
34079 | "float2 __ovld __cnfn convert_float2_rte(half2);\n" |
34080 | "float2 __ovld __cnfn convert_float2_rtp(half2);\n" |
34081 | "float2 __ovld __cnfn convert_float2_rtn(half2);\n" |
34082 | "float2 __ovld __cnfn convert_float2_rtz(half2);\n" |
34083 | "float3 __ovld __cnfn convert_float3(half3);\n" |
34084 | "float3 __ovld __cnfn convert_float3_rte(half3);\n" |
34085 | "float3 __ovld __cnfn convert_float3_rtp(half3);\n" |
34086 | "float3 __ovld __cnfn convert_float3_rtn(half3);\n" |
34087 | "float3 __ovld __cnfn convert_float3_rtz(half3);\n" |
34088 | "float4 __ovld __cnfn convert_float4(half4);\n" |
34089 | "float4 __ovld __cnfn convert_float4_rte(half4);\n" |
34090 | "float4 __ovld __cnfn convert_float4_rtp(half4);\n" |
34091 | "float4 __ovld __cnfn convert_float4_rtn(half4);\n" |
34092 | "float4 __ovld __cnfn convert_float4_rtz(half4);\n" |
34093 | "float8 __ovld __cnfn convert_float8(half8);\n" |
34094 | "float8 __ovld __cnfn convert_float8_rte(half8);\n" |
34095 | "float8 __ovld __cnfn convert_float8_rtp(half8);\n" |
34096 | "float8 __ovld __cnfn convert_float8_rtn(half8);\n" |
34097 | "float8 __ovld __cnfn convert_float8_rtz(half8);\n" |
34098 | "float16 __ovld __cnfn convert_float16(half16);\n" |
34099 | "float16 __ovld __cnfn convert_float16_rte(half16);\n" |
34100 | "float16 __ovld __cnfn convert_float16_rtp(half16);\n" |
34101 | "float16 __ovld __cnfn convert_float16_rtn(half16);\n" |
34102 | "float16 __ovld __cnfn convert_float16_rtz(half16);\n" |
34103 | "\n" |
34104 | "// Convert non-double types to half types.\n" |
34105 | "half __ovld __cnfn convert_half(uchar);\n" |
34106 | "half __ovld __cnfn convert_half(ushort);\n" |
34107 | "half __ovld __cnfn convert_half(uint);\n" |
34108 | "half __ovld __cnfn convert_half(ulong);\n" |
34109 | "half __ovld __cnfn convert_half(char);\n" |
34110 | "half __ovld __cnfn convert_half(short);\n" |
34111 | "half __ovld __cnfn convert_half(int);\n" |
34112 | "half __ovld __cnfn convert_half(long);\n" |
34113 | "half __ovld __cnfn convert_half(float);\n" |
34114 | "half __ovld __cnfn convert_half(half);\n" |
34115 | "half __ovld __cnfn convert_half_rte(uchar);\n" |
34116 | "half __ovld __cnfn convert_half_rte(ushort);\n" |
34117 | "half __ovld __cnfn convert_half_rte(uint);\n" |
34118 | "half __ovld __cnfn convert_half_rte(ulong);\n" |
34119 | "half __ovld __cnfn convert_half_rte(char);\n" |
34120 | "half __ovld __cnfn convert_half_rte(short);\n" |
34121 | "half __ovld __cnfn convert_half_rte(int);\n" |
34122 | "half __ovld __cnfn convert_half_rte(long);\n" |
34123 | "half __ovld __cnfn convert_half_rte(float);\n" |
34124 | "half __ovld __cnfn convert_half_rte(half);\n" |
34125 | "half __ovld __cnfn convert_half_rtp(uchar);\n" |
34126 | "half __ovld __cnfn convert_half_rtp(ushort);\n" |
34127 | "half __ovld __cnfn convert_half_rtp(uint);\n" |
34128 | "half __ovld __cnfn convert_half_rtp(ulong);\n" |
34129 | "half __ovld __cnfn convert_half_rtp(char);\n" |
34130 | "half __ovld __cnfn convert_half_rtp(short);\n" |
34131 | "half __ovld __cnfn convert_half_rtp(int);\n" |
34132 | "half __ovld __cnfn convert_half_rtp(long);\n" |
34133 | "half __ovld __cnfn convert_half_rtp(float);\n" |
34134 | "half __ovld __cnfn convert_half_rtp(half);\n" |
34135 | "half __ovld __cnfn convert_half_rtn(uchar);\n" |
34136 | "half __ovld __cnfn convert_half_rtn(ushort);\n" |
34137 | "half __ovld __cnfn convert_half_rtn(uint);\n" |
34138 | "half __ovld __cnfn convert_half_rtn(ulong);\n" |
34139 | "half __ovld __cnfn convert_half_rtn(char);\n" |
34140 | "half __ovld __cnfn convert_half_rtn(short);\n" |
34141 | "half __ovld __cnfn convert_half_rtn(int);\n" |
34142 | "half __ovld __cnfn convert_half_rtn(long);\n" |
34143 | "half __ovld __cnfn convert_half_rtn(float);\n" |
34144 | "half __ovld __cnfn convert_half_rtn(half);\n" |
34145 | "half __ovld __cnfn convert_half_rtz(uchar);\n" |
34146 | "half __ovld __cnfn convert_half_rtz(ushort);\n" |
34147 | "half __ovld __cnfn convert_half_rtz(uint);\n" |
34148 | "half __ovld __cnfn convert_half_rtz(ulong);\n" |
34149 | "half __ovld __cnfn convert_half_rtz(char);\n" |
34150 | "half __ovld __cnfn convert_half_rtz(short);\n" |
34151 | "half __ovld __cnfn convert_half_rtz(int);\n" |
34152 | "half __ovld __cnfn convert_half_rtz(long);\n" |
34153 | "half __ovld __cnfn convert_half_rtz(float);\n" |
34154 | "half __ovld __cnfn convert_half_rtz(half);\n" |
34155 | "half2 __ovld __cnfn convert_half2(char2);\n" |
34156 | "half2 __ovld __cnfn convert_half2(uchar2);\n" |
34157 | "half2 __ovld __cnfn convert_half2(short2);\n" |
34158 | "half2 __ovld __cnfn convert_half2(ushort2);\n" |
34159 | "half2 __ovld __cnfn convert_half2(int2);\n" |
34160 | "half2 __ovld __cnfn convert_half2(uint2);\n" |
34161 | "half2 __ovld __cnfn convert_half2(long2);\n" |
34162 | "half2 __ovld __cnfn convert_half2(ulong2);\n" |
34163 | "half2 __ovld __cnfn convert_half2(float2);\n" |
34164 | "half2 __ovld __cnfn convert_half2(half2);\n" |
34165 | "half2 __ovld __cnfn convert_half2_rte(char2);\n" |
34166 | "half2 __ovld __cnfn convert_half2_rte(uchar2);\n" |
34167 | "half2 __ovld __cnfn convert_half2_rte(short2);\n" |
34168 | "half2 __ovld __cnfn convert_half2_rte(ushort2);\n" |
34169 | "half2 __ovld __cnfn convert_half2_rte(int2);\n" |
34170 | "half2 __ovld __cnfn convert_half2_rte(uint2);\n" |
34171 | "half2 __ovld __cnfn convert_half2_rte(long2);\n" |
34172 | "half2 __ovld __cnfn convert_half2_rte(ulong2);\n" |
34173 | "half2 __ovld __cnfn convert_half2_rte(float2);\n" |
34174 | "half2 __ovld __cnfn convert_half2_rte(half2);\n" |
34175 | "half2 __ovld __cnfn convert_half2_rtp(char2);\n" |
34176 | "half2 __ovld __cnfn convert_half2_rtp(uchar2);\n" |
34177 | "half2 __ovld __cnfn convert_half2_rtp(short2);\n" |
34178 | "half2 __ovld __cnfn convert_half2_rtp(ushort2);\n" |
34179 | "half2 __ovld __cnfn convert_half2_rtp(int2);\n" |
34180 | "half2 __ovld __cnfn convert_half2_rtp(uint2);\n" |
34181 | "half2 __ovld __cnfn convert_half2_rtp(long2);\n" |
34182 | "half2 __ovld __cnfn convert_half2_rtp(ulong2);\n" |
34183 | "half2 __ovld __cnfn convert_half2_rtp(float2);\n" |
34184 | "half2 __ovld __cnfn convert_half2_rtp(half2);\n" |
34185 | "half2 __ovld __cnfn convert_half2_rtn(char2);\n" |
34186 | "half2 __ovld __cnfn convert_half2_rtn(uchar2);\n" |
34187 | "half2 __ovld __cnfn convert_half2_rtn(short2);\n" |
34188 | "half2 __ovld __cnfn convert_half2_rtn(ushort2);\n" |
34189 | "half2 __ovld __cnfn convert_half2_rtn(int2);\n" |
34190 | "half2 __ovld __cnfn convert_half2_rtn(uint2);\n" |
34191 | "half2 __ovld __cnfn convert_half2_rtn(long2);\n" |
34192 | "half2 __ovld __cnfn convert_half2_rtn(ulong2);\n" |
34193 | "half2 __ovld __cnfn convert_half2_rtn(float2);\n" |
34194 | "half2 __ovld __cnfn convert_half2_rtn(half2);\n" |
34195 | "half2 __ovld __cnfn convert_half2_rtz(char2);\n" |
34196 | "half2 __ovld __cnfn convert_half2_rtz(uchar2);\n" |
34197 | "half2 __ovld __cnfn convert_half2_rtz(short2);\n" |
34198 | "half2 __ovld __cnfn convert_half2_rtz(ushort2);\n" |
34199 | "half2 __ovld __cnfn convert_half2_rtz(int2);\n" |
34200 | "half2 __ovld __cnfn convert_half2_rtz(uint2);\n" |
34201 | "half2 __ovld __cnfn convert_half2_rtz(long2);\n" |
34202 | "half2 __ovld __cnfn convert_half2_rtz(ulong2);\n" |
34203 | "half2 __ovld __cnfn convert_half2_rtz(float2);\n" |
34204 | "half2 __ovld __cnfn convert_half2_rtz(half2);\n" |
34205 | "half3 __ovld __cnfn convert_half3(char3);\n" |
34206 | "half3 __ovld __cnfn convert_half3(uchar3);\n" |
34207 | "half3 __ovld __cnfn convert_half3(short3);\n" |
34208 | "half3 __ovld __cnfn convert_half3(ushort3);\n" |
34209 | "half3 __ovld __cnfn convert_half3(int3);\n" |
34210 | "half3 __ovld __cnfn convert_half3(uint3);\n" |
34211 | "half3 __ovld __cnfn convert_half3(long3);\n" |
34212 | "half3 __ovld __cnfn convert_half3(ulong3);\n" |
34213 | "half3 __ovld __cnfn convert_half3(float3);\n" |
34214 | "half3 __ovld __cnfn convert_half3(half3);\n" |
34215 | "half3 __ovld __cnfn convert_half3_rte(char3);\n" |
34216 | "half3 __ovld __cnfn convert_half3_rte(uchar3);\n" |
34217 | "half3 __ovld __cnfn convert_half3_rte(short3);\n" |
34218 | "half3 __ovld __cnfn convert_half3_rte(ushort3);\n" |
34219 | "half3 __ovld __cnfn convert_half3_rte(int3);\n" |
34220 | "half3 __ovld __cnfn convert_half3_rte(uint3);\n" |
34221 | "half3 __ovld __cnfn convert_half3_rte(long3);\n" |
34222 | "half3 __ovld __cnfn convert_half3_rte(ulong3);\n" |
34223 | "half3 __ovld __cnfn convert_half3_rte(float3);\n" |
34224 | "half3 __ovld __cnfn convert_half3_rte(half3);\n" |
34225 | "half3 __ovld __cnfn convert_half3_rtp(char3);\n" |
34226 | "half3 __ovld __cnfn convert_half3_rtp(uchar3);\n" |
34227 | "half3 __ovld __cnfn convert_half3_rtp(short3);\n" |
34228 | "half3 __ovld __cnfn convert_half3_rtp(ushort3);\n" |
34229 | "half3 __ovld __cnfn convert_half3_rtp(int3);\n" |
34230 | "half3 __ovld __cnfn convert_half3_rtp(uint3);\n" |
34231 | "half3 __ovld __cnfn convert_half3_rtp(long3);\n" |
34232 | "half3 __ovld __cnfn convert_half3_rtp(ulong3);\n" |
34233 | "half3 __ovld __cnfn convert_half3_rtp(float3);\n" |
34234 | "half3 __ovld __cnfn convert_half3_rtp(half3);\n" |
34235 | "half3 __ovld __cnfn convert_half3_rtn(char3);\n" |
34236 | "half3 __ovld __cnfn convert_half3_rtn(uchar3);\n" |
34237 | "half3 __ovld __cnfn convert_half3_rtn(short3);\n" |
34238 | "half3 __ovld __cnfn convert_half3_rtn(ushort3);\n" |
34239 | "half3 __ovld __cnfn convert_half3_rtn(int3);\n" |
34240 | "half3 __ovld __cnfn convert_half3_rtn(uint3);\n" |
34241 | "half3 __ovld __cnfn convert_half3_rtn(long3);\n" |
34242 | "half3 __ovld __cnfn convert_half3_rtn(ulong3);\n" |
34243 | "half3 __ovld __cnfn convert_half3_rtn(float3);\n" |
34244 | "half3 __ovld __cnfn convert_half3_rtn(half3);\n" |
34245 | "half3 __ovld __cnfn convert_half3_rtz(char3);\n" |
34246 | "half3 __ovld __cnfn convert_half3_rtz(uchar3);\n" |
34247 | "half3 __ovld __cnfn convert_half3_rtz(short3);\n" |
34248 | "half3 __ovld __cnfn convert_half3_rtz(ushort3);\n" |
34249 | "half3 __ovld __cnfn convert_half3_rtz(int3);\n" |
34250 | "half3 __ovld __cnfn convert_half3_rtz(uint3);\n" |
34251 | "half3 __ovld __cnfn convert_half3_rtz(long3);\n" |
34252 | "half3 __ovld __cnfn convert_half3_rtz(ulong3);\n" |
34253 | "half3 __ovld __cnfn convert_half3_rtz(float3);\n" |
34254 | "half3 __ovld __cnfn convert_half3_rtz(half3);\n" |
34255 | "half4 __ovld __cnfn convert_half4(char4);\n" |
34256 | "half4 __ovld __cnfn convert_half4(uchar4);\n" |
34257 | "half4 __ovld __cnfn convert_half4(short4);\n" |
34258 | "half4 __ovld __cnfn convert_half4(ushort4);\n" |
34259 | "half4 __ovld __cnfn convert_half4(int4);\n" |
34260 | "half4 __ovld __cnfn convert_half4(uint4);\n" |
34261 | "half4 __ovld __cnfn convert_half4(long4);\n" |
34262 | "half4 __ovld __cnfn convert_half4(ulong4);\n" |
34263 | "half4 __ovld __cnfn convert_half4(float4);\n" |
34264 | "half4 __ovld __cnfn convert_half4(half4);\n" |
34265 | "half4 __ovld __cnfn convert_half4_rte(char4);\n" |
34266 | "half4 __ovld __cnfn convert_half4_rte(uchar4);\n" |
34267 | "half4 __ovld __cnfn convert_half4_rte(short4);\n" |
34268 | "half4 __ovld __cnfn convert_half4_rte(ushort4);\n" |
34269 | "half4 __ovld __cnfn convert_half4_rte(int4);\n" |
34270 | "half4 __ovld __cnfn convert_half4_rte(uint4);\n" |
34271 | "half4 __ovld __cnfn convert_half4_rte(long4);\n" |
34272 | "half4 __ovld __cnfn convert_half4_rte(ulong4);\n" |
34273 | "half4 __ovld __cnfn convert_half4_rte(float4);\n" |
34274 | "half4 __ovld __cnfn convert_half4_rte(half4);\n" |
34275 | "half4 __ovld __cnfn convert_half4_rtp(char4);\n" |
34276 | "half4 __ovld __cnfn convert_half4_rtp(uchar4);\n" |
34277 | "half4 __ovld __cnfn convert_half4_rtp(short4);\n" |
34278 | "half4 __ovld __cnfn convert_half4_rtp(ushort4);\n" |
34279 | "half4 __ovld __cnfn convert_half4_rtp(int4);\n" |
34280 | "half4 __ovld __cnfn convert_half4_rtp(uint4);\n" |
34281 | "half4 __ovld __cnfn convert_half4_rtp(long4);\n" |
34282 | "half4 __ovld __cnfn convert_half4_rtp(ulong4);\n" |
34283 | "half4 __ovld __cnfn convert_half4_rtp(float4);\n" |
34284 | "half4 __ovld __cnfn convert_half4_rtp(half4);\n" |
34285 | "half4 __ovld __cnfn convert_half4_rtn(char4);\n" |
34286 | "half4 __ovld __cnfn convert_half4_rtn(uchar4);\n" |
34287 | "half4 __ovld __cnfn convert_half4_rtn(short4);\n" |
34288 | "half4 __ovld __cnfn convert_half4_rtn(ushort4);\n" |
34289 | "half4 __ovld __cnfn convert_half4_rtn(int4);\n" |
34290 | "half4 __ovld __cnfn convert_half4_rtn(uint4);\n" |
34291 | "half4 __ovld __cnfn convert_half4_rtn(long4);\n" |
34292 | "half4 __ovld __cnfn convert_half4_rtn(ulong4);\n" |
34293 | "half4 __ovld __cnfn convert_half4_rtn(float4);\n" |
34294 | "half4 __ovld __cnfn convert_half4_rtn(half4);\n" |
34295 | "half4 __ovld __cnfn convert_half4_rtz(char4);\n" |
34296 | "half4 __ovld __cnfn convert_half4_rtz(uchar4);\n" |
34297 | "half4 __ovld __cnfn convert_half4_rtz(short4);\n" |
34298 | "half4 __ovld __cnfn convert_half4_rtz(ushort4);\n" |
34299 | "half4 __ovld __cnfn convert_half4_rtz(int4);\n" |
34300 | "half4 __ovld __cnfn convert_half4_rtz(uint4);\n" |
34301 | "half4 __ovld __cnfn convert_half4_rtz(long4);\n" |
34302 | "half4 __ovld __cnfn convert_half4_rtz(ulong4);\n" |
34303 | "half4 __ovld __cnfn convert_half4_rtz(float4);\n" |
34304 | "half4 __ovld __cnfn convert_half4_rtz(half4);\n" |
34305 | "half8 __ovld __cnfn convert_half8(char8);\n" |
34306 | "half8 __ovld __cnfn convert_half8(uchar8);\n" |
34307 | "half8 __ovld __cnfn convert_half8(short8);\n" |
34308 | "half8 __ovld __cnfn convert_half8(ushort8);\n" |
34309 | "half8 __ovld __cnfn convert_half8(int8);\n" |
34310 | "half8 __ovld __cnfn convert_half8(uint8);\n" |
34311 | "half8 __ovld __cnfn convert_half8(long8);\n" |
34312 | "half8 __ovld __cnfn convert_half8(ulong8);\n" |
34313 | "half8 __ovld __cnfn convert_half8(float8);\n" |
34314 | "half8 __ovld __cnfn convert_half8(half8);\n" |
34315 | "half8 __ovld __cnfn convert_half8_rte(char8);\n" |
34316 | "half8 __ovld __cnfn convert_half8_rte(uchar8);\n" |
34317 | "half8 __ovld __cnfn convert_half8_rte(short8);\n" |
34318 | "half8 __ovld __cnfn convert_half8_rte(ushort8);\n" |
34319 | "half8 __ovld __cnfn convert_half8_rte(int8);\n" |
34320 | "half8 __ovld __cnfn convert_half8_rte(uint8);\n" |
34321 | "half8 __ovld __cnfn convert_half8_rte(long8);\n" |
34322 | "half8 __ovld __cnfn convert_half8_rte(ulong8);\n" |
34323 | "half8 __ovld __cnfn convert_half8_rte(float8);\n" |
34324 | "half8 __ovld __cnfn convert_half8_rte(half8);\n" |
34325 | "half8 __ovld __cnfn convert_half8_rtp(char8);\n" |
34326 | "half8 __ovld __cnfn convert_half8_rtp(uchar8);\n" |
34327 | "half8 __ovld __cnfn convert_half8_rtp(short8);\n" |
34328 | "half8 __ovld __cnfn convert_half8_rtp(ushort8);\n" |
34329 | "half8 __ovld __cnfn convert_half8_rtp(int8);\n" |
34330 | "half8 __ovld __cnfn convert_half8_rtp(uint8);\n" |
34331 | "half8 __ovld __cnfn convert_half8_rtp(long8);\n" |
34332 | "half8 __ovld __cnfn convert_half8_rtp(ulong8);\n" |
34333 | "half8 __ovld __cnfn convert_half8_rtp(float8);\n" |
34334 | "half8 __ovld __cnfn convert_half8_rtp(half8);\n" |
34335 | "half8 __ovld __cnfn convert_half8_rtn(char8);\n" |
34336 | "half8 __ovld __cnfn convert_half8_rtn(uchar8);\n" |
34337 | "half8 __ovld __cnfn convert_half8_rtn(short8);\n" |
34338 | "half8 __ovld __cnfn convert_half8_rtn(ushort8);\n" |
34339 | "half8 __ovld __cnfn convert_half8_rtn(int8);\n" |
34340 | "half8 __ovld __cnfn convert_half8_rtn(uint8);\n" |
34341 | "half8 __ovld __cnfn convert_half8_rtn(long8);\n" |
34342 | "half8 __ovld __cnfn convert_half8_rtn(ulong8);\n" |
34343 | "half8 __ovld __cnfn convert_half8_rtn(float8);\n" |
34344 | "half8 __ovld __cnfn convert_half8_rtn(half8);\n" |
34345 | "half8 __ovld __cnfn convert_half8_rtz(char8);\n" |
34346 | "half8 __ovld __cnfn convert_half8_rtz(uchar8);\n" |
34347 | "half8 __ovld __cnfn convert_half8_rtz(short8);\n" |
34348 | "half8 __ovld __cnfn convert_half8_rtz(ushort8);\n" |
34349 | "half8 __ovld __cnfn convert_half8_rtz(int8);\n" |
34350 | "half8 __ovld __cnfn convert_half8_rtz(uint8);\n" |
34351 | "half8 __ovld __cnfn convert_half8_rtz(long8);\n" |
34352 | "half8 __ovld __cnfn convert_half8_rtz(ulong8);\n" |
34353 | "half8 __ovld __cnfn convert_half8_rtz(float8);\n" |
34354 | "half8 __ovld __cnfn convert_half8_rtz(half8);\n" |
34355 | "half16 __ovld __cnfn convert_half16(char16);\n" |
34356 | "half16 __ovld __cnfn convert_half16(uchar16);\n" |
34357 | "half16 __ovld __cnfn convert_half16(short16);\n" |
34358 | "half16 __ovld __cnfn convert_half16(ushort16);\n" |
34359 | "half16 __ovld __cnfn convert_half16(int16);\n" |
34360 | "half16 __ovld __cnfn convert_half16(uint16);\n" |
34361 | "half16 __ovld __cnfn convert_half16(long16);\n" |
34362 | "half16 __ovld __cnfn convert_half16(ulong16);\n" |
34363 | "half16 __ovld __cnfn convert_half16(float16);\n" |
34364 | "half16 __ovld __cnfn convert_half16(half16);\n" |
34365 | "half16 __ovld __cnfn convert_half16_rte(char16);\n" |
34366 | "half16 __ovld __cnfn convert_half16_rte(uchar16);\n" |
34367 | "half16 __ovld __cnfn convert_half16_rte(short16);\n" |
34368 | "half16 __ovld __cnfn convert_half16_rte(ushort16);\n" |
34369 | "half16 __ovld __cnfn convert_half16_rte(int16);\n" |
34370 | "half16 __ovld __cnfn convert_half16_rte(uint16);\n" |
34371 | "half16 __ovld __cnfn convert_half16_rte(long16);\n" |
34372 | "half16 __ovld __cnfn convert_half16_rte(ulong16);\n" |
34373 | "half16 __ovld __cnfn convert_half16_rte(float16);\n" |
34374 | "half16 __ovld __cnfn convert_half16_rte(half16);\n" |
34375 | "half16 __ovld __cnfn convert_half16_rtp(char16);\n" |
34376 | "half16 __ovld __cnfn convert_half16_rtp(uchar16);\n" |
34377 | "half16 __ovld __cnfn convert_half16_rtp(short16);\n" |
34378 | "half16 __ovld __cnfn convert_half16_rtp(ushort16);\n" |
34379 | "half16 __ovld __cnfn convert_half16_rtp(int16);\n" |
34380 | "half16 __ovld __cnfn convert_half16_rtp(uint16);\n" |
34381 | "half16 __ovld __cnfn convert_half16_rtp(long16);\n" |
34382 | "half16 __ovld __cnfn convert_half16_rtp(ulong16);\n" |
34383 | "half16 __ovld __cnfn convert_half16_rtp(float16);\n" |
34384 | "half16 __ovld __cnfn convert_half16_rtp(half16);\n" |
34385 | "half16 __ovld __cnfn convert_half16_rtn(char16);\n" |
34386 | "half16 __ovld __cnfn convert_half16_rtn(uchar16);\n" |
34387 | "half16 __ovld __cnfn convert_half16_rtn(short16);\n" |
34388 | "half16 __ovld __cnfn convert_half16_rtn(ushort16);\n" |
34389 | "half16 __ovld __cnfn convert_half16_rtn(int16);\n" |
34390 | "half16 __ovld __cnfn convert_half16_rtn(uint16);\n" |
34391 | "half16 __ovld __cnfn convert_half16_rtn(long16);\n" |
34392 | "half16 __ovld __cnfn convert_half16_rtn(ulong16);\n" |
34393 | "half16 __ovld __cnfn convert_half16_rtn(float16);\n" |
34394 | "half16 __ovld __cnfn convert_half16_rtn(half16);\n" |
34395 | "half16 __ovld __cnfn convert_half16_rtz(char16);\n" |
34396 | "half16 __ovld __cnfn convert_half16_rtz(uchar16);\n" |
34397 | "half16 __ovld __cnfn convert_half16_rtz(short16);\n" |
34398 | "half16 __ovld __cnfn convert_half16_rtz(ushort16);\n" |
34399 | "half16 __ovld __cnfn convert_half16_rtz(int16);\n" |
34400 | "half16 __ovld __cnfn convert_half16_rtz(uint16);\n" |
34401 | "half16 __ovld __cnfn convert_half16_rtz(long16);\n" |
34402 | "half16 __ovld __cnfn convert_half16_rtz(ulong16);\n" |
34403 | "half16 __ovld __cnfn convert_half16_rtz(float16);\n" |
34404 | "half16 __ovld __cnfn convert_half16_rtz(half16);\n" |
34405 | "\n" |
34406 | "// Convert half types to double types.\n" |
34407 | "#ifdef cl_khr_fp64\n" |
34408 | "double __ovld __cnfn convert_double(half);\n" |
34409 | "double __ovld __cnfn convert_double_rte(half);\n" |
34410 | "double __ovld __cnfn convert_double_rtp(half);\n" |
34411 | "double __ovld __cnfn convert_double_rtn(half);\n" |
34412 | "double __ovld __cnfn convert_double_rtz(half);\n" |
34413 | "double2 __ovld __cnfn convert_double2(half2);\n" |
34414 | "double2 __ovld __cnfn convert_double2_rte(half2);\n" |
34415 | "double2 __ovld __cnfn convert_double2_rtp(half2);\n" |
34416 | "double2 __ovld __cnfn convert_double2_rtn(half2);\n" |
34417 | "double2 __ovld __cnfn convert_double2_rtz(half2);\n" |
34418 | "double3 __ovld __cnfn convert_double3(half3);\n" |
34419 | "double3 __ovld __cnfn convert_double3_rte(half3);\n" |
34420 | "double3 __ovld __cnfn convert_double3_rtp(half3);\n" |
34421 | "double3 __ovld __cnfn convert_double3_rtn(half3);\n" |
34422 | "double3 __ovld __cnfn convert_double3_rtz(half3);\n" |
34423 | "double4 __ovld __cnfn convert_double4(half4);\n" |
34424 | "double4 __ovld __cnfn convert_double4_rte(half4);\n" |
34425 | "double4 __ovld __cnfn convert_double4_rtp(half4);\n" |
34426 | "double4 __ovld __cnfn convert_double4_rtn(half4);\n" |
34427 | "double4 __ovld __cnfn convert_double4_rtz(half4);\n" |
34428 | "double8 __ovld __cnfn convert_double8(half8);\n" |
34429 | "double8 __ovld __cnfn convert_double8_rte(half8);\n" |
34430 | "double8 __ovld __cnfn convert_double8_rtp(half8);\n" |
34431 | "double8 __ovld __cnfn convert_double8_rtn(half8);\n" |
34432 | "double8 __ovld __cnfn convert_double8_rtz(half8);\n" |
34433 | "double16 __ovld __cnfn convert_double16(half16);\n" |
34434 | "double16 __ovld __cnfn convert_double16_rte(half16);\n" |
34435 | "double16 __ovld __cnfn convert_double16_rtp(half16);\n" |
34436 | "double16 __ovld __cnfn convert_double16_rtn(half16);\n" |
34437 | "double16 __ovld __cnfn convert_double16_rtz(half16);\n" |
34438 | "\n" |
34439 | "// Convert double types to half types.\n" |
34440 | "half __ovld __cnfn convert_half(double);\n" |
34441 | "half __ovld __cnfn convert_half_rte(double);\n" |
34442 | "half __ovld __cnfn convert_half_rtp(double);\n" |
34443 | "half __ovld __cnfn convert_half_rtn(double);\n" |
34444 | "half __ovld __cnfn convert_half_rtz(double);\n" |
34445 | "half2 __ovld __cnfn convert_half2(double2);\n" |
34446 | "half2 __ovld __cnfn convert_half2_rte(double2);\n" |
34447 | "half2 __ovld __cnfn convert_half2_rtp(double2);\n" |
34448 | "half2 __ovld __cnfn convert_half2_rtn(double2);\n" |
34449 | "half2 __ovld __cnfn convert_half2_rtz(double2);\n" |
34450 | "half3 __ovld __cnfn convert_half3(double3);\n" |
34451 | "half3 __ovld __cnfn convert_half3_rte(double3);\n" |
34452 | "half3 __ovld __cnfn convert_half3_rtp(double3);\n" |
34453 | "half3 __ovld __cnfn convert_half3_rtn(double3);\n" |
34454 | "half3 __ovld __cnfn convert_half3_rtz(double3);\n" |
34455 | "half4 __ovld __cnfn convert_half4(double4);\n" |
34456 | "half4 __ovld __cnfn convert_half4_rte(double4);\n" |
34457 | "half4 __ovld __cnfn convert_half4_rtp(double4);\n" |
34458 | "half4 __ovld __cnfn convert_half4_rtn(double4);\n" |
34459 | "half4 __ovld __cnfn convert_half4_rtz(double4);\n" |
34460 | "half8 __ovld __cnfn convert_half8(double8);\n" |
34461 | "half8 __ovld __cnfn convert_half8_rte(double8);\n" |
34462 | "half8 __ovld __cnfn convert_half8_rtp(double8);\n" |
34463 | "half8 __ovld __cnfn convert_half8_rtn(double8);\n" |
34464 | "half8 __ovld __cnfn convert_half8_rtz(double8);\n" |
34465 | "half16 __ovld __cnfn convert_half16(double16);\n" |
34466 | "half16 __ovld __cnfn convert_half16_rte(double16);\n" |
34467 | "half16 __ovld __cnfn convert_half16_rtp(double16);\n" |
34468 | "half16 __ovld __cnfn convert_half16_rtn(double16);\n" |
34469 | "half16 __ovld __cnfn convert_half16_rtz(double16);\n" |
34470 | "#endif //cl_khr_fp64\n" |
34471 | "\n" |
34472 | "#endif // cl_khr_fp16\n" |
34473 | "\n" |
34474 | "/**\n" |
34475 | " * OpenCL v1.1/1.2/2.0 s6.2.4.2 - as_type operators\n" |
34476 | " * Reinterprets a data type as another data type of the same size\n" |
34477 | " */\n" |
34478 | "#define as_char(x) __builtin_astype((x), char)\n" |
34479 | "#define as_char2(x) __builtin_astype((x), char2)\n" |
34480 | "#define as_char3(x) __builtin_astype((x), char3)\n" |
34481 | "#define as_char4(x) __builtin_astype((x), char4)\n" |
34482 | "#define as_char8(x) __builtin_astype((x), char8)\n" |
34483 | "#define as_char16(x) __builtin_astype((x), char16)\n" |
34484 | "\n" |
34485 | "#define as_uchar(x) __builtin_astype((x), uchar)\n" |
34486 | "#define as_uchar2(x) __builtin_astype((x), uchar2)\n" |
34487 | "#define as_uchar3(x) __builtin_astype((x), uchar3)\n" |
34488 | "#define as_uchar4(x) __builtin_astype((x), uchar4)\n" |
34489 | "#define as_uchar8(x) __builtin_astype((x), uchar8)\n" |
34490 | "#define as_uchar16(x) __builtin_astype((x), uchar16)\n" |
34491 | "\n" |
34492 | "#define as_short(x) __builtin_astype((x), short)\n" |
34493 | "#define as_short2(x) __builtin_astype((x), short2)\n" |
34494 | "#define as_short3(x) __builtin_astype((x), short3)\n" |
34495 | "#define as_short4(x) __builtin_astype((x), short4)\n" |
34496 | "#define as_short8(x) __builtin_astype((x), short8)\n" |
34497 | "#define as_short16(x) __builtin_astype((x), short16)\n" |
34498 | "\n" |
34499 | "#define as_ushort(x) __builtin_astype((x), ushort)\n" |
34500 | "#define as_ushort2(x) __builtin_astype((x), ushort2)\n" |
34501 | "#define as_ushort3(x) __builtin_astype((x), ushort3)\n" |
34502 | "#define as_ushort4(x) __builtin_astype((x), ushort4)\n" |
34503 | "#define as_ushort8(x) __builtin_astype((x), ushort8)\n" |
34504 | "#define as_ushort16(x) __builtin_astype((x), ushort16)\n" |
34505 | "\n" |
34506 | "#define as_int(x) __builtin_astype((x), int)\n" |
34507 | "#define as_int2(x) __builtin_astype((x), int2)\n" |
34508 | "#define as_int3(x) __builtin_astype((x), int3)\n" |
34509 | "#define as_int4(x) __builtin_astype((x), int4)\n" |
34510 | "#define as_int8(x) __builtin_astype((x), int8)\n" |
34511 | "#define as_int16(x) __builtin_astype((x), int16)\n" |
34512 | "\n" |
34513 | "#define as_uint(x) __builtin_astype((x), uint)\n" |
34514 | "#define as_uint2(x) __builtin_astype((x), uint2)\n" |
34515 | "#define as_uint3(x) __builtin_astype((x), uint3)\n" |
34516 | "#define as_uint4(x) __builtin_astype((x), uint4)\n" |
34517 | "#define as_uint8(x) __builtin_astype((x), uint8)\n" |
34518 | "#define as_uint16(x) __builtin_astype((x), uint16)\n" |
34519 | "\n" |
34520 | "#define as_long(x) __builtin_astype((x), long)\n" |
34521 | "#define as_long2(x) __builtin_astype((x), long2)\n" |
34522 | "#define as_long3(x) __builtin_astype((x), long3)\n" |
34523 | "#define as_long4(x) __builtin_astype((x), long4)\n" |
34524 | "#define as_long8(x) __builtin_astype((x), long8)\n" |
34525 | "#define as_long16(x) __builtin_astype((x), long16)\n" |
34526 | "\n" |
34527 | "#define as_ulong(x) __builtin_astype((x), ulong)\n" |
34528 | "#define as_ulong2(x) __builtin_astype((x), ulong2)\n" |
34529 | "#define as_ulong3(x) __builtin_astype((x), ulong3)\n" |
34530 | "#define as_ulong4(x) __builtin_astype((x), ulong4)\n" |
34531 | "#define as_ulong8(x) __builtin_astype((x), ulong8)\n" |
34532 | "#define as_ulong16(x) __builtin_astype((x), ulong16)\n" |
34533 | "\n" |
34534 | "#define as_float(x) __builtin_astype((x), float)\n" |
34535 | "#define as_float2(x) __builtin_astype((x), float2)\n" |
34536 | "#define as_float3(x) __builtin_astype((x), float3)\n" |
34537 | "#define as_float4(x) __builtin_astype((x), float4)\n" |
34538 | "#define as_float8(x) __builtin_astype((x), float8)\n" |
34539 | "#define as_float16(x) __builtin_astype((x), float16)\n" |
34540 | "\n" |
34541 | "#ifdef cl_khr_fp64\n" |
34542 | "#define as_double(x) __builtin_astype((x), double)\n" |
34543 | "#define as_double2(x) __builtin_astype((x), double2)\n" |
34544 | "#define as_double3(x) __builtin_astype((x), double3)\n" |
34545 | "#define as_double4(x) __builtin_astype((x), double4)\n" |
34546 | "#define as_double8(x) __builtin_astype((x), double8)\n" |
34547 | "#define as_double16(x) __builtin_astype((x), double16)\n" |
34548 | "#endif //cl_khr_fp64\n" |
34549 | "\n" |
34550 | "#ifdef cl_khr_fp16\n" |
34551 | "#define as_half(x) __builtin_astype((x), half)\n" |
34552 | "#define as_half2(x) __builtin_astype((x), half2)\n" |
34553 | "#define as_half3(x) __builtin_astype((x), half3)\n" |
34554 | "#define as_half4(x) __builtin_astype((x), half4)\n" |
34555 | "#define as_half8(x) __builtin_astype((x), half8)\n" |
34556 | "#define as_half16(x) __builtin_astype((x), half16)\n" |
34557 | "#endif //cl_khr_fp16\n" |
34558 | "\n" |
34559 | "// OpenCL v1.1 s6.9, v1.2/2.0 s6.10 - Function qualifiers\n" |
34560 | "\n" |
34561 | "#define __kernel_exec(X, typen) __kernel \\\n" |
34562 | " __attribute__((work_group_size_hint(X, 1, 1))) \\\n" |
34563 | " __attribute__((vec_type_hint(typen)))\n" |
34564 | "\n" |
34565 | "#define kernel_exec(X, typen) __kernel \\\n" |
34566 | " __attribute__((work_group_size_hint(X, 1, 1))) \\\n" |
34567 | " __attribute__((vec_type_hint(typen)))\n" |
34568 | "\n" |
34569 | "// OpenCL v1.1 s6.11.1, v1.2 s6.12.1, v2.0 s6.13.1 - Work-item Functions\n" |
34570 | "\n" |
34571 | "/**\n" |
34572 | " * Returns the number of dimensions in use. This is the\n" |
34573 | " * value given to the work_dim argument specified in\n" |
34574 | " * clEnqueueNDRangeKernel.\n" |
34575 | " * For clEnqueueTask, this returns 1.\n" |
34576 | " */\n" |
34577 | "uint __ovld __cnfn get_work_dim(void);\n" |
34578 | "\n" |
34579 | "/**\n" |
34580 | " * Returns the number of global work-items specified for\n" |
34581 | " * dimension identified by dimindx. This value is given by\n" |
34582 | " * the global_work_size argument to\n" |
34583 | " * clEnqueueNDRangeKernel. Valid values of dimindx\n" |
34584 | " * are 0 to get_work_dim() - 1. For other values of\n" |
34585 | " * dimindx, get_global_size() returns 1.\n" |
34586 | " * For clEnqueueTask, this always returns 1.\n" |
34587 | " */\n" |
34588 | "size_t __ovld __cnfn get_global_size(uint dimindx);\n" |
34589 | "\n" |
34590 | "/**\n" |
34591 | " * Returns the unique global work-item ID value for\n" |
34592 | " * dimension identified by dimindx. The global work-item\n" |
34593 | " * ID specifies the work-item ID based on the number of\n" |
34594 | " * global work-items specified to execute the kernel. Valid\n" |
34595 | " * values of dimindx are 0 to get_work_dim() - 1. For\n" |
34596 | " * other values of dimindx, get_global_id() returns 0.\n" |
34597 | " * For clEnqueueTask, this returns 0.\n" |
34598 | " */\n" |
34599 | "size_t __ovld __cnfn get_global_id(uint dimindx);\n" |
34600 | "\n" |
34601 | "/**\n" |
34602 | " * Returns the number of local work-items specified in\n" |
34603 | " * dimension identified by dimindx. This value is given by\n" |
34604 | " * the local_work_size argument to\n" |
34605 | " * clEnqueueNDRangeKernel if local_work_size is not\n" |
34606 | " * NULL; otherwise the OpenCL implementation chooses\n" |
34607 | " * an appropriate local_work_size value which is returned\n" |
34608 | " * by this function. Valid values of dimindx are 0 to\n" |
34609 | " * get_work_dim() - 1. For other values of dimindx,\n" |
34610 | " * get_local_size() returns 1.\n" |
34611 | " * For clEnqueueTask, this always returns 1.\n" |
34612 | " */\n" |
34613 | "size_t __ovld __cnfn get_local_size(uint dimindx);\n" |
34614 | "\n" |
34615 | "/**\n" |
34616 | " * Returns the unique local work-item ID i.e. a work-item\n" |
34617 | " * within a specific work-group for dimension identified by\n" |
34618 | " * dimindx. Valid values of dimindx are 0 to\n" |
34619 | " * get_work_dim() - 1. For other values of dimindx,\n" |
34620 | " * get_local_id() returns 0.\n" |
34621 | " * For clEnqueueTask, this returns 0.\n" |
34622 | " */\n" |
34623 | "size_t __ovld __cnfn get_local_id(uint dimindx);\n" |
34624 | "\n" |
34625 | "/**\n" |
34626 | " * Returns the number of work-groups that will execute a\n" |
34627 | " * kernel for dimension identified by dimindx.\n" |
34628 | " * Valid values of dimindx are 0 to get_work_dim() - 1.\n" |
34629 | " * For other values of dimindx, get_num_groups () returns\n" |
34630 | " * 1.\n" |
34631 | " * For clEnqueueTask, this always returns 1.\n" |
34632 | " */\n" |
34633 | "size_t __ovld __cnfn get_num_groups(uint dimindx);\n" |
34634 | "\n" |
34635 | "/**\n" |
34636 | " * get_group_id returns the work-group ID which is a\n" |
34637 | " * number from 0 .. get_num_groups(dimindx) - 1.\n" |
34638 | " * Valid values of dimindx are 0 to get_work_dim() - 1.\n" |
34639 | " * For other values, get_group_id() returns 0.\n" |
34640 | " * For clEnqueueTask, this returns 0.\n" |
34641 | " */\n" |
34642 | "size_t __ovld __cnfn get_group_id(uint dimindx);\n" |
34643 | "\n" |
34644 | "/**\n" |
34645 | " * get_global_offset returns the offset values specified in\n" |
34646 | " * global_work_offset argument to\n" |
34647 | " * clEnqueueNDRangeKernel.\n" |
34648 | " * Valid values of dimindx are 0 to get_work_dim() - 1.\n" |
34649 | " * For other values, get_global_offset() returns 0.\n" |
34650 | " * For clEnqueueTask, this returns 0.\n" |
34651 | " */\n" |
34652 | "size_t __ovld __cnfn get_global_offset(uint dimindx);\n" |
34653 | "\n" |
34654 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
34655 | "size_t __ovld get_enqueued_local_size(uint dimindx);\n" |
34656 | "size_t __ovld get_global_linear_id(void);\n" |
34657 | "size_t __ovld get_local_linear_id(void);\n" |
34658 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
34659 | "\n" |
34660 | "// OpenCL v1.1 s6.11.2, v1.2 s6.12.2, v2.0 s6.13.2 - Math functions\n" |
34661 | "\n" |
34662 | "/**\n" |
34663 | " * Arc cosine function.\n" |
34664 | " */\n" |
34665 | "float __ovld __cnfn acos(float);\n" |
34666 | "float2 __ovld __cnfn acos(float2);\n" |
34667 | "float3 __ovld __cnfn acos(float3);\n" |
34668 | "float4 __ovld __cnfn acos(float4);\n" |
34669 | "float8 __ovld __cnfn acos(float8);\n" |
34670 | "float16 __ovld __cnfn acos(float16);\n" |
34671 | "#ifdef cl_khr_fp64\n" |
34672 | "double __ovld __cnfn acos(double);\n" |
34673 | "double2 __ovld __cnfn acos(double2);\n" |
34674 | "double3 __ovld __cnfn acos(double3);\n" |
34675 | "double4 __ovld __cnfn acos(double4);\n" |
34676 | "double8 __ovld __cnfn acos(double8);\n" |
34677 | "double16 __ovld __cnfn acos(double16);\n" |
34678 | "#endif //cl_khr_fp64\n" |
34679 | "#ifdef cl_khr_fp16\n" |
34680 | "half __ovld __cnfn acos(half);\n" |
34681 | "half2 __ovld __cnfn acos(half2);\n" |
34682 | "half3 __ovld __cnfn acos(half3);\n" |
34683 | "half4 __ovld __cnfn acos(half4);\n" |
34684 | "half8 __ovld __cnfn acos(half8);\n" |
34685 | "half16 __ovld __cnfn acos(half16);\n" |
34686 | "#endif //cl_khr_fp16\n" |
34687 | "\n" |
34688 | "/**\n" |
34689 | " * Inverse hyperbolic cosine.\n" |
34690 | " */\n" |
34691 | "float __ovld __cnfn acosh(float);\n" |
34692 | "float2 __ovld __cnfn acosh(float2);\n" |
34693 | "float3 __ovld __cnfn acosh(float3);\n" |
34694 | "float4 __ovld __cnfn acosh(float4);\n" |
34695 | "float8 __ovld __cnfn acosh(float8);\n" |
34696 | "float16 __ovld __cnfn acosh(float16);\n" |
34697 | "#ifdef cl_khr_fp64\n" |
34698 | "double __ovld __cnfn acosh(double);\n" |
34699 | "double2 __ovld __cnfn acosh(double2);\n" |
34700 | "double3 __ovld __cnfn acosh(double3);\n" |
34701 | "double4 __ovld __cnfn acosh(double4);\n" |
34702 | "double8 __ovld __cnfn acosh(double8);\n" |
34703 | "double16 __ovld __cnfn acosh(double16);\n" |
34704 | "#endif //cl_khr_fp64\n" |
34705 | "#ifdef cl_khr_fp16\n" |
34706 | "half __ovld __cnfn acosh(half);\n" |
34707 | "half2 __ovld __cnfn acosh(half2);\n" |
34708 | "half3 __ovld __cnfn acosh(half3);\n" |
34709 | "half4 __ovld __cnfn acosh(half4);\n" |
34710 | "half8 __ovld __cnfn acosh(half8);\n" |
34711 | "half16 __ovld __cnfn acosh(half16);\n" |
34712 | "#endif //cl_khr_fp16\n" |
34713 | "\n" |
34714 | "/**\n" |
34715 | " * Compute acos (x) / PI.\n" |
34716 | " */\n" |
34717 | "float __ovld __cnfn acospi(float x);\n" |
34718 | "float2 __ovld __cnfn acospi(float2 x);\n" |
34719 | "float3 __ovld __cnfn acospi(float3 x);\n" |
34720 | "float4 __ovld __cnfn acospi(float4 x);\n" |
34721 | "float8 __ovld __cnfn acospi(float8 x);\n" |
34722 | "float16 __ovld __cnfn acospi(float16 x);\n" |
34723 | "#ifdef cl_khr_fp64\n" |
34724 | "double __ovld __cnfn acospi(double x);\n" |
34725 | "double2 __ovld __cnfn acospi(double2 x);\n" |
34726 | "double3 __ovld __cnfn acospi(double3 x);\n" |
34727 | "double4 __ovld __cnfn acospi(double4 x);\n" |
34728 | "double8 __ovld __cnfn acospi(double8 x);\n" |
34729 | "double16 __ovld __cnfn acospi(double16 x);\n" |
34730 | "#endif //cl_khr_fp64\n" |
34731 | "#ifdef cl_khr_fp16\n" |
34732 | "half __ovld __cnfn acospi(half x);\n" |
34733 | "half2 __ovld __cnfn acospi(half2 x);\n" |
34734 | "half3 __ovld __cnfn acospi(half3 x);\n" |
34735 | "half4 __ovld __cnfn acospi(half4 x);\n" |
34736 | "half8 __ovld __cnfn acospi(half8 x);\n" |
34737 | "half16 __ovld __cnfn acospi(half16 x);\n" |
34738 | "#endif //cl_khr_fp16\n" |
34739 | "\n" |
34740 | "/**\n" |
34741 | " * Arc sine function.\n" |
34742 | " */\n" |
34743 | "float __ovld __cnfn asin(float);\n" |
34744 | "float2 __ovld __cnfn asin(float2);\n" |
34745 | "float3 __ovld __cnfn asin(float3);\n" |
34746 | "float4 __ovld __cnfn asin(float4);\n" |
34747 | "float8 __ovld __cnfn asin(float8);\n" |
34748 | "float16 __ovld __cnfn asin(float16);\n" |
34749 | "#ifdef cl_khr_fp64\n" |
34750 | "double __ovld __cnfn asin(double);\n" |
34751 | "double2 __ovld __cnfn asin(double2);\n" |
34752 | "double3 __ovld __cnfn asin(double3);\n" |
34753 | "double4 __ovld __cnfn asin(double4);\n" |
34754 | "double8 __ovld __cnfn asin(double8);\n" |
34755 | "double16 __ovld __cnfn asin(double16);\n" |
34756 | "#endif //cl_khr_fp64\n" |
34757 | "#ifdef cl_khr_fp16\n" |
34758 | "half __ovld __cnfn asin(half);\n" |
34759 | "half2 __ovld __cnfn asin(half2);\n" |
34760 | "half3 __ovld __cnfn asin(half3);\n" |
34761 | "half4 __ovld __cnfn asin(half4);\n" |
34762 | "half8 __ovld __cnfn asin(half8);\n" |
34763 | "half16 __ovld __cnfn asin(half16);\n" |
34764 | "#endif //cl_khr_fp16\n" |
34765 | "\n" |
34766 | "/**\n" |
34767 | " * Inverse hyperbolic sine.\n" |
34768 | " */\n" |
34769 | "float __ovld __cnfn asinh(float);\n" |
34770 | "float2 __ovld __cnfn asinh(float2);\n" |
34771 | "float3 __ovld __cnfn asinh(float3);\n" |
34772 | "float4 __ovld __cnfn asinh(float4);\n" |
34773 | "float8 __ovld __cnfn asinh(float8);\n" |
34774 | "float16 __ovld __cnfn asinh(float16);\n" |
34775 | "#ifdef cl_khr_fp64\n" |
34776 | "double __ovld __cnfn asinh(double);\n" |
34777 | "double2 __ovld __cnfn asinh(double2);\n" |
34778 | "double3 __ovld __cnfn asinh(double3);\n" |
34779 | "double4 __ovld __cnfn asinh(double4);\n" |
34780 | "double8 __ovld __cnfn asinh(double8);\n" |
34781 | "double16 __ovld __cnfn asinh(double16);\n" |
34782 | "#endif //cl_khr_fp64\n" |
34783 | "#ifdef cl_khr_fp16\n" |
34784 | "half __ovld __cnfn asinh(half);\n" |
34785 | "half2 __ovld __cnfn asinh(half2);\n" |
34786 | "half3 __ovld __cnfn asinh(half3);\n" |
34787 | "half4 __ovld __cnfn asinh(half4);\n" |
34788 | "half8 __ovld __cnfn asinh(half8);\n" |
34789 | "half16 __ovld __cnfn asinh(half16);\n" |
34790 | "#endif //cl_khr_fp16\n" |
34791 | "\n" |
34792 | "/**\n" |
34793 | " * Compute asin (x) / PI.\n" |
34794 | " */\n" |
34795 | "float __ovld __cnfn asinpi(float x);\n" |
34796 | "float2 __ovld __cnfn asinpi(float2 x);\n" |
34797 | "float3 __ovld __cnfn asinpi(float3 x);\n" |
34798 | "float4 __ovld __cnfn asinpi(float4 x);\n" |
34799 | "float8 __ovld __cnfn asinpi(float8 x);\n" |
34800 | "float16 __ovld __cnfn asinpi(float16 x);\n" |
34801 | "#ifdef cl_khr_fp64\n" |
34802 | "double __ovld __cnfn asinpi(double x);\n" |
34803 | "double2 __ovld __cnfn asinpi(double2 x);\n" |
34804 | "double3 __ovld __cnfn asinpi(double3 x);\n" |
34805 | "double4 __ovld __cnfn asinpi(double4 x);\n" |
34806 | "double8 __ovld __cnfn asinpi(double8 x);\n" |
34807 | "double16 __ovld __cnfn asinpi(double16 x);\n" |
34808 | "#endif //cl_khr_fp64\n" |
34809 | "#ifdef cl_khr_fp16\n" |
34810 | "half __ovld __cnfn asinpi(half x);\n" |
34811 | "half2 __ovld __cnfn asinpi(half2 x);\n" |
34812 | "half3 __ovld __cnfn asinpi(half3 x);\n" |
34813 | "half4 __ovld __cnfn asinpi(half4 x);\n" |
34814 | "half8 __ovld __cnfn asinpi(half8 x);\n" |
34815 | "half16 __ovld __cnfn asinpi(half16 x);\n" |
34816 | "#endif //cl_khr_fp16\n" |
34817 | "\n" |
34818 | "/**\n" |
34819 | " * Arc tangent function.\n" |
34820 | " */\n" |
34821 | "float __ovld __cnfn atan(float y_over_x);\n" |
34822 | "float2 __ovld __cnfn atan(float2 y_over_x);\n" |
34823 | "float3 __ovld __cnfn atan(float3 y_over_x);\n" |
34824 | "float4 __ovld __cnfn atan(float4 y_over_x);\n" |
34825 | "float8 __ovld __cnfn atan(float8 y_over_x);\n" |
34826 | "float16 __ovld __cnfn atan(float16 y_over_x);\n" |
34827 | "#ifdef cl_khr_fp64\n" |
34828 | "double __ovld __cnfn atan(double y_over_x);\n" |
34829 | "double2 __ovld __cnfn atan(double2 y_over_x);\n" |
34830 | "double3 __ovld __cnfn atan(double3 y_over_x);\n" |
34831 | "double4 __ovld __cnfn atan(double4 y_over_x);\n" |
34832 | "double8 __ovld __cnfn atan(double8 y_over_x);\n" |
34833 | "double16 __ovld __cnfn atan(double16 y_over_x);\n" |
34834 | "#endif //cl_khr_fp64\n" |
34835 | "#ifdef cl_khr_fp16\n" |
34836 | "half __ovld __cnfn atan(half y_over_x);\n" |
34837 | "half2 __ovld __cnfn atan(half2 y_over_x);\n" |
34838 | "half3 __ovld __cnfn atan(half3 y_over_x);\n" |
34839 | "half4 __ovld __cnfn atan(half4 y_over_x);\n" |
34840 | "half8 __ovld __cnfn atan(half8 y_over_x);\n" |
34841 | "half16 __ovld __cnfn atan(half16 y_over_x);\n" |
34842 | "#endif //cl_khr_fp16\n" |
34843 | "\n" |
34844 | "/**\n" |
34845 | " * Arc tangent of y / x.\n" |
34846 | " */\n" |
34847 | "float __ovld __cnfn atan2(float y, float x);\n" |
34848 | "float2 __ovld __cnfn atan2(float2 y, float2 x);\n" |
34849 | "float3 __ovld __cnfn atan2(float3 y, float3 x);\n" |
34850 | "float4 __ovld __cnfn atan2(float4 y, float4 x);\n" |
34851 | "float8 __ovld __cnfn atan2(float8 y, float8 x);\n" |
34852 | "float16 __ovld __cnfn atan2(float16 y, float16 x);\n" |
34853 | "#ifdef cl_khr_fp64\n" |
34854 | "double __ovld __cnfn atan2(double y, double x);\n" |
34855 | "double2 __ovld __cnfn atan2(double2 y, double2 x);\n" |
34856 | "double3 __ovld __cnfn atan2(double3 y, double3 x);\n" |
34857 | "double4 __ovld __cnfn atan2(double4 y, double4 x);\n" |
34858 | "double8 __ovld __cnfn atan2(double8 y, double8 x);\n" |
34859 | "double16 __ovld __cnfn atan2(double16 y, double16 x);\n" |
34860 | "#endif //cl_khr_fp64\n" |
34861 | "#ifdef cl_khr_fp16\n" |
34862 | "half __ovld __cnfn atan2(half y, half x);\n" |
34863 | "half2 __ovld __cnfn atan2(half2 y, half2 x);\n" |
34864 | "half3 __ovld __cnfn atan2(half3 y, half3 x);\n" |
34865 | "half4 __ovld __cnfn atan2(half4 y, half4 x);\n" |
34866 | "half8 __ovld __cnfn atan2(half8 y, half8 x);\n" |
34867 | "half16 __ovld __cnfn atan2(half16 y, half16 x);\n" |
34868 | "#endif //cl_khr_fp16\n" |
34869 | "\n" |
34870 | "/**\n" |
34871 | " * Hyperbolic arc tangent.\n" |
34872 | " */\n" |
34873 | "float __ovld __cnfn atanh(float);\n" |
34874 | "float2 __ovld __cnfn atanh(float2);\n" |
34875 | "float3 __ovld __cnfn atanh(float3);\n" |
34876 | "float4 __ovld __cnfn atanh(float4);\n" |
34877 | "float8 __ovld __cnfn atanh(float8);\n" |
34878 | "float16 __ovld __cnfn atanh(float16);\n" |
34879 | "#ifdef cl_khr_fp64\n" |
34880 | "double __ovld __cnfn atanh(double);\n" |
34881 | "double2 __ovld __cnfn atanh(double2);\n" |
34882 | "double3 __ovld __cnfn atanh(double3);\n" |
34883 | "double4 __ovld __cnfn atanh(double4);\n" |
34884 | "double8 __ovld __cnfn atanh(double8);\n" |
34885 | "double16 __ovld __cnfn atanh(double16);\n" |
34886 | "#endif //cl_khr_fp64\n" |
34887 | "#ifdef cl_khr_fp16\n" |
34888 | "half __ovld __cnfn atanh(half);\n" |
34889 | "half2 __ovld __cnfn atanh(half2);\n" |
34890 | "half3 __ovld __cnfn atanh(half3);\n" |
34891 | "half4 __ovld __cnfn atanh(half4);\n" |
34892 | "half8 __ovld __cnfn atanh(half8);\n" |
34893 | "half16 __ovld __cnfn atanh(half16);\n" |
34894 | "#endif //cl_khr_fp16\n" |
34895 | "\n" |
34896 | "/**\n" |
34897 | " * Compute atan (x) / PI.\n" |
34898 | " */\n" |
34899 | "float __ovld __cnfn atanpi(float x);\n" |
34900 | "float2 __ovld __cnfn atanpi(float2 x);\n" |
34901 | "float3 __ovld __cnfn atanpi(float3 x);\n" |
34902 | "float4 __ovld __cnfn atanpi(float4 x);\n" |
34903 | "float8 __ovld __cnfn atanpi(float8 x);\n" |
34904 | "float16 __ovld __cnfn atanpi(float16 x);\n" |
34905 | "#ifdef cl_khr_fp64\n" |
34906 | "double __ovld __cnfn atanpi(double x);\n" |
34907 | "double2 __ovld __cnfn atanpi(double2 x);\n" |
34908 | "double3 __ovld __cnfn atanpi(double3 x);\n" |
34909 | "double4 __ovld __cnfn atanpi(double4 x);\n" |
34910 | "double8 __ovld __cnfn atanpi(double8 x);\n" |
34911 | "double16 __ovld __cnfn atanpi(double16 x);\n" |
34912 | "#endif //cl_khr_fp64\n" |
34913 | "#ifdef cl_khr_fp16\n" |
34914 | "half __ovld __cnfn atanpi(half x);\n" |
34915 | "half2 __ovld __cnfn atanpi(half2 x);\n" |
34916 | "half3 __ovld __cnfn atanpi(half3 x);\n" |
34917 | "half4 __ovld __cnfn atanpi(half4 x);\n" |
34918 | "half8 __ovld __cnfn atanpi(half8 x);\n" |
34919 | "half16 __ovld __cnfn atanpi(half16 x);\n" |
34920 | "#endif //cl_khr_fp16\n" |
34921 | "\n" |
34922 | "/**\n" |
34923 | " * Compute atan2 (y, x) / PI.\n" |
34924 | " */\n" |
34925 | "float __ovld __cnfn atan2pi(float y, float x);\n" |
34926 | "float2 __ovld __cnfn atan2pi(float2 y, float2 x);\n" |
34927 | "float3 __ovld __cnfn atan2pi(float3 y, float3 x);\n" |
34928 | "float4 __ovld __cnfn atan2pi(float4 y, float4 x);\n" |
34929 | "float8 __ovld __cnfn atan2pi(float8 y, float8 x);\n" |
34930 | "float16 __ovld __cnfn atan2pi(float16 y, float16 x);\n" |
34931 | "#ifdef cl_khr_fp64\n" |
34932 | "double __ovld __cnfn atan2pi(double y, double x);\n" |
34933 | "double2 __ovld __cnfn atan2pi(double2 y, double2 x);\n" |
34934 | "double3 __ovld __cnfn atan2pi(double3 y, double3 x);\n" |
34935 | "double4 __ovld __cnfn atan2pi(double4 y, double4 x);\n" |
34936 | "double8 __ovld __cnfn atan2pi(double8 y, double8 x);\n" |
34937 | "double16 __ovld __cnfn atan2pi(double16 y, double16 x);\n" |
34938 | "#endif //cl_khr_fp64\n" |
34939 | "#ifdef cl_khr_fp16\n" |
34940 | "half __ovld __cnfn atan2pi(half y, half x);\n" |
34941 | "half2 __ovld __cnfn atan2pi(half2 y, half2 x);\n" |
34942 | "half3 __ovld __cnfn atan2pi(half3 y, half3 x);\n" |
34943 | "half4 __ovld __cnfn atan2pi(half4 y, half4 x);\n" |
34944 | "half8 __ovld __cnfn atan2pi(half8 y, half8 x);\n" |
34945 | "half16 __ovld __cnfn atan2pi(half16 y, half16 x);\n" |
34946 | "#endif //cl_khr_fp16\n" |
34947 | "\n" |
34948 | "/**\n" |
34949 | " * Compute cube-root.\n" |
34950 | " */\n" |
34951 | "float __ovld __cnfn cbrt(float);\n" |
34952 | "float2 __ovld __cnfn cbrt(float2);\n" |
34953 | "float3 __ovld __cnfn cbrt(float3);\n" |
34954 | "float4 __ovld __cnfn cbrt(float4);\n" |
34955 | "float8 __ovld __cnfn cbrt(float8);\n" |
34956 | "float16 __ovld __cnfn cbrt(float16);\n" |
34957 | "#ifdef cl_khr_fp64\n" |
34958 | "double __ovld __cnfn cbrt(double);\n" |
34959 | "double2 __ovld __cnfn cbrt(double2);\n" |
34960 | "double3 __ovld __cnfn cbrt(double3);\n" |
34961 | "double4 __ovld __cnfn cbrt(double4);\n" |
34962 | "double8 __ovld __cnfn cbrt(double8);\n" |
34963 | "double16 __ovld __cnfn cbrt(double16);\n" |
34964 | "#endif //cl_khr_fp64\n" |
34965 | "#ifdef cl_khr_fp16\n" |
34966 | "half __ovld __cnfn cbrt(half);\n" |
34967 | "half2 __ovld __cnfn cbrt(half2);\n" |
34968 | "half3 __ovld __cnfn cbrt(half3);\n" |
34969 | "half4 __ovld __cnfn cbrt(half4);\n" |
34970 | "half8 __ovld __cnfn cbrt(half8);\n" |
34971 | "half16 __ovld __cnfn cbrt(half16);\n" |
34972 | "#endif //cl_khr_fp16\n" |
34973 | "\n" |
34974 | "/**\n" |
34975 | " * Round to integral value using the round to positive\n" |
34976 | " * infinity rounding mode.\n" |
34977 | " */\n" |
34978 | "float __ovld __cnfn ceil(float);\n" |
34979 | "float2 __ovld __cnfn ceil(float2);\n" |
34980 | "float3 __ovld __cnfn ceil(float3);\n" |
34981 | "float4 __ovld __cnfn ceil(float4);\n" |
34982 | "float8 __ovld __cnfn ceil(float8);\n" |
34983 | "float16 __ovld __cnfn ceil(float16);\n" |
34984 | "#ifdef cl_khr_fp64\n" |
34985 | "double __ovld __cnfn ceil(double);\n" |
34986 | "double2 __ovld __cnfn ceil(double2);\n" |
34987 | "double3 __ovld __cnfn ceil(double3);\n" |
34988 | "double4 __ovld __cnfn ceil(double4);\n" |
34989 | "double8 __ovld __cnfn ceil(double8);\n" |
34990 | "double16 __ovld __cnfn ceil(double16);\n" |
34991 | "#endif //cl_khr_fp64\n" |
34992 | "#ifdef cl_khr_fp16\n" |
34993 | "half __ovld __cnfn ceil(half);\n" |
34994 | "half2 __ovld __cnfn ceil(half2);\n" |
34995 | "half3 __ovld __cnfn ceil(half3);\n" |
34996 | "half4 __ovld __cnfn ceil(half4);\n" |
34997 | "half8 __ovld __cnfn ceil(half8);\n" |
34998 | "half16 __ovld __cnfn ceil(half16);\n" |
34999 | "#endif //cl_khr_fp16\n" |
35000 | "\n" |
35001 | "/**\n" |
35002 | " * Returns x with its sign changed to match the sign of y.\n" |
35003 | " */\n" |
35004 | "float __ovld __cnfn copysign(float x, float y);\n" |
35005 | "float2 __ovld __cnfn copysign(float2 x, float2 y);\n" |
35006 | "float3 __ovld __cnfn copysign(float3 x, float3 y);\n" |
35007 | "float4 __ovld __cnfn copysign(float4 x, float4 y);\n" |
35008 | "float8 __ovld __cnfn copysign(float8 x, float8 y);\n" |
35009 | "float16 __ovld __cnfn copysign(float16 x, float16 y);\n" |
35010 | "#ifdef cl_khr_fp64\n" |
35011 | "double __ovld __cnfn copysign(double x, double y);\n" |
35012 | "double2 __ovld __cnfn copysign(double2 x, double2 y);\n" |
35013 | "double3 __ovld __cnfn copysign(double3 x, double3 y);\n" |
35014 | "double4 __ovld __cnfn copysign(double4 x, double4 y);\n" |
35015 | "double8 __ovld __cnfn copysign(double8 x, double8 y);\n" |
35016 | "double16 __ovld __cnfn copysign(double16 x, double16 y);\n" |
35017 | "#endif //cl_khr_fp64\n" |
35018 | "#ifdef cl_khr_fp16\n" |
35019 | "half __ovld __cnfn copysign(half x, half y);\n" |
35020 | "half2 __ovld __cnfn copysign(half2 x, half2 y);\n" |
35021 | "half3 __ovld __cnfn copysign(half3 x, half3 y);\n" |
35022 | "half4 __ovld __cnfn copysign(half4 x, half4 y);\n" |
35023 | "half8 __ovld __cnfn copysign(half8 x, half8 y);\n" |
35024 | "half16 __ovld __cnfn copysign(half16 x, half16 y);\n" |
35025 | "#endif //cl_khr_fp16\n" |
35026 | "\n" |
35027 | "/**\n" |
35028 | " * Compute cosine.\n" |
35029 | " */\n" |
35030 | "float __ovld __cnfn cos(float);\n" |
35031 | "float2 __ovld __cnfn cos(float2);\n" |
35032 | "float3 __ovld __cnfn cos(float3);\n" |
35033 | "float4 __ovld __cnfn cos(float4);\n" |
35034 | "float8 __ovld __cnfn cos(float8);\n" |
35035 | "float16 __ovld __cnfn cos(float16);\n" |
35036 | "#ifdef cl_khr_fp64\n" |
35037 | "double __ovld __cnfn cos(double);\n" |
35038 | "double2 __ovld __cnfn cos(double2);\n" |
35039 | "double3 __ovld __cnfn cos(double3);\n" |
35040 | "double4 __ovld __cnfn cos(double4);\n" |
35041 | "double8 __ovld __cnfn cos(double8);\n" |
35042 | "double16 __ovld __cnfn cos(double16);\n" |
35043 | "#endif //cl_khr_fp64\n" |
35044 | "#ifdef cl_khr_fp16\n" |
35045 | "half __ovld __cnfn cos(half);\n" |
35046 | "half2 __ovld __cnfn cos(half2);\n" |
35047 | "half3 __ovld __cnfn cos(half3);\n" |
35048 | "half4 __ovld __cnfn cos(half4);\n" |
35049 | "half8 __ovld __cnfn cos(half8);\n" |
35050 | "half16 __ovld __cnfn cos(half16);\n" |
35051 | "#endif //cl_khr_fp16\n" |
35052 | "\n" |
35053 | "/**\n" |
35054 | " * Compute hyperbolic cosine.\n" |
35055 | " */\n" |
35056 | "float __ovld __cnfn cosh(float);\n" |
35057 | "float2 __ovld __cnfn cosh(float2);\n" |
35058 | "float3 __ovld __cnfn cosh(float3);\n" |
35059 | "float4 __ovld __cnfn cosh(float4);\n" |
35060 | "float8 __ovld __cnfn cosh(float8);\n" |
35061 | "float16 __ovld __cnfn cosh(float16);\n" |
35062 | "#ifdef cl_khr_fp64\n" |
35063 | "double __ovld __cnfn cosh(double);\n" |
35064 | "double2 __ovld __cnfn cosh(double2);\n" |
35065 | "double3 __ovld __cnfn cosh(double3);\n" |
35066 | "double4 __ovld __cnfn cosh(double4);\n" |
35067 | "double8 __ovld __cnfn cosh(double8);\n" |
35068 | "double16 __ovld __cnfn cosh(double16);\n" |
35069 | "#endif //cl_khr_fp64\n" |
35070 | "#ifdef cl_khr_fp16\n" |
35071 | "half __ovld __cnfn cosh(half);\n" |
35072 | "half2 __ovld __cnfn cosh(half2);\n" |
35073 | "half3 __ovld __cnfn cosh(half3);\n" |
35074 | "half4 __ovld __cnfn cosh(half4);\n" |
35075 | "half8 __ovld __cnfn cosh(half8);\n" |
35076 | "half16 __ovld __cnfn cosh(half16);\n" |
35077 | "#endif //cl_khr_fp16\n" |
35078 | "\n" |
35079 | "/**\n" |
35080 | " * Compute cos (PI * x).\n" |
35081 | " */\n" |
35082 | "float __ovld __cnfn cospi(float x);\n" |
35083 | "float2 __ovld __cnfn cospi(float2 x);\n" |
35084 | "float3 __ovld __cnfn cospi(float3 x);\n" |
35085 | "float4 __ovld __cnfn cospi(float4 x);\n" |
35086 | "float8 __ovld __cnfn cospi(float8 x);\n" |
35087 | "float16 __ovld __cnfn cospi(float16 x);\n" |
35088 | "#ifdef cl_khr_fp64\n" |
35089 | "double __ovld __cnfn cospi(double x);\n" |
35090 | "double2 __ovld __cnfn cospi(double2 x);\n" |
35091 | "double3 __ovld __cnfn cospi(double3 x);\n" |
35092 | "double4 __ovld __cnfn cospi(double4 x);\n" |
35093 | "double8 __ovld __cnfn cospi(double8 x);\n" |
35094 | "double16 __ovld __cnfn cospi(double16 x);\n" |
35095 | "#endif //cl_khr_fp64\n" |
35096 | "#ifdef cl_khr_fp16\n" |
35097 | "half __ovld __cnfn cospi(half x);\n" |
35098 | "half2 __ovld __cnfn cospi(half2 x);\n" |
35099 | "half3 __ovld __cnfn cospi(half3 x);\n" |
35100 | "half4 __ovld __cnfn cospi(half4 x);\n" |
35101 | "half8 __ovld __cnfn cospi(half8 x);\n" |
35102 | "half16 __ovld __cnfn cospi(half16 x);\n" |
35103 | "#endif //cl_khr_fp16\n" |
35104 | "\n" |
35105 | "/**\n" |
35106 | " * Complementary error function.\n" |
35107 | " */\n" |
35108 | "float __ovld __cnfn erfc(float);\n" |
35109 | "float2 __ovld __cnfn erfc(float2);\n" |
35110 | "float3 __ovld __cnfn erfc(float3);\n" |
35111 | "float4 __ovld __cnfn erfc(float4);\n" |
35112 | "float8 __ovld __cnfn erfc(float8);\n" |
35113 | "float16 __ovld __cnfn erfc(float16);\n" |
35114 | "#ifdef cl_khr_fp64\n" |
35115 | "double __ovld __cnfn erfc(double);\n" |
35116 | "double2 __ovld __cnfn erfc(double2);\n" |
35117 | "double3 __ovld __cnfn erfc(double3);\n" |
35118 | "double4 __ovld __cnfn erfc(double4);\n" |
35119 | "double8 __ovld __cnfn erfc(double8);\n" |
35120 | "double16 __ovld __cnfn erfc(double16);\n" |
35121 | "#endif //cl_khr_fp64\n" |
35122 | "#ifdef cl_khr_fp16\n" |
35123 | "half __ovld __cnfn erfc(half);\n" |
35124 | "half2 __ovld __cnfn erfc(half2);\n" |
35125 | "half3 __ovld __cnfn erfc(half3);\n" |
35126 | "half4 __ovld __cnfn erfc(half4);\n" |
35127 | "half8 __ovld __cnfn erfc(half8);\n" |
35128 | "half16 __ovld __cnfn erfc(half16);\n" |
35129 | "#endif //cl_khr_fp16\n" |
35130 | "\n" |
35131 | "/**\n" |
35132 | " * Error function encountered in integrating the\n" |
35133 | " * normal distribution.\n" |
35134 | " */\n" |
35135 | "float __ovld __cnfn erf(float);\n" |
35136 | "float2 __ovld __cnfn erf(float2);\n" |
35137 | "float3 __ovld __cnfn erf(float3);\n" |
35138 | "float4 __ovld __cnfn erf(float4);\n" |
35139 | "float8 __ovld __cnfn erf(float8);\n" |
35140 | "float16 __ovld __cnfn erf(float16);\n" |
35141 | "#ifdef cl_khr_fp64\n" |
35142 | "double __ovld __cnfn erf(double);\n" |
35143 | "double2 __ovld __cnfn erf(double2);\n" |
35144 | "double3 __ovld __cnfn erf(double3);\n" |
35145 | "double4 __ovld __cnfn erf(double4);\n" |
35146 | "double8 __ovld __cnfn erf(double8);\n" |
35147 | "double16 __ovld __cnfn erf(double16);\n" |
35148 | "#endif //cl_khr_fp64\n" |
35149 | "#ifdef cl_khr_fp16\n" |
35150 | "half __ovld __cnfn erf(half);\n" |
35151 | "half2 __ovld __cnfn erf(half2);\n" |
35152 | "half3 __ovld __cnfn erf(half3);\n" |
35153 | "half4 __ovld __cnfn erf(half4);\n" |
35154 | "half8 __ovld __cnfn erf(half8);\n" |
35155 | "half16 __ovld __cnfn erf(half16);\n" |
35156 | "#endif //cl_khr_fp16\n" |
35157 | "\n" |
35158 | "/**\n" |
35159 | " * Compute the base e exponential function of x.\n" |
35160 | " */\n" |
35161 | "float __ovld __cnfn exp(float x);\n" |
35162 | "float2 __ovld __cnfn exp(float2 x);\n" |
35163 | "float3 __ovld __cnfn exp(float3 x);\n" |
35164 | "float4 __ovld __cnfn exp(float4 x);\n" |
35165 | "float8 __ovld __cnfn exp(float8 x);\n" |
35166 | "float16 __ovld __cnfn exp(float16 x);\n" |
35167 | "#ifdef cl_khr_fp64\n" |
35168 | "double __ovld __cnfn exp(double x);\n" |
35169 | "double2 __ovld __cnfn exp(double2 x);\n" |
35170 | "double3 __ovld __cnfn exp(double3 x);\n" |
35171 | "double4 __ovld __cnfn exp(double4 x);\n" |
35172 | "double8 __ovld __cnfn exp(double8 x);\n" |
35173 | "double16 __ovld __cnfn exp(double16 x);\n" |
35174 | "#endif //cl_khr_fp64\n" |
35175 | "#ifdef cl_khr_fp16\n" |
35176 | "half __ovld __cnfn exp(half x);\n" |
35177 | "half2 __ovld __cnfn exp(half2 x);\n" |
35178 | "half3 __ovld __cnfn exp(half3 x);\n" |
35179 | "half4 __ovld __cnfn exp(half4 x);\n" |
35180 | "half8 __ovld __cnfn exp(half8 x);\n" |
35181 | "half16 __ovld __cnfn exp(half16 x);\n" |
35182 | "#endif //cl_khr_fp16\n" |
35183 | "\n" |
35184 | "/**\n" |
35185 | " * Exponential base 2 function.\n" |
35186 | " */\n" |
35187 | "float __ovld __cnfn exp2(float);\n" |
35188 | "float2 __ovld __cnfn exp2(float2);\n" |
35189 | "float3 __ovld __cnfn exp2(float3);\n" |
35190 | "float4 __ovld __cnfn exp2(float4);\n" |
35191 | "float8 __ovld __cnfn exp2(float8);\n" |
35192 | "float16 __ovld __cnfn exp2(float16);\n" |
35193 | "#ifdef cl_khr_fp64\n" |
35194 | "double __ovld __cnfn exp2(double);\n" |
35195 | "double2 __ovld __cnfn exp2(double2);\n" |
35196 | "double3 __ovld __cnfn exp2(double3);\n" |
35197 | "double4 __ovld __cnfn exp2(double4);\n" |
35198 | "double8 __ovld __cnfn exp2(double8);\n" |
35199 | "double16 __ovld __cnfn exp2(double16);\n" |
35200 | "#endif //cl_khr_fp64\n" |
35201 | "#ifdef cl_khr_fp16\n" |
35202 | "half __ovld __cnfn exp2(half);\n" |
35203 | "half2 __ovld __cnfn exp2(half2);\n" |
35204 | "half3 __ovld __cnfn exp2(half3);\n" |
35205 | "half4 __ovld __cnfn exp2(half4);\n" |
35206 | "half8 __ovld __cnfn exp2(half8);\n" |
35207 | "half16 __ovld __cnfn exp2(half16);\n" |
35208 | "#endif //cl_khr_fp16\n" |
35209 | "\n" |
35210 | "/**\n" |
35211 | " * Exponential base 10 function.\n" |
35212 | " */\n" |
35213 | "float __ovld __cnfn exp10(float);\n" |
35214 | "float2 __ovld __cnfn exp10(float2);\n" |
35215 | "float3 __ovld __cnfn exp10(float3);\n" |
35216 | "float4 __ovld __cnfn exp10(float4);\n" |
35217 | "float8 __ovld __cnfn exp10(float8);\n" |
35218 | "float16 __ovld __cnfn exp10(float16);\n" |
35219 | "#ifdef cl_khr_fp64\n" |
35220 | "double __ovld __cnfn exp10(double);\n" |
35221 | "double2 __ovld __cnfn exp10(double2);\n" |
35222 | "double3 __ovld __cnfn exp10(double3);\n" |
35223 | "double4 __ovld __cnfn exp10(double4);\n" |
35224 | "double8 __ovld __cnfn exp10(double8);\n" |
35225 | "double16 __ovld __cnfn exp10(double16);\n" |
35226 | "#endif //cl_khr_fp64\n" |
35227 | "#ifdef cl_khr_fp16\n" |
35228 | "half __ovld __cnfn exp10(half);\n" |
35229 | "half2 __ovld __cnfn exp10(half2);\n" |
35230 | "half3 __ovld __cnfn exp10(half3);\n" |
35231 | "half4 __ovld __cnfn exp10(half4);\n" |
35232 | "half8 __ovld __cnfn exp10(half8);\n" |
35233 | "half16 __ovld __cnfn exp10(half16);\n" |
35234 | "#endif //cl_khr_fp16\n" |
35235 | "\n" |
35236 | "/**\n" |
35237 | " * Compute e^x- 1.0.\n" |
35238 | " */\n" |
35239 | "float __ovld __cnfn expm1(float x);\n" |
35240 | "float2 __ovld __cnfn expm1(float2 x);\n" |
35241 | "float3 __ovld __cnfn expm1(float3 x);\n" |
35242 | "float4 __ovld __cnfn expm1(float4 x);\n" |
35243 | "float8 __ovld __cnfn expm1(float8 x);\n" |
35244 | "float16 __ovld __cnfn expm1(float16 x);\n" |
35245 | "#ifdef cl_khr_fp64\n" |
35246 | "double __ovld __cnfn expm1(double x);\n" |
35247 | "double2 __ovld __cnfn expm1(double2 x);\n" |
35248 | "double3 __ovld __cnfn expm1(double3 x);\n" |
35249 | "double4 __ovld __cnfn expm1(double4 x);\n" |
35250 | "double8 __ovld __cnfn expm1(double8 x);\n" |
35251 | "double16 __ovld __cnfn expm1(double16 x);\n" |
35252 | "#endif //cl_khr_fp64\n" |
35253 | "#ifdef cl_khr_fp16\n" |
35254 | "half __ovld __cnfn expm1(half x);\n" |
35255 | "half2 __ovld __cnfn expm1(half2 x);\n" |
35256 | "half3 __ovld __cnfn expm1(half3 x);\n" |
35257 | "half4 __ovld __cnfn expm1(half4 x);\n" |
35258 | "half8 __ovld __cnfn expm1(half8 x);\n" |
35259 | "half16 __ovld __cnfn expm1(half16 x);\n" |
35260 | "#endif //cl_khr_fp16\n" |
35261 | "\n" |
35262 | "/**\n" |
35263 | " * Compute absolute value of a floating-point number.\n" |
35264 | " */\n" |
35265 | "float __ovld __cnfn fabs(float);\n" |
35266 | "float2 __ovld __cnfn fabs(float2);\n" |
35267 | "float3 __ovld __cnfn fabs(float3);\n" |
35268 | "float4 __ovld __cnfn fabs(float4);\n" |
35269 | "float8 __ovld __cnfn fabs(float8);\n" |
35270 | "float16 __ovld __cnfn fabs(float16);\n" |
35271 | "#ifdef cl_khr_fp64\n" |
35272 | "double __ovld __cnfn fabs(double);\n" |
35273 | "double2 __ovld __cnfn fabs(double2);\n" |
35274 | "double3 __ovld __cnfn fabs(double3);\n" |
35275 | "double4 __ovld __cnfn fabs(double4);\n" |
35276 | "double8 __ovld __cnfn fabs(double8);\n" |
35277 | "double16 __ovld __cnfn fabs(double16);\n" |
35278 | "#endif //cl_khr_fp64\n" |
35279 | "#ifdef cl_khr_fp16\n" |
35280 | "half __ovld __cnfn fabs(half);\n" |
35281 | "half2 __ovld __cnfn fabs(half2);\n" |
35282 | "half3 __ovld __cnfn fabs(half3);\n" |
35283 | "half4 __ovld __cnfn fabs(half4);\n" |
35284 | "half8 __ovld __cnfn fabs(half8);\n" |
35285 | "half16 __ovld __cnfn fabs(half16);\n" |
35286 | "#endif //cl_khr_fp16\n" |
35287 | "\n" |
35288 | "/**\n" |
35289 | " * x - y if x > y, +0 if x is less than or equal to y.\n" |
35290 | " */\n" |
35291 | "float __ovld __cnfn fdim(float x, float y);\n" |
35292 | "float2 __ovld __cnfn fdim(float2 x, float2 y);\n" |
35293 | "float3 __ovld __cnfn fdim(float3 x, float3 y);\n" |
35294 | "float4 __ovld __cnfn fdim(float4 x, float4 y);\n" |
35295 | "float8 __ovld __cnfn fdim(float8 x, float8 y);\n" |
35296 | "float16 __ovld __cnfn fdim(float16 x, float16 y);\n" |
35297 | "#ifdef cl_khr_fp64\n" |
35298 | "double __ovld __cnfn fdim(double x, double y);\n" |
35299 | "double2 __ovld __cnfn fdim(double2 x, double2 y);\n" |
35300 | "double3 __ovld __cnfn fdim(double3 x, double3 y);\n" |
35301 | "double4 __ovld __cnfn fdim(double4 x, double4 y);\n" |
35302 | "double8 __ovld __cnfn fdim(double8 x, double8 y);\n" |
35303 | "double16 __ovld __cnfn fdim(double16 x, double16 y);\n" |
35304 | "#endif //cl_khr_fp64\n" |
35305 | "#ifdef cl_khr_fp16\n" |
35306 | "half __ovld __cnfn fdim(half x, half y);\n" |
35307 | "half2 __ovld __cnfn fdim(half2 x, half2 y);\n" |
35308 | "half3 __ovld __cnfn fdim(half3 x, half3 y);\n" |
35309 | "half4 __ovld __cnfn fdim(half4 x, half4 y);\n" |
35310 | "half8 __ovld __cnfn fdim(half8 x, half8 y);\n" |
35311 | "half16 __ovld __cnfn fdim(half16 x, half16 y);\n" |
35312 | "#endif //cl_khr_fp16\n" |
35313 | "\n" |
35314 | "/**\n" |
35315 | " * Round to integral value using the round to -ve\n" |
35316 | " * infinity rounding mode.\n" |
35317 | " */\n" |
35318 | "float __ovld __cnfn floor(float);\n" |
35319 | "float2 __ovld __cnfn floor(float2);\n" |
35320 | "float3 __ovld __cnfn floor(float3);\n" |
35321 | "float4 __ovld __cnfn floor(float4);\n" |
35322 | "float8 __ovld __cnfn floor(float8);\n" |
35323 | "float16 __ovld __cnfn floor(float16);\n" |
35324 | "#ifdef cl_khr_fp64\n" |
35325 | "double __ovld __cnfn floor(double);\n" |
35326 | "double2 __ovld __cnfn floor(double2);\n" |
35327 | "double3 __ovld __cnfn floor(double3);\n" |
35328 | "double4 __ovld __cnfn floor(double4);\n" |
35329 | "double8 __ovld __cnfn floor(double8);\n" |
35330 | "double16 __ovld __cnfn floor(double16);\n" |
35331 | "#endif //cl_khr_fp64\n" |
35332 | "#ifdef cl_khr_fp16\n" |
35333 | "half __ovld __cnfn floor(half);\n" |
35334 | "half2 __ovld __cnfn floor(half2);\n" |
35335 | "half3 __ovld __cnfn floor(half3);\n" |
35336 | "half4 __ovld __cnfn floor(half4);\n" |
35337 | "half8 __ovld __cnfn floor(half8);\n" |
35338 | "half16 __ovld __cnfn floor(half16);\n" |
35339 | "#endif //cl_khr_fp16\n" |
35340 | "\n" |
35341 | "/**\n" |
35342 | " * Returns the correctly rounded floating-point\n" |
35343 | " * representation of the sum of c with the infinitely\n" |
35344 | " * precise product of a and b. Rounding of\n" |
35345 | " * intermediate products shall not occur. Edge case\n" |
35346 | " * behavior is per the IEEE 754-2008 standard.\n" |
35347 | " */\n" |
35348 | "float __ovld __cnfn fma(float a, float b, float c);\n" |
35349 | "float2 __ovld __cnfn fma(float2 a, float2 b, float2 c);\n" |
35350 | "float3 __ovld __cnfn fma(float3 a, float3 b, float3 c);\n" |
35351 | "float4 __ovld __cnfn fma(float4 a, float4 b, float4 c);\n" |
35352 | "float8 __ovld __cnfn fma(float8 a, float8 b, float8 c);\n" |
35353 | "float16 __ovld __cnfn fma(float16 a, float16 b, float16 c);\n" |
35354 | "#ifdef cl_khr_fp64\n" |
35355 | "double __ovld __cnfn fma(double a, double b, double c);\n" |
35356 | "double2 __ovld __cnfn fma(double2 a, double2 b, double2 c);\n" |
35357 | "double3 __ovld __cnfn fma(double3 a, double3 b, double3 c);\n" |
35358 | "double4 __ovld __cnfn fma(double4 a, double4 b, double4 c);\n" |
35359 | "double8 __ovld __cnfn fma(double8 a, double8 b, double8 c);\n" |
35360 | "double16 __ovld __cnfn fma(double16 a, double16 b, double16 c);\n" |
35361 | "#endif //cl_khr_fp64\n" |
35362 | "#ifdef cl_khr_fp16\n" |
35363 | "half __ovld __cnfn fma(half a, half b, half c);\n" |
35364 | "half2 __ovld __cnfn fma(half2 a, half2 b, half2 c);\n" |
35365 | "half3 __ovld __cnfn fma(half3 a, half3 b, half3 c);\n" |
35366 | "half4 __ovld __cnfn fma(half4 a, half4 b, half4 c);\n" |
35367 | "half8 __ovld __cnfn fma(half8 a, half8 b, half8 c);\n" |
35368 | "half16 __ovld __cnfn fma(half16 a, half16 b, half16 c);\n" |
35369 | "#endif //cl_khr_fp16\n" |
35370 | "\n" |
35371 | "/**\n" |
35372 | " * Returns y if x < y, otherwise it returns x. If one\n" |
35373 | " * argument is a NaN, fmax() returns the other\n" |
35374 | " * argument. If both arguments are NaNs, fmax()\n" |
35375 | " * returns a NaN.\n" |
35376 | " */\n" |
35377 | "float __ovld __cnfn fmax(float x, float y);\n" |
35378 | "float2 __ovld __cnfn fmax(float2 x, float2 y);\n" |
35379 | "float3 __ovld __cnfn fmax(float3 x, float3 y);\n" |
35380 | "float4 __ovld __cnfn fmax(float4 x, float4 y);\n" |
35381 | "float8 __ovld __cnfn fmax(float8 x, float8 y);\n" |
35382 | "float16 __ovld __cnfn fmax(float16 x, float16 y);\n" |
35383 | "float2 __ovld __cnfn fmax(float2 x, float y);\n" |
35384 | "float3 __ovld __cnfn fmax(float3 x, float y);\n" |
35385 | "float4 __ovld __cnfn fmax(float4 x, float y);\n" |
35386 | "float8 __ovld __cnfn fmax(float8 x, float y);\n" |
35387 | "float16 __ovld __cnfn fmax(float16 x, float y);\n" |
35388 | "#ifdef cl_khr_fp64\n" |
35389 | "double __ovld __cnfn fmax(double x, double y);\n" |
35390 | "double2 __ovld __cnfn fmax(double2 x, double2 y);\n" |
35391 | "double3 __ovld __cnfn fmax(double3 x, double3 y);\n" |
35392 | "double4 __ovld __cnfn fmax(double4 x, double4 y);\n" |
35393 | "double8 __ovld __cnfn fmax(double8 x, double8 y);\n" |
35394 | "double16 __ovld __cnfn fmax(double16 x, double16 y);\n" |
35395 | "double2 __ovld __cnfn fmax(double2 x, double y);\n" |
35396 | "double3 __ovld __cnfn fmax(double3 x, double y);\n" |
35397 | "double4 __ovld __cnfn fmax(double4 x, double y);\n" |
35398 | "double8 __ovld __cnfn fmax(double8 x, double y);\n" |
35399 | "double16 __ovld __cnfn fmax(double16 x, double y);\n" |
35400 | "#endif //cl_khr_fp64\n" |
35401 | "#ifdef cl_khr_fp16\n" |
35402 | "half __ovld __cnfn fmax(half x, half y);\n" |
35403 | "half2 __ovld __cnfn fmax(half2 x, half2 y);\n" |
35404 | "half3 __ovld __cnfn fmax(half3 x, half3 y);\n" |
35405 | "half4 __ovld __cnfn fmax(half4 x, half4 y);\n" |
35406 | "half8 __ovld __cnfn fmax(half8 x, half8 y);\n" |
35407 | "half16 __ovld __cnfn fmax(half16 x, half16 y);\n" |
35408 | "half2 __ovld __cnfn fmax(half2 x, half y);\n" |
35409 | "half3 __ovld __cnfn fmax(half3 x, half y);\n" |
35410 | "half4 __ovld __cnfn fmax(half4 x, half y);\n" |
35411 | "half8 __ovld __cnfn fmax(half8 x, half y);\n" |
35412 | "half16 __ovld __cnfn fmax(half16 x, half y);\n" |
35413 | "#endif //cl_khr_fp16\n" |
35414 | "\n" |
35415 | "/**\n" |
35416 | " * Returns y if y < x, otherwise it returns x. If one\n" |
35417 | " * argument is a NaN, fmin() returns the other\n" |
35418 | " * argument. If both arguments are NaNs, fmin()\n" |
35419 | " * returns a NaN.\n" |
35420 | " */\n" |
35421 | "float __ovld __cnfn fmin(float x, float y);\n" |
35422 | "float2 __ovld __cnfn fmin(float2 x, float2 y);\n" |
35423 | "float3 __ovld __cnfn fmin(float3 x, float3 y);\n" |
35424 | "float4 __ovld __cnfn fmin(float4 x, float4 y);\n" |
35425 | "float8 __ovld __cnfn fmin(float8 x, float8 y);\n" |
35426 | "float16 __ovld __cnfn fmin(float16 x, float16 y);\n" |
35427 | "float2 __ovld __cnfn fmin(float2 x, float y);\n" |
35428 | "float3 __ovld __cnfn fmin(float3 x, float y);\n" |
35429 | "float4 __ovld __cnfn fmin(float4 x, float y);\n" |
35430 | "float8 __ovld __cnfn fmin(float8 x, float y);\n" |
35431 | "float16 __ovld __cnfn fmin(float16 x, float y);\n" |
35432 | "#ifdef cl_khr_fp64\n" |
35433 | "double __ovld __cnfn fmin(double x, double y);\n" |
35434 | "double2 __ovld __cnfn fmin(double2 x, double2 y);\n" |
35435 | "double3 __ovld __cnfn fmin(double3 x, double3 y);\n" |
35436 | "double4 __ovld __cnfn fmin(double4 x, double4 y);\n" |
35437 | "double8 __ovld __cnfn fmin(double8 x, double8 y);\n" |
35438 | "double16 __ovld __cnfn fmin(double16 x, double16 y);\n" |
35439 | "double2 __ovld __cnfn fmin(double2 x, double y);\n" |
35440 | "double3 __ovld __cnfn fmin(double3 x, double y);\n" |
35441 | "double4 __ovld __cnfn fmin(double4 x, double y);\n" |
35442 | "double8 __ovld __cnfn fmin(double8 x, double y);\n" |
35443 | "double16 __ovld __cnfn fmin(double16 x, double y);\n" |
35444 | "#endif //cl_khr_fp64\n" |
35445 | "#ifdef cl_khr_fp16\n" |
35446 | "half __ovld __cnfn fmin(half x, half y);\n" |
35447 | "half2 __ovld __cnfn fmin(half2 x, half2 y);\n" |
35448 | "half3 __ovld __cnfn fmin(half3 x, half3 y);\n" |
35449 | "half4 __ovld __cnfn fmin(half4 x, half4 y);\n" |
35450 | "half8 __ovld __cnfn fmin(half8 x, half8 y);\n" |
35451 | "half16 __ovld __cnfn fmin(half16 x, half16 y);\n" |
35452 | "half2 __ovld __cnfn fmin(half2 x, half y);\n" |
35453 | "half3 __ovld __cnfn fmin(half3 x, half y);\n" |
35454 | "half4 __ovld __cnfn fmin(half4 x, half y);\n" |
35455 | "half8 __ovld __cnfn fmin(half8 x, half y);\n" |
35456 | "half16 __ovld __cnfn fmin(half16 x, half y);\n" |
35457 | "#endif //cl_khr_fp16\n" |
35458 | "\n" |
35459 | "/**\n" |
35460 | " * Modulus. Returns x - y * trunc (x/y).\n" |
35461 | " */\n" |
35462 | "float __ovld __cnfn fmod(float x, float y);\n" |
35463 | "float2 __ovld __cnfn fmod(float2 x, float2 y);\n" |
35464 | "float3 __ovld __cnfn fmod(float3 x, float3 y);\n" |
35465 | "float4 __ovld __cnfn fmod(float4 x, float4 y);\n" |
35466 | "float8 __ovld __cnfn fmod(float8 x, float8 y);\n" |
35467 | "float16 __ovld __cnfn fmod(float16 x, float16 y);\n" |
35468 | "#ifdef cl_khr_fp64\n" |
35469 | "double __ovld __cnfn fmod(double x, double y);\n" |
35470 | "double2 __ovld __cnfn fmod(double2 x, double2 y);\n" |
35471 | "double3 __ovld __cnfn fmod(double3 x, double3 y);\n" |
35472 | "double4 __ovld __cnfn fmod(double4 x, double4 y);\n" |
35473 | "double8 __ovld __cnfn fmod(double8 x, double8 y);\n" |
35474 | "double16 __ovld __cnfn fmod(double16 x, double16 y);\n" |
35475 | "#endif //cl_khr_fp64\n" |
35476 | "#ifdef cl_khr_fp16\n" |
35477 | "half __ovld __cnfn fmod(half x, half y);\n" |
35478 | "half2 __ovld __cnfn fmod(half2 x, half2 y);\n" |
35479 | "half3 __ovld __cnfn fmod(half3 x, half3 y);\n" |
35480 | "half4 __ovld __cnfn fmod(half4 x, half4 y);\n" |
35481 | "half8 __ovld __cnfn fmod(half8 x, half8 y);\n" |
35482 | "half16 __ovld __cnfn fmod(half16 x, half16 y);\n" |
35483 | "#endif //cl_khr_fp16\n" |
35484 | "\n" |
35485 | "/**\n" |
35486 | " * Returns fmin(x - floor (x), 0x1.fffffep-1f ).\n" |
35487 | " * floor(x) is returned in iptr.\n" |
35488 | " */\n" |
35489 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
35490 | "float __ovld fract(float x, float *iptr);\n" |
35491 | "float2 __ovld fract(float2 x, float2 *iptr);\n" |
35492 | "float3 __ovld fract(float3 x, float3 *iptr);\n" |
35493 | "float4 __ovld fract(float4 x, float4 *iptr);\n" |
35494 | "float8 __ovld fract(float8 x, float8 *iptr);\n" |
35495 | "float16 __ovld fract(float16 x, float16 *iptr);\n" |
35496 | "#ifdef cl_khr_fp64\n" |
35497 | "double __ovld fract(double x, double *iptr);\n" |
35498 | "double2 __ovld fract(double2 x, double2 *iptr);\n" |
35499 | "double3 __ovld fract(double3 x, double3 *iptr);\n" |
35500 | "double4 __ovld fract(double4 x, double4 *iptr);\n" |
35501 | "double8 __ovld fract(double8 x, double8 *iptr);\n" |
35502 | "double16 __ovld fract(double16 x, double16 *iptr);\n" |
35503 | "#endif //cl_khr_fp64\n" |
35504 | "#ifdef cl_khr_fp16\n" |
35505 | "half __ovld fract(half x, half *iptr);\n" |
35506 | "half2 __ovld fract(half2 x, half2 *iptr);\n" |
35507 | "half3 __ovld fract(half3 x, half3 *iptr);\n" |
35508 | "half4 __ovld fract(half4 x, half4 *iptr);\n" |
35509 | "half8 __ovld fract(half8 x, half8 *iptr);\n" |
35510 | "half16 __ovld fract(half16 x, half16 *iptr);\n" |
35511 | "#endif //cl_khr_fp16\n" |
35512 | "#else\n" |
35513 | "float __ovld fract(float x, __global float *iptr);\n" |
35514 | "float2 __ovld fract(float2 x, __global float2 *iptr);\n" |
35515 | "float3 __ovld fract(float3 x, __global float3 *iptr);\n" |
35516 | "float4 __ovld fract(float4 x, __global float4 *iptr);\n" |
35517 | "float8 __ovld fract(float8 x, __global float8 *iptr);\n" |
35518 | "float16 __ovld fract(float16 x, __global float16 *iptr);\n" |
35519 | "float __ovld fract(float x, __local float *iptr);\n" |
35520 | "float2 __ovld fract(float2 x, __local float2 *iptr);\n" |
35521 | "float3 __ovld fract(float3 x, __local float3 *iptr);\n" |
35522 | "float4 __ovld fract(float4 x, __local float4 *iptr);\n" |
35523 | "float8 __ovld fract(float8 x, __local float8 *iptr);\n" |
35524 | "float16 __ovld fract(float16 x, __local float16 *iptr);\n" |
35525 | "float __ovld fract(float x, __private float *iptr);\n" |
35526 | "float2 __ovld fract(float2 x, __private float2 *iptr);\n" |
35527 | "float3 __ovld fract(float3 x, __private float3 *iptr);\n" |
35528 | "float4 __ovld fract(float4 x, __private float4 *iptr);\n" |
35529 | "float8 __ovld fract(float8 x, __private float8 *iptr);\n" |
35530 | "float16 __ovld fract(float16 x, __private float16 *iptr);\n" |
35531 | "#ifdef cl_khr_fp64\n" |
35532 | "double __ovld fract(double x, __global double *iptr);\n" |
35533 | "double2 __ovld fract(double2 x, __global double2 *iptr);\n" |
35534 | "double3 __ovld fract(double3 x, __global double3 *iptr);\n" |
35535 | "double4 __ovld fract(double4 x, __global double4 *iptr);\n" |
35536 | "double8 __ovld fract(double8 x, __global double8 *iptr);\n" |
35537 | "double16 __ovld fract(double16 x, __global double16 *iptr);\n" |
35538 | "double __ovld fract(double x, __local double *iptr);\n" |
35539 | "double2 __ovld fract(double2 x, __local double2 *iptr);\n" |
35540 | "double3 __ovld fract(double3 x, __local double3 *iptr);\n" |
35541 | "double4 __ovld fract(double4 x, __local double4 *iptr);\n" |
35542 | "double8 __ovld fract(double8 x, __local double8 *iptr);\n" |
35543 | "double16 __ovld fract(double16 x, __local double16 *iptr);\n" |
35544 | "double __ovld fract(double x, __private double *iptr);\n" |
35545 | "double2 __ovld fract(double2 x, __private double2 *iptr);\n" |
35546 | "double3 __ovld fract(double3 x, __private double3 *iptr);\n" |
35547 | "double4 __ovld fract(double4 x, __private double4 *iptr);\n" |
35548 | "double8 __ovld fract(double8 x, __private double8 *iptr);\n" |
35549 | "double16 __ovld fract(double16 x, __private double16 *iptr);\n" |
35550 | "#endif //cl_khr_fp64\n" |
35551 | "#ifdef cl_khr_fp16\n" |
35552 | "half __ovld fract(half x, __global half *iptr);\n" |
35553 | "half2 __ovld fract(half2 x, __global half2 *iptr);\n" |
35554 | "half3 __ovld fract(half3 x, __global half3 *iptr);\n" |
35555 | "half4 __ovld fract(half4 x, __global half4 *iptr);\n" |
35556 | "half8 __ovld fract(half8 x, __global half8 *iptr);\n" |
35557 | "half16 __ovld fract(half16 x, __global half16 *iptr);\n" |
35558 | "half __ovld fract(half x, __local half *iptr);\n" |
35559 | "half2 __ovld fract(half2 x, __local half2 *iptr);\n" |
35560 | "half3 __ovld fract(half3 x, __local half3 *iptr);\n" |
35561 | "half4 __ovld fract(half4 x, __local half4 *iptr);\n" |
35562 | "half8 __ovld fract(half8 x, __local half8 *iptr);\n" |
35563 | "half16 __ovld fract(half16 x, __local half16 *iptr);\n" |
35564 | "half __ovld fract(half x, __private half *iptr);\n" |
35565 | "half2 __ovld fract(half2 x, __private half2 *iptr);\n" |
35566 | "half3 __ovld fract(half3 x, __private half3 *iptr);\n" |
35567 | "half4 __ovld fract(half4 x, __private half4 *iptr);\n" |
35568 | "half8 __ovld fract(half8 x, __private half8 *iptr);\n" |
35569 | "half16 __ovld fract(half16 x, __private half16 *iptr);\n" |
35570 | "#endif //cl_khr_fp16\n" |
35571 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
35572 | "\n" |
35573 | "/**\n" |
35574 | " * Extract mantissa and exponent from x. For each\n" |
35575 | " * component the mantissa returned is a float with\n" |
35576 | " * magnitude in the interval [1/2, 1) or 0. Each\n" |
35577 | " * component of x equals mantissa returned * 2^exp.\n" |
35578 | " */\n" |
35579 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
35580 | "float __ovld frexp(float x, int *exp);\n" |
35581 | "float2 __ovld frexp(float2 x, int2 *exp);\n" |
35582 | "float3 __ovld frexp(float3 x, int3 *exp);\n" |
35583 | "float4 __ovld frexp(float4 x, int4 *exp);\n" |
35584 | "float8 __ovld frexp(float8 x, int8 *exp);\n" |
35585 | "float16 __ovld frexp(float16 x, int16 *exp);\n" |
35586 | "#ifdef cl_khr_fp64\n" |
35587 | "double __ovld frexp(double x, int *exp);\n" |
35588 | "double2 __ovld frexp(double2 x, int2 *exp);\n" |
35589 | "double3 __ovld frexp(double3 x, int3 *exp);\n" |
35590 | "double4 __ovld frexp(double4 x, int4 *exp);\n" |
35591 | "double8 __ovld frexp(double8 x, int8 *exp);\n" |
35592 | "double16 __ovld frexp(double16 x, int16 *exp);\n" |
35593 | "#endif //cl_khr_fp64\n" |
35594 | "#ifdef cl_khr_fp16\n" |
35595 | "half __ovld frexp(half x, int *exp);\n" |
35596 | "half2 __ovld frexp(half2 x, int2 *exp);\n" |
35597 | "half3 __ovld frexp(half3 x, int3 *exp);\n" |
35598 | "half4 __ovld frexp(half4 x, int4 *exp);\n" |
35599 | "half8 __ovld frexp(half8 x, int8 *exp);\n" |
35600 | "half16 __ovld frexp(half16 x, int16 *exp);\n" |
35601 | "#endif //cl_khr_fp16\n" |
35602 | "#else\n" |
35603 | "float __ovld frexp(float x, __global int *exp);\n" |
35604 | "float2 __ovld frexp(float2 x, __global int2 *exp);\n" |
35605 | "float3 __ovld frexp(float3 x, __global int3 *exp);\n" |
35606 | "float4 __ovld frexp(float4 x, __global int4 *exp);\n" |
35607 | "float8 __ovld frexp(float8 x, __global int8 *exp);\n" |
35608 | "float16 __ovld frexp(float16 x, __global int16 *exp);\n" |
35609 | "float __ovld frexp(float x, __local int *exp);\n" |
35610 | "float2 __ovld frexp(float2 x, __local int2 *exp);\n" |
35611 | "float3 __ovld frexp(float3 x, __local int3 *exp);\n" |
35612 | "float4 __ovld frexp(float4 x, __local int4 *exp);\n" |
35613 | "float8 __ovld frexp(float8 x, __local int8 *exp);\n" |
35614 | "float16 __ovld frexp(float16 x, __local int16 *exp);\n" |
35615 | "float __ovld frexp(float x, __private int *exp);\n" |
35616 | "float2 __ovld frexp(float2 x, __private int2 *exp);\n" |
35617 | "float3 __ovld frexp(float3 x, __private int3 *exp);\n" |
35618 | "float4 __ovld frexp(float4 x, __private int4 *exp);\n" |
35619 | "float8 __ovld frexp(float8 x, __private int8 *exp);\n" |
35620 | "float16 __ovld frexp(float16 x, __private int16 *exp);\n" |
35621 | "#ifdef cl_khr_fp64\n" |
35622 | "double __ovld frexp(double x, __global int *exp);\n" |
35623 | "double2 __ovld frexp(double2 x, __global int2 *exp);\n" |
35624 | "double3 __ovld frexp(double3 x, __global int3 *exp);\n" |
35625 | "double4 __ovld frexp(double4 x, __global int4 *exp);\n" |
35626 | "double8 __ovld frexp(double8 x, __global int8 *exp);\n" |
35627 | "double16 __ovld frexp(double16 x, __global int16 *exp);\n" |
35628 | "double __ovld frexp(double x, __local int *exp);\n" |
35629 | "double2 __ovld frexp(double2 x, __local int2 *exp);\n" |
35630 | "double3 __ovld frexp(double3 x, __local int3 *exp);\n" |
35631 | "double4 __ovld frexp(double4 x, __local int4 *exp);\n" |
35632 | "double8 __ovld frexp(double8 x, __local int8 *exp);\n" |
35633 | "double16 __ovld frexp(double16 x, __local int16 *exp);\n" |
35634 | "double __ovld frexp(double x, __private int *exp);\n" |
35635 | "double2 __ovld frexp(double2 x, __private int2 *exp);\n" |
35636 | "double3 __ovld frexp(double3 x, __private int3 *exp);\n" |
35637 | "double4 __ovld frexp(double4 x, __private int4 *exp);\n" |
35638 | "double8 __ovld frexp(double8 x, __private int8 *exp);\n" |
35639 | "double16 __ovld frexp(double16 x, __private int16 *exp);\n" |
35640 | "#endif //cl_khr_fp64\n" |
35641 | "#ifdef cl_khr_fp16\n" |
35642 | "half __ovld frexp(half x, __global int *exp);\n" |
35643 | "half2 __ovld frexp(half2 x, __global int2 *exp);\n" |
35644 | "half3 __ovld frexp(half3 x, __global int3 *exp);\n" |
35645 | "half4 __ovld frexp(half4 x, __global int4 *exp);\n" |
35646 | "half8 __ovld frexp(half8 x, __global int8 *exp);\n" |
35647 | "half16 __ovld frexp(half16 x, __global int16 *exp);\n" |
35648 | "half __ovld frexp(half x, __local int *exp);\n" |
35649 | "half2 __ovld frexp(half2 x, __local int2 *exp);\n" |
35650 | "half3 __ovld frexp(half3 x, __local int3 *exp);\n" |
35651 | "half4 __ovld frexp(half4 x, __local int4 *exp);\n" |
35652 | "half8 __ovld frexp(half8 x, __local int8 *exp);\n" |
35653 | "half16 __ovld frexp(half16 x, __local int16 *exp);\n" |
35654 | "half __ovld frexp(half x, __private int *exp);\n" |
35655 | "half2 __ovld frexp(half2 x, __private int2 *exp);\n" |
35656 | "half3 __ovld frexp(half3 x, __private int3 *exp);\n" |
35657 | "half4 __ovld frexp(half4 x, __private int4 *exp);\n" |
35658 | "half8 __ovld frexp(half8 x, __private int8 *exp);\n" |
35659 | "half16 __ovld frexp(half16 x, __private int16 *exp);\n" |
35660 | "#endif //cl_khr_fp16\n" |
35661 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
35662 | "\n" |
35663 | "/**\n" |
35664 | " * Compute the value of the square root of x^2 + y^2\n" |
35665 | " * without undue overflow or underflow.\n" |
35666 | " */\n" |
35667 | "float __ovld __cnfn hypot(float x, float y);\n" |
35668 | "float2 __ovld __cnfn hypot(float2 x, float2 y);\n" |
35669 | "float3 __ovld __cnfn hypot(float3 x, float3 y);\n" |
35670 | "float4 __ovld __cnfn hypot(float4 x, float4 y);\n" |
35671 | "float8 __ovld __cnfn hypot(float8 x, float8 y);\n" |
35672 | "float16 __ovld __cnfn hypot(float16 x, float16 y);\n" |
35673 | "#ifdef cl_khr_fp64\n" |
35674 | "double __ovld __cnfn hypot(double x, double y);\n" |
35675 | "double2 __ovld __cnfn hypot(double2 x, double2 y);\n" |
35676 | "double3 __ovld __cnfn hypot(double3 x, double3 y);\n" |
35677 | "double4 __ovld __cnfn hypot(double4 x, double4 y);\n" |
35678 | "double8 __ovld __cnfn hypot(double8 x, double8 y);\n" |
35679 | "double16 __ovld __cnfn hypot(double16 x, double16 y);\n" |
35680 | "#endif //cl_khr_fp64\n" |
35681 | "#ifdef cl_khr_fp16\n" |
35682 | "half __ovld __cnfn hypot(half x, half y);\n" |
35683 | "half2 __ovld __cnfn hypot(half2 x, half2 y);\n" |
35684 | "half3 __ovld __cnfn hypot(half3 x, half3 y);\n" |
35685 | "half4 __ovld __cnfn hypot(half4 x, half4 y);\n" |
35686 | "half8 __ovld __cnfn hypot(half8 x, half8 y);\n" |
35687 | "half16 __ovld __cnfn hypot(half16 x, half16 y);\n" |
35688 | "#endif //cl_khr_fp16\n" |
35689 | "\n" |
35690 | "/**\n" |
35691 | " * Return the exponent as an integer value.\n" |
35692 | " */\n" |
35693 | "int __ovld __cnfn ilogb(float x);\n" |
35694 | "int2 __ovld __cnfn ilogb(float2 x);\n" |
35695 | "int3 __ovld __cnfn ilogb(float3 x);\n" |
35696 | "int4 __ovld __cnfn ilogb(float4 x);\n" |
35697 | "int8 __ovld __cnfn ilogb(float8 x);\n" |
35698 | "int16 __ovld __cnfn ilogb(float16 x);\n" |
35699 | "#ifdef cl_khr_fp64\n" |
35700 | "int __ovld __cnfn ilogb(double x);\n" |
35701 | "int2 __ovld __cnfn ilogb(double2 x);\n" |
35702 | "int3 __ovld __cnfn ilogb(double3 x);\n" |
35703 | "int4 __ovld __cnfn ilogb(double4 x);\n" |
35704 | "int8 __ovld __cnfn ilogb(double8 x);\n" |
35705 | "int16 __ovld __cnfn ilogb(double16 x);\n" |
35706 | "#endif //cl_khr_fp64\n" |
35707 | "#ifdef cl_khr_fp16\n" |
35708 | "int __ovld __cnfn ilogb(half x);\n" |
35709 | "int2 __ovld __cnfn ilogb(half2 x);\n" |
35710 | "int3 __ovld __cnfn ilogb(half3 x);\n" |
35711 | "int4 __ovld __cnfn ilogb(half4 x);\n" |
35712 | "int8 __ovld __cnfn ilogb(half8 x);\n" |
35713 | "int16 __ovld __cnfn ilogb(half16 x);\n" |
35714 | "#endif //cl_khr_fp16\n" |
35715 | "\n" |
35716 | "/**\n" |
35717 | " * Multiply x by 2 to the power n.\n" |
35718 | " */\n" |
35719 | "float __ovld __cnfn ldexp(float x, int n);\n" |
35720 | "float2 __ovld __cnfn ldexp(float2 x, int2 n);\n" |
35721 | "float3 __ovld __cnfn ldexp(float3 x, int3 n);\n" |
35722 | "float4 __ovld __cnfn ldexp(float4 x, int4 n);\n" |
35723 | "float8 __ovld __cnfn ldexp(float8 x, int8 n);\n" |
35724 | "float16 __ovld __cnfn ldexp(float16 x, int16 n);\n" |
35725 | "float2 __ovld __cnfn ldexp(float2 x, int n);\n" |
35726 | "float3 __ovld __cnfn ldexp(float3 x, int n);\n" |
35727 | "float4 __ovld __cnfn ldexp(float4 x, int n);\n" |
35728 | "float8 __ovld __cnfn ldexp(float8 x, int n);\n" |
35729 | "float16 __ovld __cnfn ldexp(float16 x, int n);\n" |
35730 | "#ifdef cl_khr_fp64\n" |
35731 | "double __ovld __cnfn ldexp(double x, int n);\n" |
35732 | "double2 __ovld __cnfn ldexp(double2 x, int2 n);\n" |
35733 | "double3 __ovld __cnfn ldexp(double3 x, int3 n);\n" |
35734 | "double4 __ovld __cnfn ldexp(double4 x, int4 n);\n" |
35735 | "double8 __ovld __cnfn ldexp(double8 x, int8 n);\n" |
35736 | "double16 __ovld __cnfn ldexp(double16 x, int16 n);\n" |
35737 | "double2 __ovld __cnfn ldexp(double2 x, int n);\n" |
35738 | "double3 __ovld __cnfn ldexp(double3 x, int n);\n" |
35739 | "double4 __ovld __cnfn ldexp(double4 x, int n);\n" |
35740 | "double8 __ovld __cnfn ldexp(double8 x, int n);\n" |
35741 | "double16 __ovld __cnfn ldexp(double16 x, int n);\n" |
35742 | "#endif //cl_khr_fp64\n" |
35743 | "#ifdef cl_khr_fp16\n" |
35744 | "half __ovld __cnfn ldexp(half x, int n);\n" |
35745 | "half2 __ovld __cnfn ldexp(half2 x, int2 n);\n" |
35746 | "half3 __ovld __cnfn ldexp(half3 x, int3 n);\n" |
35747 | "half4 __ovld __cnfn ldexp(half4 x, int4 n);\n" |
35748 | "half8 __ovld __cnfn ldexp(half8 x, int8 n);\n" |
35749 | "half16 __ovld __cnfn ldexp(half16 x, int16 n);\n" |
35750 | "half2 __ovld __cnfn ldexp(half2 x, int n);\n" |
35751 | "half3 __ovld __cnfn ldexp(half3 x, int n);\n" |
35752 | "half4 __ovld __cnfn ldexp(half4 x, int n);\n" |
35753 | "half8 __ovld __cnfn ldexp(half8 x, int n);\n" |
35754 | "half16 __ovld __cnfn ldexp(half16 x, int n);\n" |
35755 | "#endif //cl_khr_fp16\n" |
35756 | "\n" |
35757 | "/**\n" |
35758 | " * Log gamma function. Returns the natural\n" |
35759 | " * logarithm of the absolute value of the gamma\n" |
35760 | " * function. The sign of the gamma function is\n" |
35761 | " * returned in the signp argument of lgamma_r.\n" |
35762 | " */\n" |
35763 | "float __ovld __cnfn lgamma(float x);\n" |
35764 | "float2 __ovld __cnfn lgamma(float2 x);\n" |
35765 | "float3 __ovld __cnfn lgamma(float3 x);\n" |
35766 | "float4 __ovld __cnfn lgamma(float4 x);\n" |
35767 | "float8 __ovld __cnfn lgamma(float8 x);\n" |
35768 | "float16 __ovld __cnfn lgamma(float16 x);\n" |
35769 | "#ifdef cl_khr_fp64\n" |
35770 | "double __ovld __cnfn lgamma(double x);\n" |
35771 | "double2 __ovld __cnfn lgamma(double2 x);\n" |
35772 | "double3 __ovld __cnfn lgamma(double3 x);\n" |
35773 | "double4 __ovld __cnfn lgamma(double4 x);\n" |
35774 | "double8 __ovld __cnfn lgamma(double8 x);\n" |
35775 | "double16 __ovld __cnfn lgamma(double16 x);\n" |
35776 | "#endif //cl_khr_fp64\n" |
35777 | "#ifdef cl_khr_fp16\n" |
35778 | "half __ovld __cnfn lgamma(half x);\n" |
35779 | "half2 __ovld __cnfn lgamma(half2 x);\n" |
35780 | "half3 __ovld __cnfn lgamma(half3 x);\n" |
35781 | "half4 __ovld __cnfn lgamma(half4 x);\n" |
35782 | "half8 __ovld __cnfn lgamma(half8 x);\n" |
35783 | "half16 __ovld __cnfn lgamma(half16 x);\n" |
35784 | "#endif //cl_khr_fp16\n" |
35785 | "\n" |
35786 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
35787 | "float __ovld lgamma_r(float x, int *signp);\n" |
35788 | "float2 __ovld lgamma_r(float2 x, int2 *signp);\n" |
35789 | "float3 __ovld lgamma_r(float3 x, int3 *signp);\n" |
35790 | "float4 __ovld lgamma_r(float4 x, int4 *signp);\n" |
35791 | "float8 __ovld lgamma_r(float8 x, int8 *signp);\n" |
35792 | "float16 __ovld lgamma_r(float16 x, int16 *signp);\n" |
35793 | "#ifdef cl_khr_fp64\n" |
35794 | "double __ovld lgamma_r(double x, int *signp);\n" |
35795 | "double2 __ovld lgamma_r(double2 x, int2 *signp);\n" |
35796 | "double3 __ovld lgamma_r(double3 x, int3 *signp);\n" |
35797 | "double4 __ovld lgamma_r(double4 x, int4 *signp);\n" |
35798 | "double8 __ovld lgamma_r(double8 x, int8 *signp);\n" |
35799 | "double16 __ovld lgamma_r(double16 x, int16 *signp);\n" |
35800 | "#endif //cl_khr_fp64\n" |
35801 | "#ifdef cl_khr_fp16\n" |
35802 | "half __ovld lgamma_r(half x, int *signp);\n" |
35803 | "half2 __ovld lgamma_r(half2 x, int2 *signp);\n" |
35804 | "half3 __ovld lgamma_r(half3 x, int3 *signp);\n" |
35805 | "half4 __ovld lgamma_r(half4 x, int4 *signp);\n" |
35806 | "half8 __ovld lgamma_r(half8 x, int8 *signp);\n" |
35807 | "half16 __ovld lgamma_r(half16 x, int16 *signp);\n" |
35808 | "#endif //cl_khr_fp16\n" |
35809 | "#else\n" |
35810 | "float __ovld lgamma_r(float x, __global int *signp);\n" |
35811 | "float2 __ovld lgamma_r(float2 x, __global int2 *signp);\n" |
35812 | "float3 __ovld lgamma_r(float3 x, __global int3 *signp);\n" |
35813 | "float4 __ovld lgamma_r(float4 x, __global int4 *signp);\n" |
35814 | "float8 __ovld lgamma_r(float8 x, __global int8 *signp);\n" |
35815 | "float16 __ovld lgamma_r(float16 x, __global int16 *signp);\n" |
35816 | "float __ovld lgamma_r(float x, __local int *signp);\n" |
35817 | "float2 __ovld lgamma_r(float2 x, __local int2 *signp);\n" |
35818 | "float3 __ovld lgamma_r(float3 x, __local int3 *signp);\n" |
35819 | "float4 __ovld lgamma_r(float4 x, __local int4 *signp);\n" |
35820 | "float8 __ovld lgamma_r(float8 x, __local int8 *signp);\n" |
35821 | "float16 __ovld lgamma_r(float16 x, __local int16 *signp);\n" |
35822 | "float __ovld lgamma_r(float x, __private int *signp);\n" |
35823 | "float2 __ovld lgamma_r(float2 x, __private int2 *signp);\n" |
35824 | "float3 __ovld lgamma_r(float3 x, __private int3 *signp);\n" |
35825 | "float4 __ovld lgamma_r(float4 x, __private int4 *signp);\n" |
35826 | "float8 __ovld lgamma_r(float8 x, __private int8 *signp);\n" |
35827 | "float16 __ovld lgamma_r(float16 x, __private int16 *signp);\n" |
35828 | "#ifdef cl_khr_fp64\n" |
35829 | "double __ovld lgamma_r(double x, __global int *signp);\n" |
35830 | "double2 __ovld lgamma_r(double2 x, __global int2 *signp);\n" |
35831 | "double3 __ovld lgamma_r(double3 x, __global int3 *signp);\n" |
35832 | "double4 __ovld lgamma_r(double4 x, __global int4 *signp);\n" |
35833 | "double8 __ovld lgamma_r(double8 x, __global int8 *signp);\n" |
35834 | "double16 __ovld lgamma_r(double16 x, __global int16 *signp);\n" |
35835 | "double __ovld lgamma_r(double x, __local int *signp);\n" |
35836 | "double2 __ovld lgamma_r(double2 x, __local int2 *signp);\n" |
35837 | "double3 __ovld lgamma_r(double3 x, __local int3 *signp);\n" |
35838 | "double4 __ovld lgamma_r(double4 x, __local int4 *signp);\n" |
35839 | "double8 __ovld lgamma_r(double8 x, __local int8 *signp);\n" |
35840 | "double16 __ovld lgamma_r(double16 x, __local int16 *signp);\n" |
35841 | "double __ovld lgamma_r(double x, __private int *signp);\n" |
35842 | "double2 __ovld lgamma_r(double2 x, __private int2 *signp);\n" |
35843 | "double3 __ovld lgamma_r(double3 x, __private int3 *signp);\n" |
35844 | "double4 __ovld lgamma_r(double4 x, __private int4 *signp);\n" |
35845 | "double8 __ovld lgamma_r(double8 x, __private int8 *signp);\n" |
35846 | "double16 __ovld lgamma_r(double16 x, __private int16 *signp);\n" |
35847 | "#endif //cl_khr_fp64\n" |
35848 | "#ifdef cl_khr_fp16\n" |
35849 | "half __ovld lgamma_r(half x, __global int *signp);\n" |
35850 | "half2 __ovld lgamma_r(half2 x, __global int2 *signp);\n" |
35851 | "half3 __ovld lgamma_r(half3 x, __global int3 *signp);\n" |
35852 | "half4 __ovld lgamma_r(half4 x, __global int4 *signp);\n" |
35853 | "half8 __ovld lgamma_r(half8 x, __global int8 *signp);\n" |
35854 | "half16 __ovld lgamma_r(half16 x, __global int16 *signp);\n" |
35855 | "half __ovld lgamma_r(half x, __local int *signp);\n" |
35856 | "half2 __ovld lgamma_r(half2 x, __local int2 *signp);\n" |
35857 | "half3 __ovld lgamma_r(half3 x, __local int3 *signp);\n" |
35858 | "half4 __ovld lgamma_r(half4 x, __local int4 *signp);\n" |
35859 | "half8 __ovld lgamma_r(half8 x, __local int8 *signp);\n" |
35860 | "half16 __ovld lgamma_r(half16 x, __local int16 *signp);\n" |
35861 | "half __ovld lgamma_r(half x, __private int *signp);\n" |
35862 | "half2 __ovld lgamma_r(half2 x, __private int2 *signp);\n" |
35863 | "half3 __ovld lgamma_r(half3 x, __private int3 *signp);\n" |
35864 | "half4 __ovld lgamma_r(half4 x, __private int4 *signp);\n" |
35865 | "half8 __ovld lgamma_r(half8 x, __private int8 *signp);\n" |
35866 | "half16 __ovld lgamma_r(half16 x, __private int16 *signp);\n" |
35867 | "#endif //cl_khr_fp16\n" |
35868 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
35869 | "\n" |
35870 | "/**\n" |
35871 | " * Compute natural logarithm.\n" |
35872 | " */\n" |
35873 | "float __ovld __cnfn log(float);\n" |
35874 | "float2 __ovld __cnfn log(float2);\n" |
35875 | "float3 __ovld __cnfn log(float3);\n" |
35876 | "float4 __ovld __cnfn log(float4);\n" |
35877 | "float8 __ovld __cnfn log(float8);\n" |
35878 | "float16 __ovld __cnfn log(float16);\n" |
35879 | "#ifdef cl_khr_fp64\n" |
35880 | "double __ovld __cnfn log(double);\n" |
35881 | "double2 __ovld __cnfn log(double2);\n" |
35882 | "double3 __ovld __cnfn log(double3);\n" |
35883 | "double4 __ovld __cnfn log(double4);\n" |
35884 | "double8 __ovld __cnfn log(double8);\n" |
35885 | "double16 __ovld __cnfn log(double16);\n" |
35886 | "#endif //cl_khr_fp64\n" |
35887 | "#ifdef cl_khr_fp16\n" |
35888 | "half __ovld __cnfn log(half);\n" |
35889 | "half2 __ovld __cnfn log(half2);\n" |
35890 | "half3 __ovld __cnfn log(half3);\n" |
35891 | "half4 __ovld __cnfn log(half4);\n" |
35892 | "half8 __ovld __cnfn log(half8);\n" |
35893 | "half16 __ovld __cnfn log(half16);\n" |
35894 | "#endif //cl_khr_fp16\n" |
35895 | "\n" |
35896 | "/**\n" |
35897 | " * Compute a base 2 logarithm.\n" |
35898 | " */\n" |
35899 | "float __ovld __cnfn log2(float);\n" |
35900 | "float2 __ovld __cnfn log2(float2);\n" |
35901 | "float3 __ovld __cnfn log2(float3);\n" |
35902 | "float4 __ovld __cnfn log2(float4);\n" |
35903 | "float8 __ovld __cnfn log2(float8);\n" |
35904 | "float16 __ovld __cnfn log2(float16);\n" |
35905 | "#ifdef cl_khr_fp64\n" |
35906 | "double __ovld __cnfn log2(double);\n" |
35907 | "double2 __ovld __cnfn log2(double2);\n" |
35908 | "double3 __ovld __cnfn log2(double3);\n" |
35909 | "double4 __ovld __cnfn log2(double4);\n" |
35910 | "double8 __ovld __cnfn log2(double8);\n" |
35911 | "double16 __ovld __cnfn log2(double16);\n" |
35912 | "#endif //cl_khr_fp64\n" |
35913 | "#ifdef cl_khr_fp16\n" |
35914 | "half __ovld __cnfn log2(half);\n" |
35915 | "half2 __ovld __cnfn log2(half2);\n" |
35916 | "half3 __ovld __cnfn log2(half3);\n" |
35917 | "half4 __ovld __cnfn log2(half4);\n" |
35918 | "half8 __ovld __cnfn log2(half8);\n" |
35919 | "half16 __ovld __cnfn log2(half16);\n" |
35920 | "#endif //cl_khr_fp16\n" |
35921 | "\n" |
35922 | "/**\n" |
35923 | " * Compute a base 10 logarithm.\n" |
35924 | " */\n" |
35925 | "float __ovld __cnfn log10(float);\n" |
35926 | "float2 __ovld __cnfn log10(float2);\n" |
35927 | "float3 __ovld __cnfn log10(float3);\n" |
35928 | "float4 __ovld __cnfn log10(float4);\n" |
35929 | "float8 __ovld __cnfn log10(float8);\n" |
35930 | "float16 __ovld __cnfn log10(float16);\n" |
35931 | "#ifdef cl_khr_fp64\n" |
35932 | "double __ovld __cnfn log10(double);\n" |
35933 | "double2 __ovld __cnfn log10(double2);\n" |
35934 | "double3 __ovld __cnfn log10(double3);\n" |
35935 | "double4 __ovld __cnfn log10(double4);\n" |
35936 | "double8 __ovld __cnfn log10(double8);\n" |
35937 | "double16 __ovld __cnfn log10(double16);\n" |
35938 | "#endif //cl_khr_fp64\n" |
35939 | "#ifdef cl_khr_fp16\n" |
35940 | "half __ovld __cnfn log10(half);\n" |
35941 | "half2 __ovld __cnfn log10(half2);\n" |
35942 | "half3 __ovld __cnfn log10(half3);\n" |
35943 | "half4 __ovld __cnfn log10(half4);\n" |
35944 | "half8 __ovld __cnfn log10(half8);\n" |
35945 | "half16 __ovld __cnfn log10(half16);\n" |
35946 | "#endif //cl_khr_fp16\n" |
35947 | "\n" |
35948 | "/**\n" |
35949 | " * Compute a base e logarithm of (1.0 + x).\n" |
35950 | " */\n" |
35951 | "float __ovld __cnfn log1p(float x);\n" |
35952 | "float2 __ovld __cnfn log1p(float2 x);\n" |
35953 | "float3 __ovld __cnfn log1p(float3 x);\n" |
35954 | "float4 __ovld __cnfn log1p(float4 x);\n" |
35955 | "float8 __ovld __cnfn log1p(float8 x);\n" |
35956 | "float16 __ovld __cnfn log1p(float16 x);\n" |
35957 | "#ifdef cl_khr_fp64\n" |
35958 | "double __ovld __cnfn log1p(double x);\n" |
35959 | "double2 __ovld __cnfn log1p(double2 x);\n" |
35960 | "double3 __ovld __cnfn log1p(double3 x);\n" |
35961 | "double4 __ovld __cnfn log1p(double4 x);\n" |
35962 | "double8 __ovld __cnfn log1p(double8 x);\n" |
35963 | "double16 __ovld __cnfn log1p(double16 x);\n" |
35964 | "#endif //cl_khr_fp64\n" |
35965 | "#ifdef cl_khr_fp16\n" |
35966 | "half __ovld __cnfn log1p(half x);\n" |
35967 | "half2 __ovld __cnfn log1p(half2 x);\n" |
35968 | "half3 __ovld __cnfn log1p(half3 x);\n" |
35969 | "half4 __ovld __cnfn log1p(half4 x);\n" |
35970 | "half8 __ovld __cnfn log1p(half8 x);\n" |
35971 | "half16 __ovld __cnfn log1p(half16 x);\n" |
35972 | "#endif //cl_khr_fp16\n" |
35973 | "\n" |
35974 | "/**\n" |
35975 | " * Compute the exponent of x, which is the integral\n" |
35976 | " * part of logr | x |.\n" |
35977 | " */\n" |
35978 | "float __ovld __cnfn logb(float x);\n" |
35979 | "float2 __ovld __cnfn logb(float2 x);\n" |
35980 | "float3 __ovld __cnfn logb(float3 x);\n" |
35981 | "float4 __ovld __cnfn logb(float4 x);\n" |
35982 | "float8 __ovld __cnfn logb(float8 x);\n" |
35983 | "float16 __ovld __cnfn logb(float16 x);\n" |
35984 | "#ifdef cl_khr_fp64\n" |
35985 | "double __ovld __cnfn logb(double x);\n" |
35986 | "double2 __ovld __cnfn logb(double2 x);\n" |
35987 | "double3 __ovld __cnfn logb(double3 x);\n" |
35988 | "double4 __ovld __cnfn logb(double4 x);\n" |
35989 | "double8 __ovld __cnfn logb(double8 x);\n" |
35990 | "double16 __ovld __cnfn logb(double16 x);\n" |
35991 | "#endif //cl_khr_fp64\n" |
35992 | "#ifdef cl_khr_fp16\n" |
35993 | "half __ovld __cnfn logb(half x);\n" |
35994 | "half2 __ovld __cnfn logb(half2 x);\n" |
35995 | "half3 __ovld __cnfn logb(half3 x);\n" |
35996 | "half4 __ovld __cnfn logb(half4 x);\n" |
35997 | "half8 __ovld __cnfn logb(half8 x);\n" |
35998 | "half16 __ovld __cnfn logb(half16 x);\n" |
35999 | "#endif //cl_khr_fp16\n" |
36000 | "\n" |
36001 | "/**\n" |
36002 | " * mad approximates a * b + c. Whether or how the\n" |
36003 | " * product of a * b is rounded and how supernormal or\n" |
36004 | " * subnormal intermediate products are handled is not\n" |
36005 | " * defined. mad is intended to be used where speed is\n" |
36006 | " * preferred over accuracy.\n" |
36007 | " */\n" |
36008 | "float __ovld __cnfn mad(float a, float b, float c);\n" |
36009 | "float2 __ovld __cnfn mad(float2 a, float2 b, float2 c);\n" |
36010 | "float3 __ovld __cnfn mad(float3 a, float3 b, float3 c);\n" |
36011 | "float4 __ovld __cnfn mad(float4 a, float4 b, float4 c);\n" |
36012 | "float8 __ovld __cnfn mad(float8 a, float8 b, float8 c);\n" |
36013 | "float16 __ovld __cnfn mad(float16 a, float16 b, float16 c);\n" |
36014 | "#ifdef cl_khr_fp64\n" |
36015 | "double __ovld __cnfn mad(double a, double b, double c);\n" |
36016 | "double2 __ovld __cnfn mad(double2 a, double2 b, double2 c);\n" |
36017 | "double3 __ovld __cnfn mad(double3 a, double3 b, double3 c);\n" |
36018 | "double4 __ovld __cnfn mad(double4 a, double4 b, double4 c);\n" |
36019 | "double8 __ovld __cnfn mad(double8 a, double8 b, double8 c);\n" |
36020 | "double16 __ovld __cnfn mad(double16 a, double16 b, double16 c);\n" |
36021 | "#endif //cl_khr_fp64\n" |
36022 | "#ifdef cl_khr_fp16\n" |
36023 | "half __ovld __cnfn mad(half a, half b, half c);\n" |
36024 | "half2 __ovld __cnfn mad(half2 a, half2 b, half2 c);\n" |
36025 | "half3 __ovld __cnfn mad(half3 a, half3 b, half3 c);\n" |
36026 | "half4 __ovld __cnfn mad(half4 a, half4 b, half4 c);\n" |
36027 | "half8 __ovld __cnfn mad(half8 a, half8 b, half8 c);\n" |
36028 | "half16 __ovld __cnfn mad(half16 a, half16 b, half16 c);\n" |
36029 | "#endif //cl_khr_fp16\n" |
36030 | "\n" |
36031 | "/**\n" |
36032 | " * Returns x if | x | > | y |, y if | y | > | x |, otherwise\n" |
36033 | " * fmax(x, y).\n" |
36034 | " */\n" |
36035 | "float __ovld __cnfn maxmag(float x, float y);\n" |
36036 | "float2 __ovld __cnfn maxmag(float2 x, float2 y);\n" |
36037 | "float3 __ovld __cnfn maxmag(float3 x, float3 y);\n" |
36038 | "float4 __ovld __cnfn maxmag(float4 x, float4 y);\n" |
36039 | "float8 __ovld __cnfn maxmag(float8 x, float8 y);\n" |
36040 | "float16 __ovld __cnfn maxmag(float16 x, float16 y);\n" |
36041 | "#ifdef cl_khr_fp64\n" |
36042 | "double __ovld __cnfn maxmag(double x, double y);\n" |
36043 | "double2 __ovld __cnfn maxmag(double2 x, double2 y);\n" |
36044 | "double3 __ovld __cnfn maxmag(double3 x, double3 y);\n" |
36045 | "double4 __ovld __cnfn maxmag(double4 x, double4 y);\n" |
36046 | "double8 __ovld __cnfn maxmag(double8 x, double8 y);\n" |
36047 | "double16 __ovld __cnfn maxmag(double16 x, double16 y);\n" |
36048 | "#endif //cl_khr_fp64\n" |
36049 | "#ifdef cl_khr_fp16\n" |
36050 | "half __ovld __cnfn maxmag(half x, half y);\n" |
36051 | "half2 __ovld __cnfn maxmag(half2 x, half2 y);\n" |
36052 | "half3 __ovld __cnfn maxmag(half3 x, half3 y);\n" |
36053 | "half4 __ovld __cnfn maxmag(half4 x, half4 y);\n" |
36054 | "half8 __ovld __cnfn maxmag(half8 x, half8 y);\n" |
36055 | "half16 __ovld __cnfn maxmag(half16 x, half16 y);\n" |
36056 | "#endif //cl_khr_fp16\n" |
36057 | "\n" |
36058 | "/**\n" |
36059 | " * Returns x if | x | < | y |, y if | y | < | x |, otherwise\n" |
36060 | " * fmin(x, y).\n" |
36061 | " */\n" |
36062 | "float __ovld __cnfn minmag(float x, float y);\n" |
36063 | "float2 __ovld __cnfn minmag(float2 x, float2 y);\n" |
36064 | "float3 __ovld __cnfn minmag(float3 x, float3 y);\n" |
36065 | "float4 __ovld __cnfn minmag(float4 x, float4 y);\n" |
36066 | "float8 __ovld __cnfn minmag(float8 x, float8 y);\n" |
36067 | "float16 __ovld __cnfn minmag(float16 x, float16 y);\n" |
36068 | "#ifdef cl_khr_fp64\n" |
36069 | "double __ovld __cnfn minmag(double x, double y);\n" |
36070 | "double2 __ovld __cnfn minmag(double2 x, double2 y);\n" |
36071 | "double3 __ovld __cnfn minmag(double3 x, double3 y);\n" |
36072 | "double4 __ovld __cnfn minmag(double4 x, double4 y);\n" |
36073 | "double8 __ovld __cnfn minmag(double8 x, double8 y);\n" |
36074 | "double16 __ovld __cnfn minmag(double16 x, double16 y);\n" |
36075 | "#endif //cl_khr_fp64\n" |
36076 | "#ifdef cl_khr_fp16\n" |
36077 | "half __ovld __cnfn minmag(half x, half y);\n" |
36078 | "half2 __ovld __cnfn minmag(half2 x, half2 y);\n" |
36079 | "half3 __ovld __cnfn minmag(half3 x, half3 y);\n" |
36080 | "half4 __ovld __cnfn minmag(half4 x, half4 y);\n" |
36081 | "half8 __ovld __cnfn minmag(half8 x, half8 y);\n" |
36082 | "half16 __ovld __cnfn minmag(half16 x, half16 y);\n" |
36083 | "#endif //cl_khr_fp16\n" |
36084 | "\n" |
36085 | "/**\n" |
36086 | " * Decompose a floating-point number. The modf\n" |
36087 | " * function breaks the argument x into integral and\n" |
36088 | " * fractional parts, each of which has the same sign as\n" |
36089 | " * the argument. It stores the integral part in the object\n" |
36090 | " * pointed to by iptr.\n" |
36091 | " */\n" |
36092 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
36093 | "float __ovld modf(float x, float *iptr);\n" |
36094 | "float2 __ovld modf(float2 x, float2 *iptr);\n" |
36095 | "float3 __ovld modf(float3 x, float3 *iptr);\n" |
36096 | "float4 __ovld modf(float4 x, float4 *iptr);\n" |
36097 | "float8 __ovld modf(float8 x, float8 *iptr);\n" |
36098 | "float16 __ovld modf(float16 x, float16 *iptr);\n" |
36099 | "#ifdef cl_khr_fp64\n" |
36100 | "double __ovld modf(double x, double *iptr);\n" |
36101 | "double2 __ovld modf(double2 x, double2 *iptr);\n" |
36102 | "double3 __ovld modf(double3 x, double3 *iptr);\n" |
36103 | "double4 __ovld modf(double4 x, double4 *iptr);\n" |
36104 | "double8 __ovld modf(double8 x, double8 *iptr);\n" |
36105 | "double16 __ovld modf(double16 x, double16 *iptr);\n" |
36106 | "#endif //cl_khr_fp64\n" |
36107 | "#ifdef cl_khr_fp16\n" |
36108 | "half __ovld modf(half x, half *iptr);\n" |
36109 | "half2 __ovld modf(half2 x, half2 *iptr);\n" |
36110 | "half3 __ovld modf(half3 x, half3 *iptr);\n" |
36111 | "half4 __ovld modf(half4 x, half4 *iptr);\n" |
36112 | "half8 __ovld modf(half8 x, half8 *iptr);\n" |
36113 | "half16 __ovld modf(half16 x, half16 *iptr);\n" |
36114 | "#endif //cl_khr_fp16\n" |
36115 | "#else\n" |
36116 | "float __ovld modf(float x, __global float *iptr);\n" |
36117 | "float2 __ovld modf(float2 x, __global float2 *iptr);\n" |
36118 | "float3 __ovld modf(float3 x, __global float3 *iptr);\n" |
36119 | "float4 __ovld modf(float4 x, __global float4 *iptr);\n" |
36120 | "float8 __ovld modf(float8 x, __global float8 *iptr);\n" |
36121 | "float16 __ovld modf(float16 x, __global float16 *iptr);\n" |
36122 | "float __ovld modf(float x, __local float *iptr);\n" |
36123 | "float2 __ovld modf(float2 x, __local float2 *iptr);\n" |
36124 | "float3 __ovld modf(float3 x, __local float3 *iptr);\n" |
36125 | "float4 __ovld modf(float4 x, __local float4 *iptr);\n" |
36126 | "float8 __ovld modf(float8 x, __local float8 *iptr);\n" |
36127 | "float16 __ovld modf(float16 x, __local float16 *iptr);\n" |
36128 | "float __ovld modf(float x, __private float *iptr);\n" |
36129 | "float2 __ovld modf(float2 x, __private float2 *iptr);\n" |
36130 | "float3 __ovld modf(float3 x, __private float3 *iptr);\n" |
36131 | "float4 __ovld modf(float4 x, __private float4 *iptr);\n" |
36132 | "float8 __ovld modf(float8 x, __private float8 *iptr);\n" |
36133 | "float16 __ovld modf(float16 x, __private float16 *iptr);\n" |
36134 | "#ifdef cl_khr_fp64\n" |
36135 | "double __ovld modf(double x, __global double *iptr);\n" |
36136 | "double2 __ovld modf(double2 x, __global double2 *iptr);\n" |
36137 | "double3 __ovld modf(double3 x, __global double3 *iptr);\n" |
36138 | "double4 __ovld modf(double4 x, __global double4 *iptr);\n" |
36139 | "double8 __ovld modf(double8 x, __global double8 *iptr);\n" |
36140 | "double16 __ovld modf(double16 x, __global double16 *iptr);\n" |
36141 | "double __ovld modf(double x, __local double *iptr);\n" |
36142 | "double2 __ovld modf(double2 x, __local double2 *iptr);\n" |
36143 | "double3 __ovld modf(double3 x, __local double3 *iptr);\n" |
36144 | "double4 __ovld modf(double4 x, __local double4 *iptr);\n" |
36145 | "double8 __ovld modf(double8 x, __local double8 *iptr);\n" |
36146 | "double16 __ovld modf(double16 x, __local double16 *iptr);\n" |
36147 | "double __ovld modf(double x, __private double *iptr);\n" |
36148 | "double2 __ovld modf(double2 x, __private double2 *iptr);\n" |
36149 | "double3 __ovld modf(double3 x, __private double3 *iptr);\n" |
36150 | "double4 __ovld modf(double4 x, __private double4 *iptr);\n" |
36151 | "double8 __ovld modf(double8 x, __private double8 *iptr);\n" |
36152 | "double16 __ovld modf(double16 x, __private double16 *iptr);\n" |
36153 | "#endif //cl_khr_fp64\n" |
36154 | "#ifdef cl_khr_fp16\n" |
36155 | "half __ovld modf(half x, __global half *iptr);\n" |
36156 | "half2 __ovld modf(half2 x, __global half2 *iptr);\n" |
36157 | "half3 __ovld modf(half3 x, __global half3 *iptr);\n" |
36158 | "half4 __ovld modf(half4 x, __global half4 *iptr);\n" |
36159 | "half8 __ovld modf(half8 x, __global half8 *iptr);\n" |
36160 | "half16 __ovld modf(half16 x, __global half16 *iptr);\n" |
36161 | "half __ovld modf(half x, __local half *iptr);\n" |
36162 | "half2 __ovld modf(half2 x, __local half2 *iptr);\n" |
36163 | "half3 __ovld modf(half3 x, __local half3 *iptr);\n" |
36164 | "half4 __ovld modf(half4 x, __local half4 *iptr);\n" |
36165 | "half8 __ovld modf(half8 x, __local half8 *iptr);\n" |
36166 | "half16 __ovld modf(half16 x, __local half16 *iptr);\n" |
36167 | "half __ovld modf(half x, __private half *iptr);\n" |
36168 | "half2 __ovld modf(half2 x, __private half2 *iptr);\n" |
36169 | "half3 __ovld modf(half3 x, __private half3 *iptr);\n" |
36170 | "half4 __ovld modf(half4 x, __private half4 *iptr);\n" |
36171 | "half8 __ovld modf(half8 x, __private half8 *iptr);\n" |
36172 | "half16 __ovld modf(half16 x, __private half16 *iptr);\n" |
36173 | "#endif //cl_khr_fp16\n" |
36174 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
36175 | "\n" |
36176 | "/**\n" |
36177 | " * Returns a quiet NaN. The nancode may be placed\n" |
36178 | " * in the significand of the resulting NaN.\n" |
36179 | " */\n" |
36180 | "float __ovld __cnfn nan(uint nancode);\n" |
36181 | "float2 __ovld __cnfn nan(uint2 nancode);\n" |
36182 | "float3 __ovld __cnfn nan(uint3 nancode);\n" |
36183 | "float4 __ovld __cnfn nan(uint4 nancode);\n" |
36184 | "float8 __ovld __cnfn nan(uint8 nancode);\n" |
36185 | "float16 __ovld __cnfn nan(uint16 nancode);\n" |
36186 | "#ifdef cl_khr_fp64\n" |
36187 | "double __ovld __cnfn nan(ulong nancode);\n" |
36188 | "double2 __ovld __cnfn nan(ulong2 nancode);\n" |
36189 | "double3 __ovld __cnfn nan(ulong3 nancode);\n" |
36190 | "double4 __ovld __cnfn nan(ulong4 nancode);\n" |
36191 | "double8 __ovld __cnfn nan(ulong8 nancode);\n" |
36192 | "double16 __ovld __cnfn nan(ulong16 nancode);\n" |
36193 | "#endif //cl_khr_fp64\n" |
36194 | "#ifdef cl_khr_fp16\n" |
36195 | "half __ovld __cnfn nan(ushort nancode);\n" |
36196 | "half2 __ovld __cnfn nan(ushort2 nancode);\n" |
36197 | "half3 __ovld __cnfn nan(ushort3 nancode);\n" |
36198 | "half4 __ovld __cnfn nan(ushort4 nancode);\n" |
36199 | "half8 __ovld __cnfn nan(ushort8 nancode);\n" |
36200 | "half16 __ovld __cnfn nan(ushort16 nancode);\n" |
36201 | "#endif //cl_khr_fp16\n" |
36202 | "\n" |
36203 | "/**\n" |
36204 | " * Computes the next representable single-precision\n" |
36205 | " * floating-point value following x in the direction of\n" |
36206 | " * y. Thus, if y is less than x, nextafter() returns the\n" |
36207 | " * largest representable floating-point number less\n" |
36208 | " * than x.\n" |
36209 | " */\n" |
36210 | "float __ovld __cnfn nextafter(float x, float y);\n" |
36211 | "float2 __ovld __cnfn nextafter(float2 x, float2 y);\n" |
36212 | "float3 __ovld __cnfn nextafter(float3 x, float3 y);\n" |
36213 | "float4 __ovld __cnfn nextafter(float4 x, float4 y);\n" |
36214 | "float8 __ovld __cnfn nextafter(float8 x, float8 y);\n" |
36215 | "float16 __ovld __cnfn nextafter(float16 x, float16 y);\n" |
36216 | "#ifdef cl_khr_fp64\n" |
36217 | "double __ovld __cnfn nextafter(double x, double y);\n" |
36218 | "double2 __ovld __cnfn nextafter(double2 x, double2 y);\n" |
36219 | "double3 __ovld __cnfn nextafter(double3 x, double3 y);\n" |
36220 | "double4 __ovld __cnfn nextafter(double4 x, double4 y);\n" |
36221 | "double8 __ovld __cnfn nextafter(double8 x, double8 y);\n" |
36222 | "double16 __ovld __cnfn nextafter(double16 x, double16 y);\n" |
36223 | "#endif //cl_khr_fp64\n" |
36224 | "#ifdef cl_khr_fp16\n" |
36225 | "half __ovld __cnfn nextafter(half x, half y);\n" |
36226 | "half2 __ovld __cnfn nextafter(half2 x, half2 y);\n" |
36227 | "half3 __ovld __cnfn nextafter(half3 x, half3 y);\n" |
36228 | "half4 __ovld __cnfn nextafter(half4 x, half4 y);\n" |
36229 | "half8 __ovld __cnfn nextafter(half8 x, half8 y);\n" |
36230 | "half16 __ovld __cnfn nextafter(half16 x, half16 y);\n" |
36231 | "#endif //cl_khr_fp16\n" |
36232 | "\n" |
36233 | "/**\n" |
36234 | " * Compute x to the power y.\n" |
36235 | " */\n" |
36236 | "float __ovld __cnfn pow(float x, float y);\n" |
36237 | "float2 __ovld __cnfn pow(float2 x, float2 y);\n" |
36238 | "float3 __ovld __cnfn pow(float3 x, float3 y);\n" |
36239 | "float4 __ovld __cnfn pow(float4 x, float4 y);\n" |
36240 | "float8 __ovld __cnfn pow(float8 x, float8 y);\n" |
36241 | "float16 __ovld __cnfn pow(float16 x, float16 y);\n" |
36242 | "#ifdef cl_khr_fp64\n" |
36243 | "double __ovld __cnfn pow(double x, double y);\n" |
36244 | "double2 __ovld __cnfn pow(double2 x, double2 y);\n" |
36245 | "double3 __ovld __cnfn pow(double3 x, double3 y);\n" |
36246 | "double4 __ovld __cnfn pow(double4 x, double4 y);\n" |
36247 | "double8 __ovld __cnfn pow(double8 x, double8 y);\n" |
36248 | "double16 __ovld __cnfn pow(double16 x, double16 y);\n" |
36249 | "#endif //cl_khr_fp64\n" |
36250 | "#ifdef cl_khr_fp16\n" |
36251 | "half __ovld __cnfn pow(half x, half y);\n" |
36252 | "half2 __ovld __cnfn pow(half2 x, half2 y);\n" |
36253 | "half3 __ovld __cnfn pow(half3 x, half3 y);\n" |
36254 | "half4 __ovld __cnfn pow(half4 x, half4 y);\n" |
36255 | "half8 __ovld __cnfn pow(half8 x, half8 y);\n" |
36256 | "half16 __ovld __cnfn pow(half16 x, half16 y);\n" |
36257 | "#endif //cl_khr_fp16\n" |
36258 | "\n" |
36259 | "/**\n" |
36260 | " * Compute x to the power y, where y is an integer.\n" |
36261 | " */\n" |
36262 | "float __ovld __cnfn pown(float x, int y);\n" |
36263 | "float2 __ovld __cnfn pown(float2 x, int2 y);\n" |
36264 | "float3 __ovld __cnfn pown(float3 x, int3 y);\n" |
36265 | "float4 __ovld __cnfn pown(float4 x, int4 y);\n" |
36266 | "float8 __ovld __cnfn pown(float8 x, int8 y);\n" |
36267 | "float16 __ovld __cnfn pown(float16 x, int16 y);\n" |
36268 | "#ifdef cl_khr_fp64\n" |
36269 | "double __ovld __cnfn pown(double x, int y);\n" |
36270 | "double2 __ovld __cnfn pown(double2 x, int2 y);\n" |
36271 | "double3 __ovld __cnfn pown(double3 x, int3 y);\n" |
36272 | "double4 __ovld __cnfn pown(double4 x, int4 y);\n" |
36273 | "double8 __ovld __cnfn pown(double8 x, int8 y);\n" |
36274 | "double16 __ovld __cnfn pown(double16 x, int16 y);\n" |
36275 | "#endif //cl_khr_fp64\n" |
36276 | "#ifdef cl_khr_fp16\n" |
36277 | "half __ovld __cnfn pown(half x, int y);\n" |
36278 | "half2 __ovld __cnfn pown(half2 x, int2 y);\n" |
36279 | "half3 __ovld __cnfn pown(half3 x, int3 y);\n" |
36280 | "half4 __ovld __cnfn pown(half4 x, int4 y);\n" |
36281 | "half8 __ovld __cnfn pown(half8 x, int8 y);\n" |
36282 | "half16 __ovld __cnfn pown(half16 x, int16 y);\n" |
36283 | "#endif //cl_khr_fp16\n" |
36284 | "\n" |
36285 | "/**\n" |
36286 | " * Compute x to the power y, where x is >= 0.\n" |
36287 | " */\n" |
36288 | "float __ovld __cnfn powr(float x, float y);\n" |
36289 | "float2 __ovld __cnfn powr(float2 x, float2 y);\n" |
36290 | "float3 __ovld __cnfn powr(float3 x, float3 y);\n" |
36291 | "float4 __ovld __cnfn powr(float4 x, float4 y);\n" |
36292 | "float8 __ovld __cnfn powr(float8 x, float8 y);\n" |
36293 | "float16 __ovld __cnfn powr(float16 x, float16 y);\n" |
36294 | "#ifdef cl_khr_fp64\n" |
36295 | "double __ovld __cnfn powr(double x, double y);\n" |
36296 | "double2 __ovld __cnfn powr(double2 x, double2 y);\n" |
36297 | "double3 __ovld __cnfn powr(double3 x, double3 y);\n" |
36298 | "double4 __ovld __cnfn powr(double4 x, double4 y);\n" |
36299 | "double8 __ovld __cnfn powr(double8 x, double8 y);\n" |
36300 | "double16 __ovld __cnfn powr(double16 x, double16 y);\n" |
36301 | "#endif //cl_khr_fp64\n" |
36302 | "#ifdef cl_khr_fp16\n" |
36303 | "half __ovld __cnfn powr(half x, half y);\n" |
36304 | "half2 __ovld __cnfn powr(half2 x, half2 y);\n" |
36305 | "half3 __ovld __cnfn powr(half3 x, half3 y);\n" |
36306 | "half4 __ovld __cnfn powr(half4 x, half4 y);\n" |
36307 | "half8 __ovld __cnfn powr(half8 x, half8 y);\n" |
36308 | "half16 __ovld __cnfn powr(half16 x, half16 y);\n" |
36309 | "#endif //cl_khr_fp16\n" |
36310 | "\n" |
36311 | "/**\n" |
36312 | " * Compute the value r such that r = x - n*y, where n\n" |
36313 | " * is the integer nearest the exact value of x/y. If there\n" |
36314 | " * are two integers closest to x/y, n shall be the even\n" |
36315 | " * one. If r is zero, it is given the same sign as x.\n" |
36316 | " */\n" |
36317 | "float __ovld __cnfn remainder(float x, float y);\n" |
36318 | "float2 __ovld __cnfn remainder(float2 x, float2 y);\n" |
36319 | "float3 __ovld __cnfn remainder(float3 x, float3 y);\n" |
36320 | "float4 __ovld __cnfn remainder(float4 x, float4 y);\n" |
36321 | "float8 __ovld __cnfn remainder(float8 x, float8 y);\n" |
36322 | "float16 __ovld __cnfn remainder(float16 x, float16 y);\n" |
36323 | "#ifdef cl_khr_fp64\n" |
36324 | "double __ovld __cnfn remainder(double x, double y);\n" |
36325 | "double2 __ovld __cnfn remainder(double2 x, double2 y);\n" |
36326 | "double3 __ovld __cnfn remainder(double3 x, double3 y);\n" |
36327 | "double4 __ovld __cnfn remainder(double4 x, double4 y);\n" |
36328 | "double8 __ovld __cnfn remainder(double8 x, double8 y);\n" |
36329 | "double16 __ovld __cnfn remainder(double16 x, double16 y);\n" |
36330 | "#endif //cl_khr_fp64\n" |
36331 | "#ifdef cl_khr_fp16\n" |
36332 | "half __ovld __cnfn remainder(half x, half y);\n" |
36333 | "half2 __ovld __cnfn remainder(half2 x, half2 y);\n" |
36334 | "half3 __ovld __cnfn remainder(half3 x, half3 y);\n" |
36335 | "half4 __ovld __cnfn remainder(half4 x, half4 y);\n" |
36336 | "half8 __ovld __cnfn remainder(half8 x, half8 y);\n" |
36337 | "half16 __ovld __cnfn remainder(half16 x, half16 y);\n" |
36338 | "#endif //cl_khr_fp16\n" |
36339 | "\n" |
36340 | "/**\n" |
36341 | " * The remquo function computes the value r such\n" |
36342 | " * that r = x - n*y, where n is the integer nearest the\n" |
36343 | " * exact value of x/y. If there are two integers closest\n" |
36344 | " * to x/y, n shall be the even one. If r is zero, it is\n" |
36345 | " * given the same sign as x. This is the same value\n" |
36346 | " * that is returned by the remainder function.\n" |
36347 | " * remquo also calculates the lower seven bits of the\n" |
36348 | " * integral quotient x/y, and gives that value the same\n" |
36349 | " * sign as x/y. It stores this signed value in the object\n" |
36350 | " * pointed to by quo.\n" |
36351 | " */\n" |
36352 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
36353 | "float __ovld remquo(float x, float y, int *quo);\n" |
36354 | "float2 __ovld remquo(float2 x, float2 y, int2 *quo);\n" |
36355 | "float3 __ovld remquo(float3 x, float3 y, int3 *quo);\n" |
36356 | "float4 __ovld remquo(float4 x, float4 y, int4 *quo);\n" |
36357 | "float8 __ovld remquo(float8 x, float8 y, int8 *quo);\n" |
36358 | "float16 __ovld remquo(float16 x, float16 y, int16 *quo);\n" |
36359 | "#ifdef cl_khr_fp64\n" |
36360 | "double __ovld remquo(double x, double y, int *quo);\n" |
36361 | "double2 __ovld remquo(double2 x, double2 y, int2 *quo);\n" |
36362 | "double3 __ovld remquo(double3 x, double3 y, int3 *quo);\n" |
36363 | "double4 __ovld remquo(double4 x, double4 y, int4 *quo);\n" |
36364 | "double8 __ovld remquo(double8 x, double8 y, int8 *quo);\n" |
36365 | "double16 __ovld remquo(double16 x, double16 y, int16 *quo);\n" |
36366 | "#endif //cl_khr_fp64\n" |
36367 | "#ifdef cl_khr_fp16\n" |
36368 | "half __ovld remquo(half x, half y, int *quo);\n" |
36369 | "half2 __ovld remquo(half2 x, half2 y, int2 *quo);\n" |
36370 | "half3 __ovld remquo(half3 x, half3 y, int3 *quo);\n" |
36371 | "half4 __ovld remquo(half4 x, half4 y, int4 *quo);\n" |
36372 | "half8 __ovld remquo(half8 x, half8 y, int8 *quo);\n" |
36373 | "half16 __ovld remquo(half16 x, half16 y, int16 *quo);\n" |
36374 | "\n" |
36375 | "#endif //cl_khr_fp16\n" |
36376 | "#else\n" |
36377 | "float __ovld remquo(float x, float y, __global int *quo);\n" |
36378 | "float2 __ovld remquo(float2 x, float2 y, __global int2 *quo);\n" |
36379 | "float3 __ovld remquo(float3 x, float3 y, __global int3 *quo);\n" |
36380 | "float4 __ovld remquo(float4 x, float4 y, __global int4 *quo);\n" |
36381 | "float8 __ovld remquo(float8 x, float8 y, __global int8 *quo);\n" |
36382 | "float16 __ovld remquo(float16 x, float16 y, __global int16 *quo);\n" |
36383 | "float __ovld remquo(float x, float y, __local int *quo);\n" |
36384 | "float2 __ovld remquo(float2 x, float2 y, __local int2 *quo);\n" |
36385 | "float3 __ovld remquo(float3 x, float3 y, __local int3 *quo);\n" |
36386 | "float4 __ovld remquo(float4 x, float4 y, __local int4 *quo);\n" |
36387 | "float8 __ovld remquo(float8 x, float8 y, __local int8 *quo);\n" |
36388 | "float16 __ovld remquo(float16 x, float16 y, __local int16 *quo);\n" |
36389 | "float __ovld remquo(float x, float y, __private int *quo);\n" |
36390 | "float2 __ovld remquo(float2 x, float2 y, __private int2 *quo);\n" |
36391 | "float3 __ovld remquo(float3 x, float3 y, __private int3 *quo);\n" |
36392 | "float4 __ovld remquo(float4 x, float4 y, __private int4 *quo);\n" |
36393 | "float8 __ovld remquo(float8 x, float8 y, __private int8 *quo);\n" |
36394 | "float16 __ovld remquo(float16 x, float16 y, __private int16 *quo);\n" |
36395 | "#ifdef cl_khr_fp64\n" |
36396 | "double __ovld remquo(double x, double y, __global int *quo);\n" |
36397 | "double2 __ovld remquo(double2 x, double2 y, __global int2 *quo);\n" |
36398 | "double3 __ovld remquo(double3 x, double3 y, __global int3 *quo);\n" |
36399 | "double4 __ovld remquo(double4 x, double4 y, __global int4 *quo);\n" |
36400 | "double8 __ovld remquo(double8 x, double8 y, __global int8 *quo);\n" |
36401 | "double16 __ovld remquo(double16 x, double16 y, __global int16 *quo);\n" |
36402 | "double __ovld remquo(double x, double y, __local int *quo);\n" |
36403 | "double2 __ovld remquo(double2 x, double2 y, __local int2 *quo);\n" |
36404 | "double3 __ovld remquo(double3 x, double3 y, __local int3 *quo);\n" |
36405 | "double4 __ovld remquo(double4 x, double4 y, __local int4 *quo);\n" |
36406 | "double8 __ovld remquo(double8 x, double8 y, __local int8 *quo);\n" |
36407 | "double16 __ovld remquo(double16 x, double16 y, __local int16 *quo);\n" |
36408 | "double __ovld remquo(double x, double y, __private int *quo);\n" |
36409 | "double2 __ovld remquo(double2 x, double2 y, __private int2 *quo);\n" |
36410 | "double3 __ovld remquo(double3 x, double3 y, __private int3 *quo);\n" |
36411 | "double4 __ovld remquo(double4 x, double4 y, __private int4 *quo);\n" |
36412 | "double8 __ovld remquo(double8 x, double8 y, __private int8 *quo);\n" |
36413 | "double16 __ovld remquo(double16 x, double16 y, __private int16 *quo);\n" |
36414 | "#endif //cl_khr_fp64\n" |
36415 | "#ifdef cl_khr_fp16\n" |
36416 | "half __ovld remquo(half x, half y, __global int *quo);\n" |
36417 | "half2 __ovld remquo(half2 x, half2 y, __global int2 *quo);\n" |
36418 | "half3 __ovld remquo(half3 x, half3 y, __global int3 *quo);\n" |
36419 | "half4 __ovld remquo(half4 x, half4 y, __global int4 *quo);\n" |
36420 | "half8 __ovld remquo(half8 x, half8 y, __global int8 *quo);\n" |
36421 | "half16 __ovld remquo(half16 x, half16 y, __global int16 *quo);\n" |
36422 | "half __ovld remquo(half x, half y, __local int *quo);\n" |
36423 | "half2 __ovld remquo(half2 x, half2 y, __local int2 *quo);\n" |
36424 | "half3 __ovld remquo(half3 x, half3 y, __local int3 *quo);\n" |
36425 | "half4 __ovld remquo(half4 x, half4 y, __local int4 *quo);\n" |
36426 | "half8 __ovld remquo(half8 x, half8 y, __local int8 *quo);\n" |
36427 | "half16 __ovld remquo(half16 x, half16 y, __local int16 *quo);\n" |
36428 | "half __ovld remquo(half x, half y, __private int *quo);\n" |
36429 | "half2 __ovld remquo(half2 x, half2 y, __private int2 *quo);\n" |
36430 | "half3 __ovld remquo(half3 x, half3 y, __private int3 *quo);\n" |
36431 | "half4 __ovld remquo(half4 x, half4 y, __private int4 *quo);\n" |
36432 | "half8 __ovld remquo(half8 x, half8 y, __private int8 *quo);\n" |
36433 | "half16 __ovld remquo(half16 x, half16 y, __private int16 *quo);\n" |
36434 | "#endif //cl_khr_fp16\n" |
36435 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
36436 | "/**\n" |
36437 | " * Round to integral value (using round to nearest\n" |
36438 | " * even rounding mode) in floating-point format.\n" |
36439 | " * Refer to section 7.1 for description of rounding\n" |
36440 | " * modes.\n" |
36441 | " */\n" |
36442 | "float __ovld __cnfn rint(float);\n" |
36443 | "float2 __ovld __cnfn rint(float2);\n" |
36444 | "float3 __ovld __cnfn rint(float3);\n" |
36445 | "float4 __ovld __cnfn rint(float4);\n" |
36446 | "float8 __ovld __cnfn rint(float8);\n" |
36447 | "float16 __ovld __cnfn rint(float16);\n" |
36448 | "#ifdef cl_khr_fp64\n" |
36449 | "double __ovld __cnfn rint(double);\n" |
36450 | "double2 __ovld __cnfn rint(double2);\n" |
36451 | "double3 __ovld __cnfn rint(double3);\n" |
36452 | "double4 __ovld __cnfn rint(double4);\n" |
36453 | "double8 __ovld __cnfn rint(double8);\n" |
36454 | "double16 __ovld __cnfn rint(double16);\n" |
36455 | "#endif //cl_khr_fp64\n" |
36456 | "#ifdef cl_khr_fp16\n" |
36457 | "half __ovld __cnfn rint(half);\n" |
36458 | "half2 __ovld __cnfn rint(half2);\n" |
36459 | "half3 __ovld __cnfn rint(half3);\n" |
36460 | "half4 __ovld __cnfn rint(half4);\n" |
36461 | "half8 __ovld __cnfn rint(half8);\n" |
36462 | "half16 __ovld __cnfn rint(half16);\n" |
36463 | "#endif //cl_khr_fp16\n" |
36464 | "\n" |
36465 | "/**\n" |
36466 | " * Compute x to the power 1/y.\n" |
36467 | " */\n" |
36468 | "float __ovld __cnfn rootn(float x, int y);\n" |
36469 | "float2 __ovld __cnfn rootn(float2 x, int2 y);\n" |
36470 | "float3 __ovld __cnfn rootn(float3 x, int3 y);\n" |
36471 | "float4 __ovld __cnfn rootn(float4 x, int4 y);\n" |
36472 | "float8 __ovld __cnfn rootn(float8 x, int8 y);\n" |
36473 | "float16 __ovld __cnfn rootn(float16 x, int16 y);\n" |
36474 | "#ifdef cl_khr_fp64\n" |
36475 | "double __ovld __cnfn rootn(double x, int y);\n" |
36476 | "double2 __ovld __cnfn rootn(double2 x, int2 y);\n" |
36477 | "double3 __ovld __cnfn rootn(double3 x, int3 y);\n" |
36478 | "double4 __ovld __cnfn rootn(double4 x, int4 y);\n" |
36479 | "double8 __ovld __cnfn rootn(double8 x, int8 y);\n" |
36480 | "double16 __ovld __cnfn rootn(double16 x, int16 y);\n" |
36481 | "#endif //cl_khr_fp64\n" |
36482 | "#ifdef cl_khr_fp16\n" |
36483 | "half __ovld __cnfn rootn(half x, int y);\n" |
36484 | "half2 __ovld __cnfn rootn(half2 x, int2 y);\n" |
36485 | "half3 __ovld __cnfn rootn(half3 x, int3 y);\n" |
36486 | "half4 __ovld __cnfn rootn(half4 x, int4 y);\n" |
36487 | "half8 __ovld __cnfn rootn(half8 x, int8 y);\n" |
36488 | "half16 __ovld __cnfn rootn(half16 x, int16 y);\n" |
36489 | "#endif //cl_khr_fp16\n" |
36490 | "\n" |
36491 | "/**\n" |
36492 | " * Return the integral value nearest to x rounding\n" |
36493 | " * halfway cases away from zero, regardless of the\n" |
36494 | " * current rounding direction.\n" |
36495 | " */\n" |
36496 | "float __ovld __cnfn round(float x);\n" |
36497 | "float2 __ovld __cnfn round(float2 x);\n" |
36498 | "float3 __ovld __cnfn round(float3 x);\n" |
36499 | "float4 __ovld __cnfn round(float4 x);\n" |
36500 | "float8 __ovld __cnfn round(float8 x);\n" |
36501 | "float16 __ovld __cnfn round(float16 x);\n" |
36502 | "#ifdef cl_khr_fp64\n" |
36503 | "double __ovld __cnfn round(double x);\n" |
36504 | "double2 __ovld __cnfn round(double2 x);\n" |
36505 | "double3 __ovld __cnfn round(double3 x);\n" |
36506 | "double4 __ovld __cnfn round(double4 x);\n" |
36507 | "double8 __ovld __cnfn round(double8 x);\n" |
36508 | "double16 __ovld __cnfn round(double16 x);\n" |
36509 | "#endif //cl_khr_fp64\n" |
36510 | "#ifdef cl_khr_fp16\n" |
36511 | "half __ovld __cnfn round(half x);\n" |
36512 | "half2 __ovld __cnfn round(half2 x);\n" |
36513 | "half3 __ovld __cnfn round(half3 x);\n" |
36514 | "half4 __ovld __cnfn round(half4 x);\n" |
36515 | "half8 __ovld __cnfn round(half8 x);\n" |
36516 | "half16 __ovld __cnfn round(half16 x);\n" |
36517 | "#endif //cl_khr_fp16\n" |
36518 | "\n" |
36519 | "/**\n" |
36520 | " * Compute inverse square root.\n" |
36521 | " */\n" |
36522 | "float __ovld __cnfn rsqrt(float);\n" |
36523 | "float2 __ovld __cnfn rsqrt(float2);\n" |
36524 | "float3 __ovld __cnfn rsqrt(float3);\n" |
36525 | "float4 __ovld __cnfn rsqrt(float4);\n" |
36526 | "float8 __ovld __cnfn rsqrt(float8);\n" |
36527 | "float16 __ovld __cnfn rsqrt(float16);\n" |
36528 | "#ifdef cl_khr_fp64\n" |
36529 | "double __ovld __cnfn rsqrt(double);\n" |
36530 | "double2 __ovld __cnfn rsqrt(double2);\n" |
36531 | "double3 __ovld __cnfn rsqrt(double3);\n" |
36532 | "double4 __ovld __cnfn rsqrt(double4);\n" |
36533 | "double8 __ovld __cnfn rsqrt(double8);\n" |
36534 | "double16 __ovld __cnfn rsqrt(double16);\n" |
36535 | "#endif //cl_khr_fp64\n" |
36536 | "#ifdef cl_khr_fp16\n" |
36537 | "half __ovld __cnfn rsqrt(half);\n" |
36538 | "half2 __ovld __cnfn rsqrt(half2);\n" |
36539 | "half3 __ovld __cnfn rsqrt(half3);\n" |
36540 | "half4 __ovld __cnfn rsqrt(half4);\n" |
36541 | "half8 __ovld __cnfn rsqrt(half8);\n" |
36542 | "half16 __ovld __cnfn rsqrt(half16);\n" |
36543 | "#endif //cl_khr_fp16\n" |
36544 | "\n" |
36545 | "/**\n" |
36546 | " * Compute sine.\n" |
36547 | " */\n" |
36548 | "float __ovld __cnfn sin(float);\n" |
36549 | "float2 __ovld __cnfn sin(float2);\n" |
36550 | "float3 __ovld __cnfn sin(float3);\n" |
36551 | "float4 __ovld __cnfn sin(float4);\n" |
36552 | "float8 __ovld __cnfn sin(float8);\n" |
36553 | "float16 __ovld __cnfn sin(float16);\n" |
36554 | "#ifdef cl_khr_fp64\n" |
36555 | "double __ovld __cnfn sin(double);\n" |
36556 | "double2 __ovld __cnfn sin(double2);\n" |
36557 | "double3 __ovld __cnfn sin(double3);\n" |
36558 | "double4 __ovld __cnfn sin(double4);\n" |
36559 | "double8 __ovld __cnfn sin(double8);\n" |
36560 | "double16 __ovld __cnfn sin(double16);\n" |
36561 | "#endif //cl_khr_fp64\n" |
36562 | "#ifdef cl_khr_fp16\n" |
36563 | "half __ovld __cnfn sin(half);\n" |
36564 | "half2 __ovld __cnfn sin(half2);\n" |
36565 | "half3 __ovld __cnfn sin(half3);\n" |
36566 | "half4 __ovld __cnfn sin(half4);\n" |
36567 | "half8 __ovld __cnfn sin(half8);\n" |
36568 | "half16 __ovld __cnfn sin(half16);\n" |
36569 | "#endif //cl_khr_fp16\n" |
36570 | "\n" |
36571 | "/**\n" |
36572 | " * Compute sine and cosine of x. The computed sine\n" |
36573 | " * is the return value and computed cosine is returned\n" |
36574 | " * in cosval.\n" |
36575 | " */\n" |
36576 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
36577 | "float __ovld sincos(float x, float *cosval);\n" |
36578 | "float2 __ovld sincos(float2 x, float2 *cosval);\n" |
36579 | "float3 __ovld sincos(float3 x, float3 *cosval);\n" |
36580 | "float4 __ovld sincos(float4 x, float4 *cosval);\n" |
36581 | "float8 __ovld sincos(float8 x, float8 *cosval);\n" |
36582 | "float16 __ovld sincos(float16 x, float16 *cosval);\n" |
36583 | "#ifdef cl_khr_fp64\n" |
36584 | "double __ovld sincos(double x, double *cosval);\n" |
36585 | "double2 __ovld sincos(double2 x, double2 *cosval);\n" |
36586 | "double3 __ovld sincos(double3 x, double3 *cosval);\n" |
36587 | "double4 __ovld sincos(double4 x, double4 *cosval);\n" |
36588 | "double8 __ovld sincos(double8 x, double8 *cosval);\n" |
36589 | "double16 __ovld sincos(double16 x, double16 *cosval);\n" |
36590 | "#endif //cl_khr_fp64\n" |
36591 | "#ifdef cl_khr_fp16\n" |
36592 | "half __ovld sincos(half x, half *cosval);\n" |
36593 | "half2 __ovld sincos(half2 x, half2 *cosval);\n" |
36594 | "half3 __ovld sincos(half3 x, half3 *cosval);\n" |
36595 | "half4 __ovld sincos(half4 x, half4 *cosval);\n" |
36596 | "half8 __ovld sincos(half8 x, half8 *cosval);\n" |
36597 | "half16 __ovld sincos(half16 x, half16 *cosval);\n" |
36598 | "#endif //cl_khr_fp16\n" |
36599 | "#else\n" |
36600 | "float __ovld sincos(float x, __global float *cosval);\n" |
36601 | "float2 __ovld sincos(float2 x, __global float2 *cosval);\n" |
36602 | "float3 __ovld sincos(float3 x, __global float3 *cosval);\n" |
36603 | "float4 __ovld sincos(float4 x, __global float4 *cosval);\n" |
36604 | "float8 __ovld sincos(float8 x, __global float8 *cosval);\n" |
36605 | "float16 __ovld sincos(float16 x, __global float16 *cosval);\n" |
36606 | "float __ovld sincos(float x, __local float *cosval);\n" |
36607 | "float2 __ovld sincos(float2 x, __local float2 *cosval);\n" |
36608 | "float3 __ovld sincos(float3 x, __local float3 *cosval);\n" |
36609 | "float4 __ovld sincos(float4 x, __local float4 *cosval);\n" |
36610 | "float8 __ovld sincos(float8 x, __local float8 *cosval);\n" |
36611 | "float16 __ovld sincos(float16 x, __local float16 *cosval);\n" |
36612 | "float __ovld sincos(float x, __private float *cosval);\n" |
36613 | "float2 __ovld sincos(float2 x, __private float2 *cosval);\n" |
36614 | "float3 __ovld sincos(float3 x, __private float3 *cosval);\n" |
36615 | "float4 __ovld sincos(float4 x, __private float4 *cosval);\n" |
36616 | "float8 __ovld sincos(float8 x, __private float8 *cosval);\n" |
36617 | "float16 __ovld sincos(float16 x, __private float16 *cosval);\n" |
36618 | "#ifdef cl_khr_fp64\n" |
36619 | "double __ovld sincos(double x, __global double *cosval);\n" |
36620 | "double2 __ovld sincos(double2 x, __global double2 *cosval);\n" |
36621 | "double3 __ovld sincos(double3 x, __global double3 *cosval);\n" |
36622 | "double4 __ovld sincos(double4 x, __global double4 *cosval);\n" |
36623 | "double8 __ovld sincos(double8 x, __global double8 *cosval);\n" |
36624 | "double16 __ovld sincos(double16 x, __global double16 *cosval);\n" |
36625 | "double __ovld sincos(double x, __local double *cosval);\n" |
36626 | "double2 __ovld sincos(double2 x, __local double2 *cosval);\n" |
36627 | "double3 __ovld sincos(double3 x, __local double3 *cosval);\n" |
36628 | "double4 __ovld sincos(double4 x, __local double4 *cosval);\n" |
36629 | "double8 __ovld sincos(double8 x, __local double8 *cosval);\n" |
36630 | "double16 __ovld sincos(double16 x, __local double16 *cosval);\n" |
36631 | "double __ovld sincos(double x, __private double *cosval);\n" |
36632 | "double2 __ovld sincos(double2 x, __private double2 *cosval);\n" |
36633 | "double3 __ovld sincos(double3 x, __private double3 *cosval);\n" |
36634 | "double4 __ovld sincos(double4 x, __private double4 *cosval);\n" |
36635 | "double8 __ovld sincos(double8 x, __private double8 *cosval);\n" |
36636 | "double16 __ovld sincos(double16 x, __private double16 *cosval);\n" |
36637 | "#endif //cl_khr_fp64\n" |
36638 | "#ifdef cl_khr_fp16\n" |
36639 | "half __ovld sincos(half x, __global half *cosval);\n" |
36640 | "half2 __ovld sincos(half2 x, __global half2 *cosval);\n" |
36641 | "half3 __ovld sincos(half3 x, __global half3 *cosval);\n" |
36642 | "half4 __ovld sincos(half4 x, __global half4 *cosval);\n" |
36643 | "half8 __ovld sincos(half8 x, __global half8 *cosval);\n" |
36644 | "half16 __ovld sincos(half16 x, __global half16 *cosval);\n" |
36645 | "half __ovld sincos(half x, __local half *cosval);\n" |
36646 | "half2 __ovld sincos(half2 x, __local half2 *cosval);\n" |
36647 | "half3 __ovld sincos(half3 x, __local half3 *cosval);\n" |
36648 | "half4 __ovld sincos(half4 x, __local half4 *cosval);\n" |
36649 | "half8 __ovld sincos(half8 x, __local half8 *cosval);\n" |
36650 | "half16 __ovld sincos(half16 x, __local half16 *cosval);\n" |
36651 | "half __ovld sincos(half x, __private half *cosval);\n" |
36652 | "half2 __ovld sincos(half2 x, __private half2 *cosval);\n" |
36653 | "half3 __ovld sincos(half3 x, __private half3 *cosval);\n" |
36654 | "half4 __ovld sincos(half4 x, __private half4 *cosval);\n" |
36655 | "half8 __ovld sincos(half8 x, __private half8 *cosval);\n" |
36656 | "half16 __ovld sincos(half16 x, __private half16 *cosval);\n" |
36657 | "#endif //cl_khr_fp16\n" |
36658 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
36659 | "\n" |
36660 | "/**\n" |
36661 | " * Compute hyperbolic sine.\n" |
36662 | " */\n" |
36663 | "float __ovld __cnfn sinh(float);\n" |
36664 | "float2 __ovld __cnfn sinh(float2);\n" |
36665 | "float3 __ovld __cnfn sinh(float3);\n" |
36666 | "float4 __ovld __cnfn sinh(float4);\n" |
36667 | "float8 __ovld __cnfn sinh(float8);\n" |
36668 | "float16 __ovld __cnfn sinh(float16);\n" |
36669 | "#ifdef cl_khr_fp64\n" |
36670 | "double __ovld __cnfn sinh(double);\n" |
36671 | "double2 __ovld __cnfn sinh(double2);\n" |
36672 | "double3 __ovld __cnfn sinh(double3);\n" |
36673 | "double4 __ovld __cnfn sinh(double4);\n" |
36674 | "double8 __ovld __cnfn sinh(double8);\n" |
36675 | "double16 __ovld __cnfn sinh(double16);\n" |
36676 | "#endif //cl_khr_fp64\n" |
36677 | "#ifdef cl_khr_fp16\n" |
36678 | "half __ovld __cnfn sinh(half);\n" |
36679 | "half2 __ovld __cnfn sinh(half2);\n" |
36680 | "half3 __ovld __cnfn sinh(half3);\n" |
36681 | "half4 __ovld __cnfn sinh(half4);\n" |
36682 | "half8 __ovld __cnfn sinh(half8);\n" |
36683 | "half16 __ovld __cnfn sinh(half16);\n" |
36684 | "#endif //cl_khr_fp16\n" |
36685 | "\n" |
36686 | "/**\n" |
36687 | " * Compute sin (PI * x).\n" |
36688 | " */\n" |
36689 | "float __ovld __cnfn sinpi(float x);\n" |
36690 | "float2 __ovld __cnfn sinpi(float2 x);\n" |
36691 | "float3 __ovld __cnfn sinpi(float3 x);\n" |
36692 | "float4 __ovld __cnfn sinpi(float4 x);\n" |
36693 | "float8 __ovld __cnfn sinpi(float8 x);\n" |
36694 | "float16 __ovld __cnfn sinpi(float16 x);\n" |
36695 | "#ifdef cl_khr_fp64\n" |
36696 | "double __ovld __cnfn sinpi(double x);\n" |
36697 | "double2 __ovld __cnfn sinpi(double2 x);\n" |
36698 | "double3 __ovld __cnfn sinpi(double3 x);\n" |
36699 | "double4 __ovld __cnfn sinpi(double4 x);\n" |
36700 | "double8 __ovld __cnfn sinpi(double8 x);\n" |
36701 | "double16 __ovld __cnfn sinpi(double16 x);\n" |
36702 | "#endif //cl_khr_fp64\n" |
36703 | "#ifdef cl_khr_fp16\n" |
36704 | "half __ovld __cnfn sinpi(half x);\n" |
36705 | "half2 __ovld __cnfn sinpi(half2 x);\n" |
36706 | "half3 __ovld __cnfn sinpi(half3 x);\n" |
36707 | "half4 __ovld __cnfn sinpi(half4 x);\n" |
36708 | "half8 __ovld __cnfn sinpi(half8 x);\n" |
36709 | "half16 __ovld __cnfn sinpi(half16 x);\n" |
36710 | "#endif //cl_khr_fp16\n" |
36711 | "\n" |
36712 | "/**\n" |
36713 | " * Compute square root.\n" |
36714 | " */\n" |
36715 | "float __ovld __cnfn sqrt(float);\n" |
36716 | "float2 __ovld __cnfn sqrt(float2);\n" |
36717 | "float3 __ovld __cnfn sqrt(float3);\n" |
36718 | "float4 __ovld __cnfn sqrt(float4);\n" |
36719 | "float8 __ovld __cnfn sqrt(float8);\n" |
36720 | "float16 __ovld __cnfn sqrt(float16);\n" |
36721 | "#ifdef cl_khr_fp64\n" |
36722 | "double __ovld __cnfn sqrt(double);\n" |
36723 | "double2 __ovld __cnfn sqrt(double2);\n" |
36724 | "double3 __ovld __cnfn sqrt(double3);\n" |
36725 | "double4 __ovld __cnfn sqrt(double4);\n" |
36726 | "double8 __ovld __cnfn sqrt(double8);\n" |
36727 | "double16 __ovld __cnfn sqrt(double16);\n" |
36728 | "#endif //cl_khr_fp64\n" |
36729 | "#ifdef cl_khr_fp16\n" |
36730 | "half __ovld __cnfn sqrt(half);\n" |
36731 | "half2 __ovld __cnfn sqrt(half2);\n" |
36732 | "half3 __ovld __cnfn sqrt(half3);\n" |
36733 | "half4 __ovld __cnfn sqrt(half4);\n" |
36734 | "half8 __ovld __cnfn sqrt(half8);\n" |
36735 | "half16 __ovld __cnfn sqrt(half16);\n" |
36736 | "#endif //cl_khr_fp16\n" |
36737 | "\n" |
36738 | "/**\n" |
36739 | " * Compute tangent.\n" |
36740 | " */\n" |
36741 | "float __ovld __cnfn tan(float);\n" |
36742 | "float2 __ovld __cnfn tan(float2);\n" |
36743 | "float3 __ovld __cnfn tan(float3);\n" |
36744 | "float4 __ovld __cnfn tan(float4);\n" |
36745 | "float8 __ovld __cnfn tan(float8);\n" |
36746 | "float16 __ovld __cnfn tan(float16);\n" |
36747 | "#ifdef cl_khr_fp64\n" |
36748 | "double __ovld __cnfn tan(double);\n" |
36749 | "double2 __ovld __cnfn tan(double2);\n" |
36750 | "double3 __ovld __cnfn tan(double3);\n" |
36751 | "double4 __ovld __cnfn tan(double4);\n" |
36752 | "double8 __ovld __cnfn tan(double8);\n" |
36753 | "double16 __ovld __cnfn tan(double16);\n" |
36754 | "#endif //cl_khr_fp64\n" |
36755 | "#ifdef cl_khr_fp16\n" |
36756 | "half __ovld __cnfn tan(half);\n" |
36757 | "half2 __ovld __cnfn tan(half2);\n" |
36758 | "half3 __ovld __cnfn tan(half3);\n" |
36759 | "half4 __ovld __cnfn tan(half4);\n" |
36760 | "half8 __ovld __cnfn tan(half8);\n" |
36761 | "half16 __ovld __cnfn tan(half16);\n" |
36762 | "#endif //cl_khr_fp16\n" |
36763 | "\n" |
36764 | "/**\n" |
36765 | " * Compute hyperbolic tangent.\n" |
36766 | " */\n" |
36767 | "float __ovld __cnfn tanh(float);\n" |
36768 | "float2 __ovld __cnfn tanh(float2);\n" |
36769 | "float3 __ovld __cnfn tanh(float3);\n" |
36770 | "float4 __ovld __cnfn tanh(float4);\n" |
36771 | "float8 __ovld __cnfn tanh(float8);\n" |
36772 | "float16 __ovld __cnfn tanh(float16);\n" |
36773 | "#ifdef cl_khr_fp64\n" |
36774 | "double __ovld __cnfn tanh(double);\n" |
36775 | "double2 __ovld __cnfn tanh(double2);\n" |
36776 | "double3 __ovld __cnfn tanh(double3);\n" |
36777 | "double4 __ovld __cnfn tanh(double4);\n" |
36778 | "double8 __ovld __cnfn tanh(double8);\n" |
36779 | "double16 __ovld __cnfn tanh(double16);\n" |
36780 | "#endif //cl_khr_fp64\n" |
36781 | "#ifdef cl_khr_fp16\n" |
36782 | "half __ovld __cnfn tanh(half);\n" |
36783 | "half2 __ovld __cnfn tanh(half2);\n" |
36784 | "half3 __ovld __cnfn tanh(half3);\n" |
36785 | "half4 __ovld __cnfn tanh(half4);\n" |
36786 | "half8 __ovld __cnfn tanh(half8);\n" |
36787 | "half16 __ovld __cnfn tanh(half16);\n" |
36788 | "#endif //cl_khr_fp16\n" |
36789 | "\n" |
36790 | "/**\n" |
36791 | " * Compute tan (PI * x).\n" |
36792 | " */\n" |
36793 | "float __ovld __cnfn tanpi(float x);\n" |
36794 | "float2 __ovld __cnfn tanpi(float2 x);\n" |
36795 | "float3 __ovld __cnfn tanpi(float3 x);\n" |
36796 | "float4 __ovld __cnfn tanpi(float4 x);\n" |
36797 | "float8 __ovld __cnfn tanpi(float8 x);\n" |
36798 | "float16 __ovld __cnfn tanpi(float16 x);\n" |
36799 | "#ifdef cl_khr_fp64\n" |
36800 | "double __ovld __cnfn tanpi(double x);\n" |
36801 | "double2 __ovld __cnfn tanpi(double2 x);\n" |
36802 | "double3 __ovld __cnfn tanpi(double3 x);\n" |
36803 | "double4 __ovld __cnfn tanpi(double4 x);\n" |
36804 | "double8 __ovld __cnfn tanpi(double8 x);\n" |
36805 | "double16 __ovld __cnfn tanpi(double16 x);\n" |
36806 | "#endif //cl_khr_fp64\n" |
36807 | "#ifdef cl_khr_fp16\n" |
36808 | "half __ovld __cnfn tanpi(half x);\n" |
36809 | "half2 __ovld __cnfn tanpi(half2 x);\n" |
36810 | "half3 __ovld __cnfn tanpi(half3 x);\n" |
36811 | "half4 __ovld __cnfn tanpi(half4 x);\n" |
36812 | "half8 __ovld __cnfn tanpi(half8 x);\n" |
36813 | "half16 __ovld __cnfn tanpi(half16 x);\n" |
36814 | "#endif //cl_khr_fp16\n" |
36815 | "\n" |
36816 | "/**\n" |
36817 | " * Compute the gamma function.\n" |
36818 | " */\n" |
36819 | "float __ovld __cnfn tgamma(float);\n" |
36820 | "float2 __ovld __cnfn tgamma(float2);\n" |
36821 | "float3 __ovld __cnfn tgamma(float3);\n" |
36822 | "float4 __ovld __cnfn tgamma(float4);\n" |
36823 | "float8 __ovld __cnfn tgamma(float8);\n" |
36824 | "float16 __ovld __cnfn tgamma(float16);\n" |
36825 | "#ifdef cl_khr_fp64\n" |
36826 | "double __ovld __cnfn tgamma(double);\n" |
36827 | "double2 __ovld __cnfn tgamma(double2);\n" |
36828 | "double3 __ovld __cnfn tgamma(double3);\n" |
36829 | "double4 __ovld __cnfn tgamma(double4);\n" |
36830 | "double8 __ovld __cnfn tgamma(double8);\n" |
36831 | "double16 __ovld __cnfn tgamma(double16);\n" |
36832 | "#endif //cl_khr_fp64\n" |
36833 | "#ifdef cl_khr_fp16\n" |
36834 | "half __ovld __cnfn tgamma(half);\n" |
36835 | "half2 __ovld __cnfn tgamma(half2);\n" |
36836 | "half3 __ovld __cnfn tgamma(half3);\n" |
36837 | "half4 __ovld __cnfn tgamma(half4);\n" |
36838 | "half8 __ovld __cnfn tgamma(half8);\n" |
36839 | "half16 __ovld __cnfn tgamma(half16);\n" |
36840 | "#endif //cl_khr_fp16\n" |
36841 | "\n" |
36842 | "/**\n" |
36843 | " * Round to integral value using the round to zero\n" |
36844 | " * rounding mode.\n" |
36845 | " */\n" |
36846 | "float __ovld __cnfn trunc(float);\n" |
36847 | "float2 __ovld __cnfn trunc(float2);\n" |
36848 | "float3 __ovld __cnfn trunc(float3);\n" |
36849 | "float4 __ovld __cnfn trunc(float4);\n" |
36850 | "float8 __ovld __cnfn trunc(float8);\n" |
36851 | "float16 __ovld __cnfn trunc(float16);\n" |
36852 | "#ifdef cl_khr_fp64\n" |
36853 | "double __ovld __cnfn trunc(double);\n" |
36854 | "double2 __ovld __cnfn trunc(double2);\n" |
36855 | "double3 __ovld __cnfn trunc(double3);\n" |
36856 | "double4 __ovld __cnfn trunc(double4);\n" |
36857 | "double8 __ovld __cnfn trunc(double8);\n" |
36858 | "double16 __ovld __cnfn trunc(double16);\n" |
36859 | "#endif //cl_khr_fp64\n" |
36860 | "#ifdef cl_khr_fp16\n" |
36861 | "half __ovld __cnfn trunc(half);\n" |
36862 | "half2 __ovld __cnfn trunc(half2);\n" |
36863 | "half3 __ovld __cnfn trunc(half3);\n" |
36864 | "half4 __ovld __cnfn trunc(half4);\n" |
36865 | "half8 __ovld __cnfn trunc(half8);\n" |
36866 | "half16 __ovld __cnfn trunc(half16);\n" |
36867 | "#endif //cl_khr_fp16\n" |
36868 | "\n" |
36869 | "/**\n" |
36870 | " * Compute cosine. x must be in the range -2^16 ... +2^16.\n" |
36871 | " */\n" |
36872 | "float __ovld __cnfn half_cos(float x);\n" |
36873 | "float2 __ovld __cnfn half_cos(float2 x);\n" |
36874 | "float3 __ovld __cnfn half_cos(float3 x);\n" |
36875 | "float4 __ovld __cnfn half_cos(float4 x);\n" |
36876 | "float8 __ovld __cnfn half_cos(float8 x);\n" |
36877 | "float16 __ovld __cnfn half_cos(float16 x);\n" |
36878 | "\n" |
36879 | "/**\n" |
36880 | " * Compute x / y.\n" |
36881 | " */\n" |
36882 | "float __ovld __cnfn half_divide(float x, float y);\n" |
36883 | "float2 __ovld __cnfn half_divide(float2 x, float2 y);\n" |
36884 | "float3 __ovld __cnfn half_divide(float3 x, float3 y);\n" |
36885 | "float4 __ovld __cnfn half_divide(float4 x, float4 y);\n" |
36886 | "float8 __ovld __cnfn half_divide(float8 x, float8 y);\n" |
36887 | "float16 __ovld __cnfn half_divide(float16 x, float16 y);\n" |
36888 | "\n" |
36889 | "/**\n" |
36890 | " * Compute the base-e exponential of x.\n" |
36891 | " */\n" |
36892 | "float __ovld __cnfn half_exp(float x);\n" |
36893 | "float2 __ovld __cnfn half_exp(float2 x);\n" |
36894 | "float3 __ovld __cnfn half_exp(float3 x);\n" |
36895 | "float4 __ovld __cnfn half_exp(float4 x);\n" |
36896 | "float8 __ovld __cnfn half_exp(float8 x);\n" |
36897 | "float16 __ovld __cnfn half_exp(float16 x);\n" |
36898 | "\n" |
36899 | "/**\n" |
36900 | " * Compute the base-2 exponential of x.\n" |
36901 | " */\n" |
36902 | "float __ovld __cnfn half_exp2(float x);\n" |
36903 | "float2 __ovld __cnfn half_exp2(float2 x);\n" |
36904 | "float3 __ovld __cnfn half_exp2(float3 x);\n" |
36905 | "float4 __ovld __cnfn half_exp2(float4 x);\n" |
36906 | "float8 __ovld __cnfn half_exp2(float8 x);\n" |
36907 | "float16 __ovld __cnfn half_exp2(float16 x);\n" |
36908 | "\n" |
36909 | "/**\n" |
36910 | " * Compute the base-10 exponential of x.\n" |
36911 | " */\n" |
36912 | "float __ovld __cnfn half_exp10(float x);\n" |
36913 | "float2 __ovld __cnfn half_exp10(float2 x);\n" |
36914 | "float3 __ovld __cnfn half_exp10(float3 x);\n" |
36915 | "float4 __ovld __cnfn half_exp10(float4 x);\n" |
36916 | "float8 __ovld __cnfn half_exp10(float8 x);\n" |
36917 | "float16 __ovld __cnfn half_exp10(float16 x);\n" |
36918 | "\n" |
36919 | "/**\n" |
36920 | " * Compute natural logarithm.\n" |
36921 | " */\n" |
36922 | "float __ovld __cnfn half_log(float x);\n" |
36923 | "float2 __ovld __cnfn half_log(float2 x);\n" |
36924 | "float3 __ovld __cnfn half_log(float3 x);\n" |
36925 | "float4 __ovld __cnfn half_log(float4 x);\n" |
36926 | "float8 __ovld __cnfn half_log(float8 x);\n" |
36927 | "float16 __ovld __cnfn half_log(float16 x);\n" |
36928 | "\n" |
36929 | "/**\n" |
36930 | " * Compute a base 2 logarithm.\n" |
36931 | " */\n" |
36932 | "float __ovld __cnfn half_log2(float x);\n" |
36933 | "float2 __ovld __cnfn half_log2(float2 x);\n" |
36934 | "float3 __ovld __cnfn half_log2(float3 x);\n" |
36935 | "float4 __ovld __cnfn half_log2(float4 x);\n" |
36936 | "float8 __ovld __cnfn half_log2(float8 x);\n" |
36937 | "float16 __ovld __cnfn half_log2(float16 x);\n" |
36938 | "\n" |
36939 | "/**\n" |
36940 | " * Compute a base 10 logarithm.\n" |
36941 | " */\n" |
36942 | "float __ovld __cnfn half_log10(float x);\n" |
36943 | "float2 __ovld __cnfn half_log10(float2 x);\n" |
36944 | "float3 __ovld __cnfn half_log10(float3 x);\n" |
36945 | "float4 __ovld __cnfn half_log10(float4 x);\n" |
36946 | "float8 __ovld __cnfn half_log10(float8 x);\n" |
36947 | "float16 __ovld __cnfn half_log10(float16 x);\n" |
36948 | "\n" |
36949 | "/**\n" |
36950 | " * Compute x to the power y, where x is >= 0.\n" |
36951 | " */\n" |
36952 | "float __ovld __cnfn half_powr(float x, float y);\n" |
36953 | "float2 __ovld __cnfn half_powr(float2 x, float2 y);\n" |
36954 | "float3 __ovld __cnfn half_powr(float3 x, float3 y);\n" |
36955 | "float4 __ovld __cnfn half_powr(float4 x, float4 y);\n" |
36956 | "float8 __ovld __cnfn half_powr(float8 x, float8 y);\n" |
36957 | "float16 __ovld __cnfn half_powr(float16 x, float16 y);\n" |
36958 | "\n" |
36959 | "/**\n" |
36960 | " * Compute reciprocal.\n" |
36961 | " */\n" |
36962 | "float __ovld __cnfn half_recip(float x);\n" |
36963 | "float2 __ovld __cnfn half_recip(float2 x);\n" |
36964 | "float3 __ovld __cnfn half_recip(float3 x);\n" |
36965 | "float4 __ovld __cnfn half_recip(float4 x);\n" |
36966 | "float8 __ovld __cnfn half_recip(float8 x);\n" |
36967 | "float16 __ovld __cnfn half_recip(float16 x);\n" |
36968 | "\n" |
36969 | "/**\n" |
36970 | " * Compute inverse square root.\n" |
36971 | " */\n" |
36972 | "float __ovld __cnfn half_rsqrt(float x);\n" |
36973 | "float2 __ovld __cnfn half_rsqrt(float2 x);\n" |
36974 | "float3 __ovld __cnfn half_rsqrt(float3 x);\n" |
36975 | "float4 __ovld __cnfn half_rsqrt(float4 x);\n" |
36976 | "float8 __ovld __cnfn half_rsqrt(float8 x);\n" |
36977 | "float16 __ovld __cnfn half_rsqrt(float16 x);\n" |
36978 | "\n" |
36979 | "/**\n" |
36980 | " * Compute sine. x must be in the range -2^16 ... +2^16.\n" |
36981 | " */\n" |
36982 | "float __ovld __cnfn half_sin(float x);\n" |
36983 | "float2 __ovld __cnfn half_sin(float2 x);\n" |
36984 | "float3 __ovld __cnfn half_sin(float3 x);\n" |
36985 | "float4 __ovld __cnfn half_sin(float4 x);\n" |
36986 | "float8 __ovld __cnfn half_sin(float8 x);\n" |
36987 | "float16 __ovld __cnfn half_sin(float16 x);\n" |
36988 | "\n" |
36989 | "/**\n" |
36990 | " * Compute square root.\n" |
36991 | " */\n" |
36992 | "float __ovld __cnfn half_sqrt(float x);\n" |
36993 | "float2 __ovld __cnfn half_sqrt(float2 x);\n" |
36994 | "float3 __ovld __cnfn half_sqrt(float3 x);\n" |
36995 | "float4 __ovld __cnfn half_sqrt(float4 x);\n" |
36996 | "float8 __ovld __cnfn half_sqrt(float8 x);\n" |
36997 | "float16 __ovld __cnfn half_sqrt(float16 x);\n" |
36998 | "\n" |
36999 | "/**\n" |
37000 | " * Compute tangent. x must be in the range -2^16 ... +2^16.\n" |
37001 | " */\n" |
37002 | "float __ovld __cnfn half_tan(float x);\n" |
37003 | "float2 __ovld __cnfn half_tan(float2 x);\n" |
37004 | "float3 __ovld __cnfn half_tan(float3 x);\n" |
37005 | "float4 __ovld __cnfn half_tan(float4 x);\n" |
37006 | "float8 __ovld __cnfn half_tan(float8 x);\n" |
37007 | "float16 __ovld __cnfn half_tan(float16 x);\n" |
37008 | "\n" |
37009 | "/**\n" |
37010 | " * Compute cosine over an implementation-defined range.\n" |
37011 | " * The maximum error is implementation-defined.\n" |
37012 | " */\n" |
37013 | "float __ovld __cnfn native_cos(float x);\n" |
37014 | "float2 __ovld __cnfn native_cos(float2 x);\n" |
37015 | "float3 __ovld __cnfn native_cos(float3 x);\n" |
37016 | "float4 __ovld __cnfn native_cos(float4 x);\n" |
37017 | "float8 __ovld __cnfn native_cos(float8 x);\n" |
37018 | "float16 __ovld __cnfn native_cos(float16 x);\n" |
37019 | "\n" |
37020 | "/**\n" |
37021 | " * Compute x / y over an implementation-defined range.\n" |
37022 | " * The maximum error is implementation-defined.\n" |
37023 | " */\n" |
37024 | "float __ovld __cnfn native_divide(float x, float y);\n" |
37025 | "float2 __ovld __cnfn native_divide(float2 x, float2 y);\n" |
37026 | "float3 __ovld __cnfn native_divide(float3 x, float3 y);\n" |
37027 | "float4 __ovld __cnfn native_divide(float4 x, float4 y);\n" |
37028 | "float8 __ovld __cnfn native_divide(float8 x, float8 y);\n" |
37029 | "float16 __ovld __cnfn native_divide(float16 x, float16 y);\n" |
37030 | "\n" |
37031 | "/**\n" |
37032 | " * Compute the base-e exponential of x over an\n" |
37033 | " * implementation-defined range. The maximum error is\n" |
37034 | " * implementation-defined.\n" |
37035 | " */\n" |
37036 | "float __ovld __cnfn native_exp(float x);\n" |
37037 | "float2 __ovld __cnfn native_exp(float2 x);\n" |
37038 | "float3 __ovld __cnfn native_exp(float3 x);\n" |
37039 | "float4 __ovld __cnfn native_exp(float4 x);\n" |
37040 | "float8 __ovld __cnfn native_exp(float8 x);\n" |
37041 | "float16 __ovld __cnfn native_exp(float16 x);\n" |
37042 | "\n" |
37043 | "/**\n" |
37044 | " * Compute the base-2 exponential of x over an\n" |
37045 | " * implementation-defined range. The maximum error is\n" |
37046 | " * implementation-defined.\n" |
37047 | " */\n" |
37048 | "float __ovld __cnfn native_exp2(float x);\n" |
37049 | "float2 __ovld __cnfn native_exp2(float2 x);\n" |
37050 | "float3 __ovld __cnfn native_exp2(float3 x);\n" |
37051 | "float4 __ovld __cnfn native_exp2(float4 x);\n" |
37052 | "float8 __ovld __cnfn native_exp2(float8 x);\n" |
37053 | "float16 __ovld __cnfn native_exp2(float16 x);\n" |
37054 | "\n" |
37055 | "/**\n" |
37056 | " * Compute the base-10 exponential of x over an\n" |
37057 | " * implementation-defined range. The maximum error is\n" |
37058 | " * implementation-defined.\n" |
37059 | " */\n" |
37060 | "float __ovld __cnfn native_exp10(float x);\n" |
37061 | "float2 __ovld __cnfn native_exp10(float2 x);\n" |
37062 | "float3 __ovld __cnfn native_exp10(float3 x);\n" |
37063 | "float4 __ovld __cnfn native_exp10(float4 x);\n" |
37064 | "float8 __ovld __cnfn native_exp10(float8 x);\n" |
37065 | "float16 __ovld __cnfn native_exp10(float16 x);\n" |
37066 | "\n" |
37067 | "/**\n" |
37068 | " * Compute natural logarithm over an implementation-defined\n" |
37069 | " * range. The maximum error is\n" |
37070 | " * implementation-defined.\n" |
37071 | " */\n" |
37072 | "float __ovld __cnfn native_log(float x);\n" |
37073 | "float2 __ovld __cnfn native_log(float2 x);\n" |
37074 | "float3 __ovld __cnfn native_log(float3 x);\n" |
37075 | "float4 __ovld __cnfn native_log(float4 x);\n" |
37076 | "float8 __ovld __cnfn native_log(float8 x);\n" |
37077 | "float16 __ovld __cnfn native_log(float16 x);\n" |
37078 | "\n" |
37079 | "/**\n" |
37080 | " * Compute a base 2 logarithm over an implementation-defined\n" |
37081 | " * range. The maximum error is implementation-defined.\n" |
37082 | " */\n" |
37083 | "float __ovld __cnfn native_log2(float x);\n" |
37084 | "float2 __ovld __cnfn native_log2(float2 x);\n" |
37085 | "float3 __ovld __cnfn native_log2(float3 x);\n" |
37086 | "float4 __ovld __cnfn native_log2(float4 x);\n" |
37087 | "float8 __ovld __cnfn native_log2(float8 x);\n" |
37088 | "float16 __ovld __cnfn native_log2(float16 x);\n" |
37089 | "\n" |
37090 | "/**\n" |
37091 | " * Compute a base 10 logarithm over an implementation-defined\n" |
37092 | " * range. The maximum error is implementation-defined.\n" |
37093 | " */\n" |
37094 | "float __ovld __cnfn native_log10(float x);\n" |
37095 | "float2 __ovld __cnfn native_log10(float2 x);\n" |
37096 | "float3 __ovld __cnfn native_log10(float3 x);\n" |
37097 | "float4 __ovld __cnfn native_log10(float4 x);\n" |
37098 | "float8 __ovld __cnfn native_log10(float8 x);\n" |
37099 | "float16 __ovld __cnfn native_log10(float16 x);\n" |
37100 | "\n" |
37101 | "/**\n" |
37102 | " * Compute x to the power y, where x is >= 0. The range of\n" |
37103 | " * x and y are implementation-defined. The maximum error\n" |
37104 | " * is implementation-defined.\n" |
37105 | " */\n" |
37106 | "float __ovld __cnfn native_powr(float x, float y);\n" |
37107 | "float2 __ovld __cnfn native_powr(float2 x, float2 y);\n" |
37108 | "float3 __ovld __cnfn native_powr(float3 x, float3 y);\n" |
37109 | "float4 __ovld __cnfn native_powr(float4 x, float4 y);\n" |
37110 | "float8 __ovld __cnfn native_powr(float8 x, float8 y);\n" |
37111 | "float16 __ovld __cnfn native_powr(float16 x, float16 y);\n" |
37112 | "\n" |
37113 | "/**\n" |
37114 | " * Compute reciprocal over an implementation-defined\n" |
37115 | " * range. The maximum error is implementation-defined.\n" |
37116 | " */\n" |
37117 | "float __ovld __cnfn native_recip(float x);\n" |
37118 | "float2 __ovld __cnfn native_recip(float2 x);\n" |
37119 | "float3 __ovld __cnfn native_recip(float3 x);\n" |
37120 | "float4 __ovld __cnfn native_recip(float4 x);\n" |
37121 | "float8 __ovld __cnfn native_recip(float8 x);\n" |
37122 | "float16 __ovld __cnfn native_recip(float16 x);\n" |
37123 | "\n" |
37124 | "/**\n" |
37125 | " * Compute inverse square root over an implementation-defined\n" |
37126 | " * range. The maximum error is implementation-defined.\n" |
37127 | " */\n" |
37128 | "float __ovld __cnfn native_rsqrt(float x);\n" |
37129 | "float2 __ovld __cnfn native_rsqrt(float2 x);\n" |
37130 | "float3 __ovld __cnfn native_rsqrt(float3 x);\n" |
37131 | "float4 __ovld __cnfn native_rsqrt(float4 x);\n" |
37132 | "float8 __ovld __cnfn native_rsqrt(float8 x);\n" |
37133 | "float16 __ovld __cnfn native_rsqrt(float16 x);\n" |
37134 | "\n" |
37135 | "/**\n" |
37136 | " * Compute sine over an implementation-defined range.\n" |
37137 | " * The maximum error is implementation-defined.\n" |
37138 | " */\n" |
37139 | "float __ovld __cnfn native_sin(float x);\n" |
37140 | "float2 __ovld __cnfn native_sin(float2 x);\n" |
37141 | "float3 __ovld __cnfn native_sin(float3 x);\n" |
37142 | "float4 __ovld __cnfn native_sin(float4 x);\n" |
37143 | "float8 __ovld __cnfn native_sin(float8 x);\n" |
37144 | "float16 __ovld __cnfn native_sin(float16 x);\n" |
37145 | "\n" |
37146 | "/**\n" |
37147 | " * Compute square root over an implementation-defined\n" |
37148 | " * range. The maximum error is implementation-defined.\n" |
37149 | " */\n" |
37150 | "float __ovld __cnfn native_sqrt(float x);\n" |
37151 | "float2 __ovld __cnfn native_sqrt(float2 x);\n" |
37152 | "float3 __ovld __cnfn native_sqrt(float3 x);\n" |
37153 | "float4 __ovld __cnfn native_sqrt(float4 x);\n" |
37154 | "float8 __ovld __cnfn native_sqrt(float8 x);\n" |
37155 | "float16 __ovld __cnfn native_sqrt(float16 x);\n" |
37156 | "\n" |
37157 | "/**\n" |
37158 | " * Compute tangent over an implementation-defined range.\n" |
37159 | " * The maximum error is implementation-defined.\n" |
37160 | " */\n" |
37161 | "float __ovld __cnfn native_tan(float x);\n" |
37162 | "float2 __ovld __cnfn native_tan(float2 x);\n" |
37163 | "float3 __ovld __cnfn native_tan(float3 x);\n" |
37164 | "float4 __ovld __cnfn native_tan(float4 x);\n" |
37165 | "float8 __ovld __cnfn native_tan(float8 x);\n" |
37166 | "float16 __ovld __cnfn native_tan(float16 x);\n" |
37167 | "\n" |
37168 | "// OpenCL v1.1 s6.11.3, v1.2 s6.12.3, v2.0 s6.13.3 - Integer Functions\n" |
37169 | "\n" |
37170 | "/**\n" |
37171 | " * Returns | x |.\n" |
37172 | " */\n" |
37173 | "uchar __ovld __cnfn abs(char x);\n" |
37174 | "uchar __ovld __cnfn abs(uchar x);\n" |
37175 | "uchar2 __ovld __cnfn abs(char2 x);\n" |
37176 | "uchar2 __ovld __cnfn abs(uchar2 x);\n" |
37177 | "uchar3 __ovld __cnfn abs(char3 x);\n" |
37178 | "uchar3 __ovld __cnfn abs(uchar3 x);\n" |
37179 | "uchar4 __ovld __cnfn abs(char4 x);\n" |
37180 | "uchar4 __ovld __cnfn abs(uchar4 x);\n" |
37181 | "uchar8 __ovld __cnfn abs(char8 x);\n" |
37182 | "uchar8 __ovld __cnfn abs(uchar8 x);\n" |
37183 | "uchar16 __ovld __cnfn abs(char16 x);\n" |
37184 | "uchar16 __ovld __cnfn abs(uchar16 x);\n" |
37185 | "ushort __ovld __cnfn abs(short x);\n" |
37186 | "ushort __ovld __cnfn abs(ushort x);\n" |
37187 | "ushort2 __ovld __cnfn abs(short2 x);\n" |
37188 | "ushort2 __ovld __cnfn abs(ushort2 x);\n" |
37189 | "ushort3 __ovld __cnfn abs(short3 x);\n" |
37190 | "ushort3 __ovld __cnfn abs(ushort3 x);\n" |
37191 | "ushort4 __ovld __cnfn abs(short4 x);\n" |
37192 | "ushort4 __ovld __cnfn abs(ushort4 x);\n" |
37193 | "ushort8 __ovld __cnfn abs(short8 x);\n" |
37194 | "ushort8 __ovld __cnfn abs(ushort8 x);\n" |
37195 | "ushort16 __ovld __cnfn abs(short16 x);\n" |
37196 | "ushort16 __ovld __cnfn abs(ushort16 x);\n" |
37197 | "uint __ovld __cnfn abs(int x);\n" |
37198 | "uint __ovld __cnfn abs(uint x);\n" |
37199 | "uint2 __ovld __cnfn abs(int2 x);\n" |
37200 | "uint2 __ovld __cnfn abs(uint2 x);\n" |
37201 | "uint3 __ovld __cnfn abs(int3 x);\n" |
37202 | "uint3 __ovld __cnfn abs(uint3 x);\n" |
37203 | "uint4 __ovld __cnfn abs(int4 x);\n" |
37204 | "uint4 __ovld __cnfn abs(uint4 x);\n" |
37205 | "uint8 __ovld __cnfn abs(int8 x);\n" |
37206 | "uint8 __ovld __cnfn abs(uint8 x);\n" |
37207 | "uint16 __ovld __cnfn abs(int16 x);\n" |
37208 | "uint16 __ovld __cnfn abs(uint16 x);\n" |
37209 | "ulong __ovld __cnfn abs(long x);\n" |
37210 | "ulong __ovld __cnfn abs(ulong x);\n" |
37211 | "ulong2 __ovld __cnfn abs(long2 x);\n" |
37212 | "ulong2 __ovld __cnfn abs(ulong2 x);\n" |
37213 | "ulong3 __ovld __cnfn abs(long3 x);\n" |
37214 | "ulong3 __ovld __cnfn abs(ulong3 x);\n" |
37215 | "ulong4 __ovld __cnfn abs(long4 x);\n" |
37216 | "ulong4 __ovld __cnfn abs(ulong4 x);\n" |
37217 | "ulong8 __ovld __cnfn abs(long8 x);\n" |
37218 | "ulong8 __ovld __cnfn abs(ulong8 x);\n" |
37219 | "ulong16 __ovld __cnfn abs(long16 x);\n" |
37220 | "ulong16 __ovld __cnfn abs(ulong16 x);\n" |
37221 | "\n" |
37222 | "/**\n" |
37223 | " * Returns | x - y | without modulo overflow.\n" |
37224 | " */\n" |
37225 | "uchar __ovld __cnfn abs_diff(char x, char y);\n" |
37226 | "uchar __ovld __cnfn abs_diff(uchar x, uchar y);\n" |
37227 | "uchar2 __ovld __cnfn abs_diff(char2 x, char2 y);\n" |
37228 | "uchar2 __ovld __cnfn abs_diff(uchar2 x, uchar2 y);\n" |
37229 | "uchar3 __ovld __cnfn abs_diff(char3 x, char3 y);\n" |
37230 | "uchar3 __ovld __cnfn abs_diff(uchar3 x, uchar3 y);\n" |
37231 | "uchar4 __ovld __cnfn abs_diff(char4 x, char4 y);\n" |
37232 | "uchar4 __ovld __cnfn abs_diff(uchar4 x, uchar4 y);\n" |
37233 | "uchar8 __ovld __cnfn abs_diff(char8 x, char8 y);\n" |
37234 | "uchar8 __ovld __cnfn abs_diff(uchar8 x, uchar8 y);\n" |
37235 | "uchar16 __ovld __cnfn abs_diff(char16 x, char16 y);\n" |
37236 | "uchar16 __ovld __cnfn abs_diff(uchar16 x, uchar16 y);\n" |
37237 | "ushort __ovld __cnfn abs_diff(short x, short y);\n" |
37238 | "ushort __ovld __cnfn abs_diff(ushort x, ushort y);\n" |
37239 | "ushort2 __ovld __cnfn abs_diff(short2 x, short2 y);\n" |
37240 | "ushort2 __ovld __cnfn abs_diff(ushort2 x, ushort2 y);\n" |
37241 | "ushort3 __ovld __cnfn abs_diff(short3 x, short3 y);\n" |
37242 | "ushort3 __ovld __cnfn abs_diff(ushort3 x, ushort3 y);\n" |
37243 | "ushort4 __ovld __cnfn abs_diff(short4 x, short4 y);\n" |
37244 | "ushort4 __ovld __cnfn abs_diff(ushort4 x, ushort4 y);\n" |
37245 | "ushort8 __ovld __cnfn abs_diff(short8 x, short8 y);\n" |
37246 | "ushort8 __ovld __cnfn abs_diff(ushort8 x, ushort8 y);\n" |
37247 | "ushort16 __ovld __cnfn abs_diff(short16 x, short16 y);\n" |
37248 | "ushort16 __ovld __cnfn abs_diff(ushort16 x, ushort16 y);\n" |
37249 | "uint __ovld __cnfn abs_diff(int x, int y);\n" |
37250 | "uint __ovld __cnfn abs_diff(uint x, uint y);\n" |
37251 | "uint2 __ovld __cnfn abs_diff(int2 x, int2 y);\n" |
37252 | "uint2 __ovld __cnfn abs_diff(uint2 x, uint2 y);\n" |
37253 | "uint3 __ovld __cnfn abs_diff(int3 x, int3 y);\n" |
37254 | "uint3 __ovld __cnfn abs_diff(uint3 x, uint3 y);\n" |
37255 | "uint4 __ovld __cnfn abs_diff(int4 x, int4 y);\n" |
37256 | "uint4 __ovld __cnfn abs_diff(uint4 x, uint4 y);\n" |
37257 | "uint8 __ovld __cnfn abs_diff(int8 x, int8 y);\n" |
37258 | "uint8 __ovld __cnfn abs_diff(uint8 x, uint8 y);\n" |
37259 | "uint16 __ovld __cnfn abs_diff(int16 x, int16 y);\n" |
37260 | "uint16 __ovld __cnfn abs_diff(uint16 x, uint16 y);\n" |
37261 | "ulong __ovld __cnfn abs_diff(long x, long y);\n" |
37262 | "ulong __ovld __cnfn abs_diff(ulong x, ulong y);\n" |
37263 | "ulong2 __ovld __cnfn abs_diff(long2 x, long2 y);\n" |
37264 | "ulong2 __ovld __cnfn abs_diff(ulong2 x, ulong2 y);\n" |
37265 | "ulong3 __ovld __cnfn abs_diff(long3 x, long3 y);\n" |
37266 | "ulong3 __ovld __cnfn abs_diff(ulong3 x, ulong3 y);\n" |
37267 | "ulong4 __ovld __cnfn abs_diff(long4 x, long4 y);\n" |
37268 | "ulong4 __ovld __cnfn abs_diff(ulong4 x, ulong4 y);\n" |
37269 | "ulong8 __ovld __cnfn abs_diff(long8 x, long8 y);\n" |
37270 | "ulong8 __ovld __cnfn abs_diff(ulong8 x, ulong8 y);\n" |
37271 | "ulong16 __ovld __cnfn abs_diff(long16 x, long16 y);\n" |
37272 | "ulong16 __ovld __cnfn abs_diff(ulong16 x, ulong16 y);\n" |
37273 | "\n" |
37274 | "/**\n" |
37275 | " * Returns x + y and saturates the result.\n" |
37276 | " */\n" |
37277 | "char __ovld __cnfn add_sat(char x, char y);\n" |
37278 | "uchar __ovld __cnfn add_sat(uchar x, uchar y);\n" |
37279 | "char2 __ovld __cnfn add_sat(char2 x, char2 y);\n" |
37280 | "uchar2 __ovld __cnfn add_sat(uchar2 x, uchar2 y);\n" |
37281 | "char3 __ovld __cnfn add_sat(char3 x, char3 y);\n" |
37282 | "uchar3 __ovld __cnfn add_sat(uchar3 x, uchar3 y);\n" |
37283 | "char4 __ovld __cnfn add_sat(char4 x, char4 y);\n" |
37284 | "uchar4 __ovld __cnfn add_sat(uchar4 x, uchar4 y);\n" |
37285 | "char8 __ovld __cnfn add_sat(char8 x, char8 y);\n" |
37286 | "uchar8 __ovld __cnfn add_sat(uchar8 x, uchar8 y);\n" |
37287 | "char16 __ovld __cnfn add_sat(char16 x, char16 y);\n" |
37288 | "uchar16 __ovld __cnfn add_sat(uchar16 x, uchar16 y);\n" |
37289 | "short __ovld __cnfn add_sat(short x, short y);\n" |
37290 | "ushort __ovld __cnfn add_sat(ushort x, ushort y);\n" |
37291 | "short2 __ovld __cnfn add_sat(short2 x, short2 y);\n" |
37292 | "ushort2 __ovld __cnfn add_sat(ushort2 x, ushort2 y);\n" |
37293 | "short3 __ovld __cnfn add_sat(short3 x, short3 y);\n" |
37294 | "ushort3 __ovld __cnfn add_sat(ushort3 x, ushort3 y);\n" |
37295 | "short4 __ovld __cnfn add_sat(short4 x, short4 y);\n" |
37296 | "ushort4 __ovld __cnfn add_sat(ushort4 x, ushort4 y);\n" |
37297 | "short8 __ovld __cnfn add_sat(short8 x, short8 y);\n" |
37298 | "ushort8 __ovld __cnfn add_sat(ushort8 x, ushort8 y);\n" |
37299 | "short16 __ovld __cnfn add_sat(short16 x, short16 y);\n" |
37300 | "ushort16 __ovld __cnfn add_sat(ushort16 x, ushort16 y);\n" |
37301 | "int __ovld __cnfn add_sat(int x, int y);\n" |
37302 | "uint __ovld __cnfn add_sat(uint x, uint y);\n" |
37303 | "int2 __ovld __cnfn add_sat(int2 x, int2 y);\n" |
37304 | "uint2 __ovld __cnfn add_sat(uint2 x, uint2 y);\n" |
37305 | "int3 __ovld __cnfn add_sat(int3 x, int3 y);\n" |
37306 | "uint3 __ovld __cnfn add_sat(uint3 x, uint3 y);\n" |
37307 | "int4 __ovld __cnfn add_sat(int4 x, int4 y);\n" |
37308 | "uint4 __ovld __cnfn add_sat(uint4 x, uint4 y);\n" |
37309 | "int8 __ovld __cnfn add_sat(int8 x, int8 y);\n" |
37310 | "uint8 __ovld __cnfn add_sat(uint8 x, uint8 y);\n" |
37311 | "int16 __ovld __cnfn add_sat(int16 x, int16 y);\n" |
37312 | "uint16 __ovld __cnfn add_sat(uint16 x, uint16 y);\n" |
37313 | "long __ovld __cnfn add_sat(long x, long y);\n" |
37314 | "ulong __ovld __cnfn add_sat(ulong x, ulong y);\n" |
37315 | "long2 __ovld __cnfn add_sat(long2 x, long2 y);\n" |
37316 | "ulong2 __ovld __cnfn add_sat(ulong2 x, ulong2 y);\n" |
37317 | "long3 __ovld __cnfn add_sat(long3 x, long3 y);\n" |
37318 | "ulong3 __ovld __cnfn add_sat(ulong3 x, ulong3 y);\n" |
37319 | "long4 __ovld __cnfn add_sat(long4 x, long4 y);\n" |
37320 | "ulong4 __ovld __cnfn add_sat(ulong4 x, ulong4 y);\n" |
37321 | "long8 __ovld __cnfn add_sat(long8 x, long8 y);\n" |
37322 | "ulong8 __ovld __cnfn add_sat(ulong8 x, ulong8 y);\n" |
37323 | "long16 __ovld __cnfn add_sat(long16 x, long16 y);\n" |
37324 | "ulong16 __ovld __cnfn add_sat(ulong16 x, ulong16 y);\n" |
37325 | "\n" |
37326 | "/**\n" |
37327 | " * Returns (x + y) >> 1. The intermediate sum does\n" |
37328 | " * not modulo overflow.\n" |
37329 | " */\n" |
37330 | "char __ovld __cnfn hadd(char x, char y);\n" |
37331 | "uchar __ovld __cnfn hadd(uchar x, uchar y);\n" |
37332 | "char2 __ovld __cnfn hadd(char2 x, char2 y);\n" |
37333 | "uchar2 __ovld __cnfn hadd(uchar2 x, uchar2 y);\n" |
37334 | "char3 __ovld __cnfn hadd(char3 x, char3 y);\n" |
37335 | "uchar3 __ovld __cnfn hadd(uchar3 x, uchar3 y);\n" |
37336 | "char4 __ovld __cnfn hadd(char4 x, char4 y);\n" |
37337 | "uchar4 __ovld __cnfn hadd(uchar4 x, uchar4 y);\n" |
37338 | "char8 __ovld __cnfn hadd(char8 x, char8 y);\n" |
37339 | "uchar8 __ovld __cnfn hadd(uchar8 x, uchar8 y);\n" |
37340 | "char16 __ovld __cnfn hadd(char16 x, char16 y);\n" |
37341 | "uchar16 __ovld __cnfn hadd(uchar16 x, uchar16 y);\n" |
37342 | "short __ovld __cnfn hadd(short x, short y);\n" |
37343 | "ushort __ovld __cnfn hadd(ushort x, ushort y);\n" |
37344 | "short2 __ovld __cnfn hadd(short2 x, short2 y);\n" |
37345 | "ushort2 __ovld __cnfn hadd(ushort2 x, ushort2 y);\n" |
37346 | "short3 __ovld __cnfn hadd(short3 x, short3 y);\n" |
37347 | "ushort3 __ovld __cnfn hadd(ushort3 x, ushort3 y);\n" |
37348 | "short4 __ovld __cnfn hadd(short4 x, short4 y);\n" |
37349 | "ushort4 __ovld __cnfn hadd(ushort4 x, ushort4 y);\n" |
37350 | "short8 __ovld __cnfn hadd(short8 x, short8 y);\n" |
37351 | "ushort8 __ovld __cnfn hadd(ushort8 x, ushort8 y);\n" |
37352 | "short16 __ovld __cnfn hadd(short16 x, short16 y);\n" |
37353 | "ushort16 __ovld __cnfn hadd(ushort16 x, ushort16 y);\n" |
37354 | "int __ovld __cnfn hadd(int x, int y);\n" |
37355 | "uint __ovld __cnfn hadd(uint x, uint y);\n" |
37356 | "int2 __ovld __cnfn hadd(int2 x, int2 y);\n" |
37357 | "uint2 __ovld __cnfn hadd(uint2 x, uint2 y);\n" |
37358 | "int3 __ovld __cnfn hadd(int3 x, int3 y);\n" |
37359 | "uint3 __ovld __cnfn hadd(uint3 x, uint3 y);\n" |
37360 | "int4 __ovld __cnfn hadd(int4 x, int4 y);\n" |
37361 | "uint4 __ovld __cnfn hadd(uint4 x, uint4 y);\n" |
37362 | "int8 __ovld __cnfn hadd(int8 x, int8 y);\n" |
37363 | "uint8 __ovld __cnfn hadd(uint8 x, uint8 y);\n" |
37364 | "int16 __ovld __cnfn hadd(int16 x, int16 y);\n" |
37365 | "uint16 __ovld __cnfn hadd(uint16 x, uint16 y);\n" |
37366 | "long __ovld __cnfn hadd(long x, long y);\n" |
37367 | "ulong __ovld __cnfn hadd(ulong x, ulong y);\n" |
37368 | "long2 __ovld __cnfn hadd(long2 x, long2 y);\n" |
37369 | "ulong2 __ovld __cnfn hadd(ulong2 x, ulong2 y);\n" |
37370 | "long3 __ovld __cnfn hadd(long3 x, long3 y);\n" |
37371 | "ulong3 __ovld __cnfn hadd(ulong3 x, ulong3 y);\n" |
37372 | "long4 __ovld __cnfn hadd(long4 x, long4 y);\n" |
37373 | "ulong4 __ovld __cnfn hadd(ulong4 x, ulong4 y);\n" |
37374 | "long8 __ovld __cnfn hadd(long8 x, long8 y);\n" |
37375 | "ulong8 __ovld __cnfn hadd(ulong8 x, ulong8 y);\n" |
37376 | "long16 __ovld __cnfn hadd(long16 x, long16 y);\n" |
37377 | "ulong16 __ovld __cnfn hadd(ulong16 x, ulong16 y);\n" |
37378 | "\n" |
37379 | "/**\n" |
37380 | " * Returns (x + y + 1) >> 1. The intermediate sum\n" |
37381 | " * does not modulo overflow.\n" |
37382 | " */\n" |
37383 | "char __ovld __cnfn rhadd(char x, char y);\n" |
37384 | "uchar __ovld __cnfn rhadd(uchar x, uchar y);\n" |
37385 | "char2 __ovld __cnfn rhadd(char2 x, char2 y);\n" |
37386 | "uchar2 __ovld __cnfn rhadd(uchar2 x, uchar2 y);\n" |
37387 | "char3 __ovld __cnfn rhadd(char3 x, char3 y);\n" |
37388 | "uchar3 __ovld __cnfn rhadd(uchar3 x, uchar3 y);\n" |
37389 | "char4 __ovld __cnfn rhadd(char4 x, char4 y);\n" |
37390 | "uchar4 __ovld __cnfn rhadd(uchar4 x, uchar4 y);\n" |
37391 | "char8 __ovld __cnfn rhadd(char8 x, char8 y);\n" |
37392 | "uchar8 __ovld __cnfn rhadd(uchar8 x, uchar8 y);\n" |
37393 | "char16 __ovld __cnfn rhadd(char16 x, char16 y);\n" |
37394 | "uchar16 __ovld __cnfn rhadd(uchar16 x, uchar16 y);\n" |
37395 | "short __ovld __cnfn rhadd(short x, short y);\n" |
37396 | "ushort __ovld __cnfn rhadd(ushort x, ushort y);\n" |
37397 | "short2 __ovld __cnfn rhadd(short2 x, short2 y);\n" |
37398 | "ushort2 __ovld __cnfn rhadd(ushort2 x, ushort2 y);\n" |
37399 | "short3 __ovld __cnfn rhadd(short3 x, short3 y);\n" |
37400 | "ushort3 __ovld __cnfn rhadd(ushort3 x, ushort3 y);\n" |
37401 | "short4 __ovld __cnfn rhadd(short4 x, short4 y);\n" |
37402 | "ushort4 __ovld __cnfn rhadd(ushort4 x, ushort4 y);\n" |
37403 | "short8 __ovld __cnfn rhadd(short8 x, short8 y);\n" |
37404 | "ushort8 __ovld __cnfn rhadd(ushort8 x, ushort8 y);\n" |
37405 | "short16 __ovld __cnfn rhadd(short16 x, short16 y);\n" |
37406 | "ushort16 __ovld __cnfn rhadd(ushort16 x, ushort16 y);\n" |
37407 | "int __ovld __cnfn rhadd(int x, int y);\n" |
37408 | "uint __ovld __cnfn rhadd(uint x, uint y);\n" |
37409 | "int2 __ovld __cnfn rhadd(int2 x, int2 y);\n" |
37410 | "uint2 __ovld __cnfn rhadd(uint2 x, uint2 y);\n" |
37411 | "int3 __ovld __cnfn rhadd(int3 x, int3 y);\n" |
37412 | "uint3 __ovld __cnfn rhadd(uint3 x, uint3 y);\n" |
37413 | "int4 __ovld __cnfn rhadd(int4 x, int4 y);\n" |
37414 | "uint4 __ovld __cnfn rhadd(uint4 x, uint4 y);\n" |
37415 | "int8 __ovld __cnfn rhadd(int8 x, int8 y);\n" |
37416 | "uint8 __ovld __cnfn rhadd(uint8 x, uint8 y);\n" |
37417 | "int16 __ovld __cnfn rhadd(int16 x, int16 y);\n" |
37418 | "uint16 __ovld __cnfn rhadd(uint16 x, uint16 y);\n" |
37419 | "long __ovld __cnfn rhadd(long x, long y);\n" |
37420 | "ulong __ovld __cnfn rhadd(ulong x, ulong y);\n" |
37421 | "long2 __ovld __cnfn rhadd(long2 x, long2 y);\n" |
37422 | "ulong2 __ovld __cnfn rhadd(ulong2 x, ulong2 y);\n" |
37423 | "long3 __ovld __cnfn rhadd(long3 x, long3 y);\n" |
37424 | "ulong3 __ovld __cnfn rhadd(ulong3 x, ulong3 y);\n" |
37425 | "long4 __ovld __cnfn rhadd(long4 x, long4 y);\n" |
37426 | "ulong4 __ovld __cnfn rhadd(ulong4 x, ulong4 y);\n" |
37427 | "long8 __ovld __cnfn rhadd(long8 x, long8 y);\n" |
37428 | "ulong8 __ovld __cnfn rhadd(ulong8 x, ulong8 y);\n" |
37429 | "long16 __ovld __cnfn rhadd(long16 x, long16 y);\n" |
37430 | "ulong16 __ovld __cnfn rhadd(ulong16 x, ulong16 y);\n" |
37431 | "\n" |
37432 | "/**\n" |
37433 | " * Returns min(max(x, minval), maxval).\n" |
37434 | " * Results are undefined if minval > maxval.\n" |
37435 | " */\n" |
37436 | "char __ovld __cnfn clamp(char x, char minval, char maxval);\n" |
37437 | "uchar __ovld __cnfn clamp(uchar x, uchar minval, uchar maxval);\n" |
37438 | "char2 __ovld __cnfn clamp(char2 x, char2 minval, char2 maxval);\n" |
37439 | "uchar2 __ovld __cnfn clamp(uchar2 x, uchar2 minval, uchar2 maxval);\n" |
37440 | "char3 __ovld __cnfn clamp(char3 x, char3 minval, char3 maxval);\n" |
37441 | "uchar3 __ovld __cnfn clamp(uchar3 x, uchar3 minval, uchar3 maxval);\n" |
37442 | "char4 __ovld __cnfn clamp(char4 x, char4 minval, char4 maxval);\n" |
37443 | "uchar4 __ovld __cnfn clamp(uchar4 x, uchar4 minval, uchar4 maxval);\n" |
37444 | "char8 __ovld __cnfn clamp(char8 x, char8 minval, char8 maxval);\n" |
37445 | "uchar8 __ovld __cnfn clamp(uchar8 x, uchar8 minval, uchar8 maxval);\n" |
37446 | "char16 __ovld __cnfn clamp(char16 x, char16 minval, char16 maxval);\n" |
37447 | "uchar16 __ovld __cnfn clamp(uchar16 x, uchar16 minval, uchar16 maxval);\n" |
37448 | "short __ovld __cnfn clamp(short x, short minval, short maxval);\n" |
37449 | "ushort __ovld __cnfn clamp(ushort x, ushort minval, ushort maxval);\n" |
37450 | "short2 __ovld __cnfn clamp(short2 x, short2 minval, short2 maxval);\n" |
37451 | "ushort2 __ovld __cnfn clamp(ushort2 x, ushort2 minval, ushort2 maxval);\n" |
37452 | "short3 __ovld __cnfn clamp(short3 x, short3 minval, short3 maxval);\n" |
37453 | "ushort3 __ovld __cnfn clamp(ushort3 x, ushort3 minval, ushort3 maxval);\n" |
37454 | "short4 __ovld __cnfn clamp(short4 x, short4 minval, short4 maxval);\n" |
37455 | "ushort4 __ovld __cnfn clamp(ushort4 x, ushort4 minval, ushort4 maxval);\n" |
37456 | "short8 __ovld __cnfn clamp(short8 x, short8 minval, short8 maxval);\n" |
37457 | "ushort8 __ovld __cnfn clamp(ushort8 x, ushort8 minval, ushort8 maxval);\n" |
37458 | "short16 __ovld __cnfn clamp(short16 x, short16 minval, short16 maxval);\n" |
37459 | "ushort16 __ovld __cnfn clamp(ushort16 x, ushort16 minval, ushort16 maxval);\n" |
37460 | "int __ovld __cnfn clamp(int x, int minval, int maxval);\n" |
37461 | "uint __ovld __cnfn clamp(uint x, uint minval, uint maxval);\n" |
37462 | "int2 __ovld __cnfn clamp(int2 x, int2 minval, int2 maxval);\n" |
37463 | "uint2 __ovld __cnfn clamp(uint2 x, uint2 minval, uint2 maxval);\n" |
37464 | "int3 __ovld __cnfn clamp(int3 x, int3 minval, int3 maxval);\n" |
37465 | "uint3 __ovld __cnfn clamp(uint3 x, uint3 minval, uint3 maxval);\n" |
37466 | "int4 __ovld __cnfn clamp(int4 x, int4 minval, int4 maxval);\n" |
37467 | "uint4 __ovld __cnfn clamp(uint4 x, uint4 minval, uint4 maxval);\n" |
37468 | "int8 __ovld __cnfn clamp(int8 x, int8 minval, int8 maxval);\n" |
37469 | "uint8 __ovld __cnfn clamp(uint8 x, uint8 minval, uint8 maxval);\n" |
37470 | "int16 __ovld __cnfn clamp(int16 x, int16 minval, int16 maxval);\n" |
37471 | "uint16 __ovld __cnfn clamp(uint16 x, uint16 minval, uint16 maxval);\n" |
37472 | "long __ovld __cnfn clamp(long x, long minval, long maxval);\n" |
37473 | "ulong __ovld __cnfn clamp(ulong x, ulong minval, ulong maxval);\n" |
37474 | "long2 __ovld __cnfn clamp(long2 x, long2 minval, long2 maxval);\n" |
37475 | "ulong2 __ovld __cnfn clamp(ulong2 x, ulong2 minval, ulong2 maxval);\n" |
37476 | "long3 __ovld __cnfn clamp(long3 x, long3 minval, long3 maxval);\n" |
37477 | "ulong3 __ovld __cnfn clamp(ulong3 x, ulong3 minval, ulong3 maxval);\n" |
37478 | "long4 __ovld __cnfn clamp(long4 x, long4 minval, long4 maxval);\n" |
37479 | "ulong4 __ovld __cnfn clamp(ulong4 x, ulong4 minval, ulong4 maxval);\n" |
37480 | "long8 __ovld __cnfn clamp(long8 x, long8 minval, long8 maxval);\n" |
37481 | "ulong8 __ovld __cnfn clamp(ulong8 x, ulong8 minval, ulong8 maxval);\n" |
37482 | "long16 __ovld __cnfn clamp(long16 x, long16 minval, long16 maxval);\n" |
37483 | "ulong16 __ovld __cnfn clamp(ulong16 x, ulong16 minval, ulong16 maxval);\n" |
37484 | "char __ovld __cnfn clamp(char x, char minval, char maxval);\n" |
37485 | "uchar __ovld __cnfn clamp(uchar x, uchar minval, uchar maxval);\n" |
37486 | "char2 __ovld __cnfn clamp(char2 x, char minval, char maxval);\n" |
37487 | "uchar2 __ovld __cnfn clamp(uchar2 x, uchar minval, uchar maxval);\n" |
37488 | "char3 __ovld __cnfn clamp(char3 x, char minval, char maxval);\n" |
37489 | "uchar3 __ovld __cnfn clamp(uchar3 x, uchar minval, uchar maxval);\n" |
37490 | "char4 __ovld __cnfn clamp(char4 x, char minval, char maxval);\n" |
37491 | "uchar4 __ovld __cnfn clamp(uchar4 x, uchar minval, uchar maxval);\n" |
37492 | "char8 __ovld __cnfn clamp(char8 x, char minval, char maxval);\n" |
37493 | "uchar8 __ovld __cnfn clamp(uchar8 x, uchar minval, uchar maxval);\n" |
37494 | "char16 __ovld __cnfn clamp(char16 x, char minval, char maxval);\n" |
37495 | "uchar16 __ovld __cnfn clamp(uchar16 x, uchar minval, uchar maxval);\n" |
37496 | "short __ovld __cnfn clamp(short x, short minval, short maxval);\n" |
37497 | "ushort __ovld __cnfn clamp(ushort x, ushort minval, ushort maxval);\n" |
37498 | "short2 __ovld __cnfn clamp(short2 x, short minval, short maxval);\n" |
37499 | "ushort2 __ovld __cnfn clamp(ushort2 x, ushort minval, ushort maxval);\n" |
37500 | "short3 __ovld __cnfn clamp(short3 x, short minval, short maxval);\n" |
37501 | "ushort3 __ovld __cnfn clamp(ushort3 x, ushort minval, ushort maxval);\n" |
37502 | "short4 __ovld __cnfn clamp(short4 x, short minval, short maxval);\n" |
37503 | "ushort4 __ovld __cnfn clamp(ushort4 x, ushort minval, ushort maxval);\n" |
37504 | "short8 __ovld __cnfn clamp(short8 x, short minval, short maxval);\n" |
37505 | "ushort8 __ovld __cnfn clamp(ushort8 x, ushort minval, ushort maxval);\n" |
37506 | "short16 __ovld __cnfn clamp(short16 x, short minval, short maxval);\n" |
37507 | "ushort16 __ovld __cnfn clamp(ushort16 x, ushort minval, ushort maxval);\n" |
37508 | "int __ovld __cnfn clamp(int x, int minval, int maxval);\n" |
37509 | "uint __ovld __cnfn clamp(uint x, uint minval, uint maxval);\n" |
37510 | "int2 __ovld __cnfn clamp(int2 x, int minval, int maxval);\n" |
37511 | "uint2 __ovld __cnfn clamp(uint2 x, uint minval, uint maxval);\n" |
37512 | "int3 __ovld __cnfn clamp(int3 x, int minval, int maxval);\n" |
37513 | "uint3 __ovld __cnfn clamp(uint3 x, uint minval, uint maxval);\n" |
37514 | "int4 __ovld __cnfn clamp(int4 x, int minval, int maxval);\n" |
37515 | "uint4 __ovld __cnfn clamp(uint4 x, uint minval, uint maxval);\n" |
37516 | "int8 __ovld __cnfn clamp(int8 x, int minval, int maxval);\n" |
37517 | "uint8 __ovld __cnfn clamp(uint8 x, uint minval, uint maxval);\n" |
37518 | "int16 __ovld __cnfn clamp(int16 x, int minval, int maxval);\n" |
37519 | "uint16 __ovld __cnfn clamp(uint16 x, uint minval, uint maxval);\n" |
37520 | "long __ovld __cnfn clamp(long x, long minval, long maxval);\n" |
37521 | "ulong __ovld __cnfn clamp(ulong x, ulong minval, ulong maxval);\n" |
37522 | "long2 __ovld __cnfn clamp(long2 x, long minval, long maxval);\n" |
37523 | "ulong2 __ovld __cnfn clamp(ulong2 x, ulong minval, ulong maxval);\n" |
37524 | "long3 __ovld __cnfn clamp(long3 x, long minval, long maxval);\n" |
37525 | "ulong3 __ovld __cnfn clamp(ulong3 x, ulong minval, ulong maxval);\n" |
37526 | "long4 __ovld __cnfn clamp(long4 x, long minval, long maxval);\n" |
37527 | "ulong4 __ovld __cnfn clamp(ulong4 x, ulong minval, ulong maxval);\n" |
37528 | "long8 __ovld __cnfn clamp(long8 x, long minval, long maxval);\n" |
37529 | "ulong8 __ovld __cnfn clamp(ulong8 x, ulong minval, ulong maxval);\n" |
37530 | "long16 __ovld __cnfn clamp(long16 x, long minval, long maxval);\n" |
37531 | "ulong16 __ovld __cnfn clamp(ulong16 x, ulong minval, ulong maxval);\n" |
37532 | "\n" |
37533 | "/**\n" |
37534 | " * Returns the number of leading 0-bits in x, starting\n" |
37535 | " * at the most significant bit position.\n" |
37536 | " */\n" |
37537 | "char __ovld __cnfn clz(char x);\n" |
37538 | "uchar __ovld __cnfn clz(uchar x);\n" |
37539 | "char2 __ovld __cnfn clz(char2 x);\n" |
37540 | "uchar2 __ovld __cnfn clz(uchar2 x);\n" |
37541 | "char3 __ovld __cnfn clz(char3 x);\n" |
37542 | "uchar3 __ovld __cnfn clz(uchar3 x);\n" |
37543 | "char4 __ovld __cnfn clz(char4 x);\n" |
37544 | "uchar4 __ovld __cnfn clz(uchar4 x);\n" |
37545 | "char8 __ovld __cnfn clz(char8 x);\n" |
37546 | "uchar8 __ovld __cnfn clz(uchar8 x);\n" |
37547 | "char16 __ovld __cnfn clz(char16 x);\n" |
37548 | "uchar16 __ovld __cnfn clz(uchar16 x);\n" |
37549 | "short __ovld __cnfn clz(short x);\n" |
37550 | "ushort __ovld __cnfn clz(ushort x);\n" |
37551 | "short2 __ovld __cnfn clz(short2 x);\n" |
37552 | "ushort2 __ovld __cnfn clz(ushort2 x);\n" |
37553 | "short3 __ovld __cnfn clz(short3 x);\n" |
37554 | "ushort3 __ovld __cnfn clz(ushort3 x);\n" |
37555 | "short4 __ovld __cnfn clz(short4 x);\n" |
37556 | "ushort4 __ovld __cnfn clz(ushort4 x);\n" |
37557 | "short8 __ovld __cnfn clz(short8 x);\n" |
37558 | "ushort8 __ovld __cnfn clz(ushort8 x);\n" |
37559 | "short16 __ovld __cnfn clz(short16 x);\n" |
37560 | "ushort16 __ovld __cnfn clz(ushort16 x);\n" |
37561 | "int __ovld __cnfn clz(int x);\n" |
37562 | "uint __ovld __cnfn clz(uint x);\n" |
37563 | "int2 __ovld __cnfn clz(int2 x);\n" |
37564 | "uint2 __ovld __cnfn clz(uint2 x);\n" |
37565 | "int3 __ovld __cnfn clz(int3 x);\n" |
37566 | "uint3 __ovld __cnfn clz(uint3 x);\n" |
37567 | "int4 __ovld __cnfn clz(int4 x);\n" |
37568 | "uint4 __ovld __cnfn clz(uint4 x);\n" |
37569 | "int8 __ovld __cnfn clz(int8 x);\n" |
37570 | "uint8 __ovld __cnfn clz(uint8 x);\n" |
37571 | "int16 __ovld __cnfn clz(int16 x);\n" |
37572 | "uint16 __ovld __cnfn clz(uint16 x);\n" |
37573 | "long __ovld __cnfn clz(long x);\n" |
37574 | "ulong __ovld __cnfn clz(ulong x);\n" |
37575 | "long2 __ovld __cnfn clz(long2 x);\n" |
37576 | "ulong2 __ovld __cnfn clz(ulong2 x);\n" |
37577 | "long3 __ovld __cnfn clz(long3 x);\n" |
37578 | "ulong3 __ovld __cnfn clz(ulong3 x);\n" |
37579 | "long4 __ovld __cnfn clz(long4 x);\n" |
37580 | "ulong4 __ovld __cnfn clz(ulong4 x);\n" |
37581 | "long8 __ovld __cnfn clz(long8 x);\n" |
37582 | "ulong8 __ovld __cnfn clz(ulong8 x);\n" |
37583 | "long16 __ovld __cnfn clz(long16 x);\n" |
37584 | "ulong16 __ovld __cnfn clz(ulong16 x);\n" |
37585 | "\n" |
37586 | "/**\n" |
37587 | " * Returns the count of trailing 0-bits in x. If x is 0,\n" |
37588 | " * returns the size in bits of the type of x or\n" |
37589 | " * component type of x, if x is a vector.\n" |
37590 | " */\n" |
37591 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
37592 | "char __ovld ctz(char x);\n" |
37593 | "uchar __ovld ctz(uchar x);\n" |
37594 | "char2 __ovld ctz(char2 x);\n" |
37595 | "uchar2 __ovld ctz(uchar2 x);\n" |
37596 | "char3 __ovld ctz(char3 x);\n" |
37597 | "uchar3 __ovld ctz(uchar3 x);\n" |
37598 | "char4 __ovld ctz(char4 x);\n" |
37599 | "uchar4 __ovld ctz(uchar4 x);\n" |
37600 | "char8 __ovld ctz(char8 x);\n" |
37601 | "uchar8 __ovld ctz(uchar8 x);\n" |
37602 | "char16 __ovld ctz(char16 x);\n" |
37603 | "uchar16 __ovld ctz(uchar16 x);\n" |
37604 | "short __ovld ctz(short x);\n" |
37605 | "ushort __ovld ctz(ushort x);\n" |
37606 | "short2 __ovld ctz(short2 x);\n" |
37607 | "ushort2 __ovld ctz(ushort2 x);\n" |
37608 | "short3 __ovld ctz(short3 x);\n" |
37609 | "ushort3 __ovld ctz(ushort3 x);\n" |
37610 | "short4 __ovld ctz(short4 x);\n" |
37611 | "ushort4 __ovld ctz(ushort4 x);\n" |
37612 | "short8 __ovld ctz(short8 x);\n" |
37613 | "ushort8 __ovld ctz(ushort8 x);\n" |
37614 | "short16 __ovld ctz(short16 x);\n" |
37615 | "ushort16 __ovld ctz(ushort16 x);\n" |
37616 | "int __ovld ctz(int x);\n" |
37617 | "uint __ovld ctz(uint x);\n" |
37618 | "int2 __ovld ctz(int2 x);\n" |
37619 | "uint2 __ovld ctz(uint2 x);\n" |
37620 | "int3 __ovld ctz(int3 x);\n" |
37621 | "uint3 __ovld ctz(uint3 x);\n" |
37622 | "int4 __ovld ctz(int4 x);\n" |
37623 | "uint4 __ovld ctz(uint4 x);\n" |
37624 | "int8 __ovld ctz(int8 x);\n" |
37625 | "uint8 __ovld ctz(uint8 x);\n" |
37626 | "int16 __ovld ctz(int16 x);\n" |
37627 | "uint16 __ovld ctz(uint16 x);\n" |
37628 | "long __ovld ctz(long x);\n" |
37629 | "ulong __ovld ctz(ulong x);\n" |
37630 | "long2 __ovld ctz(long2 x);\n" |
37631 | "ulong2 __ovld ctz(ulong2 x);\n" |
37632 | "long3 __ovld ctz(long3 x);\n" |
37633 | "ulong3 __ovld ctz(ulong3 x);\n" |
37634 | "long4 __ovld ctz(long4 x);\n" |
37635 | "ulong4 __ovld ctz(ulong4 x);\n" |
37636 | "long8 __ovld ctz(long8 x);\n" |
37637 | "ulong8 __ovld ctz(ulong8 x);\n" |
37638 | "long16 __ovld ctz(long16 x);\n" |
37639 | "ulong16 __ovld ctz(ulong16 x);\n" |
37640 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
37641 | "\n" |
37642 | "/**\n" |
37643 | " * Returns mul_hi(a, b) + c.\n" |
37644 | " */\n" |
37645 | "char __ovld __cnfn mad_hi(char a, char b, char c);\n" |
37646 | "uchar __ovld __cnfn mad_hi(uchar a, uchar b, uchar c);\n" |
37647 | "char2 __ovld __cnfn mad_hi(char2 a, char2 b, char2 c);\n" |
37648 | "uchar2 __ovld __cnfn mad_hi(uchar2 a, uchar2 b, uchar2 c);\n" |
37649 | "char3 __ovld __cnfn mad_hi(char3 a, char3 b, char3 c);\n" |
37650 | "uchar3 __ovld __cnfn mad_hi(uchar3 a, uchar3 b, uchar3 c);\n" |
37651 | "char4 __ovld __cnfn mad_hi(char4 a, char4 b, char4 c);\n" |
37652 | "uchar4 __ovld __cnfn mad_hi(uchar4 a, uchar4 b, uchar4 c);\n" |
37653 | "char8 __ovld __cnfn mad_hi(char8 a, char8 b, char8 c);\n" |
37654 | "uchar8 __ovld __cnfn mad_hi(uchar8 a, uchar8 b, uchar8 c);\n" |
37655 | "char16 __ovld __cnfn mad_hi(char16 a, char16 b, char16 c);\n" |
37656 | "uchar16 __ovld __cnfn mad_hi(uchar16 a, uchar16 b, uchar16 c);\n" |
37657 | "short __ovld __cnfn mad_hi(short a, short b, short c);\n" |
37658 | "ushort __ovld __cnfn mad_hi(ushort a, ushort b, ushort c);\n" |
37659 | "short2 __ovld __cnfn mad_hi(short2 a, short2 b, short2 c);\n" |
37660 | "ushort2 __ovld __cnfn mad_hi(ushort2 a, ushort2 b, ushort2 c);\n" |
37661 | "short3 __ovld __cnfn mad_hi(short3 a, short3 b, short3 c);\n" |
37662 | "ushort3 __ovld __cnfn mad_hi(ushort3 a, ushort3 b, ushort3 c);\n" |
37663 | "short4 __ovld __cnfn mad_hi(short4 a, short4 b, short4 c);\n" |
37664 | "ushort4 __ovld __cnfn mad_hi(ushort4 a, ushort4 b, ushort4 c);\n" |
37665 | "short8 __ovld __cnfn mad_hi(short8 a, short8 b, short8 c);\n" |
37666 | "ushort8 __ovld __cnfn mad_hi(ushort8 a, ushort8 b, ushort8 c);\n" |
37667 | "short16 __ovld __cnfn mad_hi(short16 a, short16 b, short16 c);\n" |
37668 | "ushort16 __ovld __cnfn mad_hi(ushort16 a, ushort16 b, ushort16 c);\n" |
37669 | "int __ovld __cnfn mad_hi(int a, int b, int c);\n" |
37670 | "uint __ovld __cnfn mad_hi(uint a, uint b, uint c);\n" |
37671 | "int2 __ovld __cnfn mad_hi(int2 a, int2 b, int2 c);\n" |
37672 | "uint2 __ovld __cnfn mad_hi(uint2 a, uint2 b, uint2 c);\n" |
37673 | "int3 __ovld __cnfn mad_hi(int3 a, int3 b, int3 c);\n" |
37674 | "uint3 __ovld __cnfn mad_hi(uint3 a, uint3 b, uint3 c);\n" |
37675 | "int4 __ovld __cnfn mad_hi(int4 a, int4 b, int4 c);\n" |
37676 | "uint4 __ovld __cnfn mad_hi(uint4 a, uint4 b, uint4 c);\n" |
37677 | "int8 __ovld __cnfn mad_hi(int8 a, int8 b, int8 c);\n" |
37678 | "uint8 __ovld __cnfn mad_hi(uint8 a, uint8 b, uint8 c);\n" |
37679 | "int16 __ovld __cnfn mad_hi(int16 a, int16 b, int16 c);\n" |
37680 | "uint16 __ovld __cnfn mad_hi(uint16 a, uint16 b, uint16 c);\n" |
37681 | "long __ovld __cnfn mad_hi(long a, long b, long c);\n" |
37682 | "ulong __ovld __cnfn mad_hi(ulong a, ulong b, ulong c);\n" |
37683 | "long2 __ovld __cnfn mad_hi(long2 a, long2 b, long2 c);\n" |
37684 | "ulong2 __ovld __cnfn mad_hi(ulong2 a, ulong2 b, ulong2 c);\n" |
37685 | "long3 __ovld __cnfn mad_hi(long3 a, long3 b, long3 c);\n" |
37686 | "ulong3 __ovld __cnfn mad_hi(ulong3 a, ulong3 b, ulong3 c);\n" |
37687 | "long4 __ovld __cnfn mad_hi(long4 a, long4 b, long4 c);\n" |
37688 | "ulong4 __ovld __cnfn mad_hi(ulong4 a, ulong4 b, ulong4 c);\n" |
37689 | "long8 __ovld __cnfn mad_hi(long8 a, long8 b, long8 c);\n" |
37690 | "ulong8 __ovld __cnfn mad_hi(ulong8 a, ulong8 b, ulong8 c);\n" |
37691 | "long16 __ovld __cnfn mad_hi(long16 a, long16 b, long16 c);\n" |
37692 | "ulong16 __ovld __cnfn mad_hi(ulong16 a, ulong16 b, ulong16 c);\n" |
37693 | "\n" |
37694 | "/**\n" |
37695 | " * Returns a * b + c and saturates the result.\n" |
37696 | " */\n" |
37697 | "char __ovld __cnfn mad_sat(char a, char b, char c);\n" |
37698 | "uchar __ovld __cnfn mad_sat(uchar a, uchar b, uchar c);\n" |
37699 | "char2 __ovld __cnfn mad_sat(char2 a, char2 b, char2 c);\n" |
37700 | "uchar2 __ovld __cnfn mad_sat(uchar2 a, uchar2 b, uchar2 c);\n" |
37701 | "char3 __ovld __cnfn mad_sat(char3 a, char3 b, char3 c);\n" |
37702 | "uchar3 __ovld __cnfn mad_sat(uchar3 a, uchar3 b, uchar3 c);\n" |
37703 | "char4 __ovld __cnfn mad_sat(char4 a, char4 b, char4 c);\n" |
37704 | "uchar4 __ovld __cnfn mad_sat(uchar4 a, uchar4 b, uchar4 c);\n" |
37705 | "char8 __ovld __cnfn mad_sat(char8 a, char8 b, char8 c);\n" |
37706 | "uchar8 __ovld __cnfn mad_sat(uchar8 a, uchar8 b, uchar8 c);\n" |
37707 | "char16 __ovld __cnfn mad_sat(char16 a, char16 b, char16 c);\n" |
37708 | "uchar16 __ovld __cnfn mad_sat(uchar16 a, uchar16 b, uchar16 c);\n" |
37709 | "short __ovld __cnfn mad_sat(short a, short b, short c);\n" |
37710 | "ushort __ovld __cnfn mad_sat(ushort a, ushort b, ushort c);\n" |
37711 | "short2 __ovld __cnfn mad_sat(short2 a, short2 b, short2 c);\n" |
37712 | "ushort2 __ovld __cnfn mad_sat(ushort2 a, ushort2 b, ushort2 c);\n" |
37713 | "short3 __ovld __cnfn mad_sat(short3 a, short3 b, short3 c);\n" |
37714 | "ushort3 __ovld __cnfn mad_sat(ushort3 a, ushort3 b, ushort3 c);\n" |
37715 | "short4 __ovld __cnfn mad_sat(short4 a, short4 b, short4 c);\n" |
37716 | "ushort4 __ovld __cnfn mad_sat(ushort4 a, ushort4 b, ushort4 c);\n" |
37717 | "short8 __ovld __cnfn mad_sat(short8 a, short8 b, short8 c);\n" |
37718 | "ushort8 __ovld __cnfn mad_sat(ushort8 a, ushort8 b, ushort8 c);\n" |
37719 | "short16 __ovld __cnfn mad_sat(short16 a, short16 b, short16 c);\n" |
37720 | "ushort16 __ovld __cnfn mad_sat(ushort16 a, ushort16 b, ushort16 c);\n" |
37721 | "int __ovld __cnfn mad_sat(int a, int b, int c);\n" |
37722 | "uint __ovld __cnfn mad_sat(uint a, uint b, uint c);\n" |
37723 | "int2 __ovld __cnfn mad_sat(int2 a, int2 b, int2 c);\n" |
37724 | "uint2 __ovld __cnfn mad_sat(uint2 a, uint2 b, uint2 c);\n" |
37725 | "int3 __ovld __cnfn mad_sat(int3 a, int3 b, int3 c);\n" |
37726 | "uint3 __ovld __cnfn mad_sat(uint3 a, uint3 b, uint3 c);\n" |
37727 | "int4 __ovld __cnfn mad_sat(int4 a, int4 b, int4 c);\n" |
37728 | "uint4 __ovld __cnfn mad_sat(uint4 a, uint4 b, uint4 c);\n" |
37729 | "int8 __ovld __cnfn mad_sat(int8 a, int8 b, int8 c);\n" |
37730 | "uint8 __ovld __cnfn mad_sat(uint8 a, uint8 b, uint8 c);\n" |
37731 | "int16 __ovld __cnfn mad_sat(int16 a, int16 b, int16 c);\n" |
37732 | "uint16 __ovld __cnfn mad_sat(uint16 a, uint16 b, uint16 c);\n" |
37733 | "long __ovld __cnfn mad_sat(long a, long b, long c);\n" |
37734 | "ulong __ovld __cnfn mad_sat(ulong a, ulong b, ulong c);\n" |
37735 | "long2 __ovld __cnfn mad_sat(long2 a, long2 b, long2 c);\n" |
37736 | "ulong2 __ovld __cnfn mad_sat(ulong2 a, ulong2 b, ulong2 c);\n" |
37737 | "long3 __ovld __cnfn mad_sat(long3 a, long3 b, long3 c);\n" |
37738 | "ulong3 __ovld __cnfn mad_sat(ulong3 a, ulong3 b, ulong3 c);\n" |
37739 | "long4 __ovld __cnfn mad_sat(long4 a, long4 b, long4 c);\n" |
37740 | "ulong4 __ovld __cnfn mad_sat(ulong4 a, ulong4 b, ulong4 c);\n" |
37741 | "long8 __ovld __cnfn mad_sat(long8 a, long8 b, long8 c);\n" |
37742 | "ulong8 __ovld __cnfn mad_sat(ulong8 a, ulong8 b, ulong8 c);\n" |
37743 | "long16 __ovld __cnfn mad_sat(long16 a, long16 b, long16 c);\n" |
37744 | "ulong16 __ovld __cnfn mad_sat(ulong16 a, ulong16 b, ulong16 c);\n" |
37745 | "\n" |
37746 | "/**\n" |
37747 | " * Returns y if x < y, otherwise it returns x.\n" |
37748 | " */\n" |
37749 | "char __ovld __cnfn max(char x, char y);\n" |
37750 | "uchar __ovld __cnfn max(uchar x, uchar y);\n" |
37751 | "char2 __ovld __cnfn max(char2 x, char2 y);\n" |
37752 | "uchar2 __ovld __cnfn max(uchar2 x, uchar2 y);\n" |
37753 | "char3 __ovld __cnfn max(char3 x, char3 y);\n" |
37754 | "uchar3 __ovld __cnfn max(uchar3 x, uchar3 y);\n" |
37755 | "char4 __ovld __cnfn max(char4 x, char4 y);\n" |
37756 | "uchar4 __ovld __cnfn max(uchar4 x, uchar4 y);\n" |
37757 | "char8 __ovld __cnfn max(char8 x, char8 y);\n" |
37758 | "uchar8 __ovld __cnfn max(uchar8 x, uchar8 y);\n" |
37759 | "char16 __ovld __cnfn max(char16 x, char16 y);\n" |
37760 | "uchar16 __ovld __cnfn max(uchar16 x, uchar16 y);\n" |
37761 | "short __ovld __cnfn max(short x, short y);\n" |
37762 | "ushort __ovld __cnfn max(ushort x, ushort y);\n" |
37763 | "short2 __ovld __cnfn max(short2 x, short2 y);\n" |
37764 | "ushort2 __ovld __cnfn max(ushort2 x, ushort2 y);\n" |
37765 | "short3 __ovld __cnfn max(short3 x, short3 y);\n" |
37766 | "ushort3 __ovld __cnfn max(ushort3 x, ushort3 y);\n" |
37767 | "short4 __ovld __cnfn max(short4 x, short4 y);\n" |
37768 | "ushort4 __ovld __cnfn max(ushort4 x, ushort4 y);\n" |
37769 | "short8 __ovld __cnfn max(short8 x, short8 y);\n" |
37770 | "ushort8 __ovld __cnfn max(ushort8 x, ushort8 y);\n" |
37771 | "short16 __ovld __cnfn max(short16 x, short16 y);\n" |
37772 | "ushort16 __ovld __cnfn max(ushort16 x, ushort16 y);\n" |
37773 | "int __ovld __cnfn max(int x, int y);\n" |
37774 | "uint __ovld __cnfn max(uint x, uint y);\n" |
37775 | "int2 __ovld __cnfn max(int2 x, int2 y);\n" |
37776 | "uint2 __ovld __cnfn max(uint2 x, uint2 y);\n" |
37777 | "int3 __ovld __cnfn max(int3 x, int3 y);\n" |
37778 | "uint3 __ovld __cnfn max(uint3 x, uint3 y);\n" |
37779 | "int4 __ovld __cnfn max(int4 x, int4 y);\n" |
37780 | "uint4 __ovld __cnfn max(uint4 x, uint4 y);\n" |
37781 | "int8 __ovld __cnfn max(int8 x, int8 y);\n" |
37782 | "uint8 __ovld __cnfn max(uint8 x, uint8 y);\n" |
37783 | "int16 __ovld __cnfn max(int16 x, int16 y);\n" |
37784 | "uint16 __ovld __cnfn max(uint16 x, uint16 y);\n" |
37785 | "long __ovld __cnfn max(long x, long y);\n" |
37786 | "ulong __ovld __cnfn max(ulong x, ulong y);\n" |
37787 | "long2 __ovld __cnfn max(long2 x, long2 y);\n" |
37788 | "ulong2 __ovld __cnfn max(ulong2 x, ulong2 y);\n" |
37789 | "long3 __ovld __cnfn max(long3 x, long3 y);\n" |
37790 | "ulong3 __ovld __cnfn max(ulong3 x, ulong3 y);\n" |
37791 | "long4 __ovld __cnfn max(long4 x, long4 y);\n" |
37792 | "ulong4 __ovld __cnfn max(ulong4 x, ulong4 y);\n" |
37793 | "long8 __ovld __cnfn max(long8 x, long8 y);\n" |
37794 | "ulong8 __ovld __cnfn max(ulong8 x, ulong8 y);\n" |
37795 | "long16 __ovld __cnfn max(long16 x, long16 y);\n" |
37796 | "ulong16 __ovld __cnfn max(ulong16 x, ulong16 y);\n" |
37797 | "char __ovld __cnfn max(char x, char y);\n" |
37798 | "uchar __ovld __cnfn max(uchar x, uchar y);\n" |
37799 | "char2 __ovld __cnfn max(char2 x, char y);\n" |
37800 | "uchar2 __ovld __cnfn max(uchar2 x, uchar y);\n" |
37801 | "char3 __ovld __cnfn max(char3 x, char y);\n" |
37802 | "uchar3 __ovld __cnfn max(uchar3 x, uchar y);\n" |
37803 | "char4 __ovld __cnfn max(char4 x, char y);\n" |
37804 | "uchar4 __ovld __cnfn max(uchar4 x, uchar y);\n" |
37805 | "char8 __ovld __cnfn max(char8 x, char y);\n" |
37806 | "uchar8 __ovld __cnfn max(uchar8 x, uchar y);\n" |
37807 | "char16 __ovld __cnfn max(char16 x, char y);\n" |
37808 | "uchar16 __ovld __cnfn max(uchar16 x, uchar y);\n" |
37809 | "short __ovld __cnfn max(short x, short y);\n" |
37810 | "ushort __ovld __cnfn max(ushort x, ushort y);\n" |
37811 | "short2 __ovld __cnfn max(short2 x, short y);\n" |
37812 | "ushort2 __ovld __cnfn max(ushort2 x, ushort y);\n" |
37813 | "short3 __ovld __cnfn max(short3 x, short y);\n" |
37814 | "ushort3 __ovld __cnfn max(ushort3 x, ushort y);\n" |
37815 | "short4 __ovld __cnfn max(short4 x, short y);\n" |
37816 | "ushort4 __ovld __cnfn max(ushort4 x, ushort y);\n" |
37817 | "short8 __ovld __cnfn max(short8 x, short y);\n" |
37818 | "ushort8 __ovld __cnfn max(ushort8 x, ushort y);\n" |
37819 | "short16 __ovld __cnfn max(short16 x, short y);\n" |
37820 | "ushort16 __ovld __cnfn max(ushort16 x, ushort y);\n" |
37821 | "int __ovld __cnfn max(int x, int y);\n" |
37822 | "uint __ovld __cnfn max(uint x, uint y);\n" |
37823 | "int2 __ovld __cnfn max(int2 x, int y);\n" |
37824 | "uint2 __ovld __cnfn max(uint2 x, uint y);\n" |
37825 | "int3 __ovld __cnfn max(int3 x, int y);\n" |
37826 | "uint3 __ovld __cnfn max(uint3 x, uint y);\n" |
37827 | "int4 __ovld __cnfn max(int4 x, int y);\n" |
37828 | "uint4 __ovld __cnfn max(uint4 x, uint y);\n" |
37829 | "int8 __ovld __cnfn max(int8 x, int y);\n" |
37830 | "uint8 __ovld __cnfn max(uint8 x, uint y);\n" |
37831 | "int16 __ovld __cnfn max(int16 x, int y);\n" |
37832 | "uint16 __ovld __cnfn max(uint16 x, uint y);\n" |
37833 | "long __ovld __cnfn max(long x, long y);\n" |
37834 | "ulong __ovld __cnfn max(ulong x, ulong y);\n" |
37835 | "long2 __ovld __cnfn max(long2 x, long y);\n" |
37836 | "ulong2 __ovld __cnfn max(ulong2 x, ulong y);\n" |
37837 | "long3 __ovld __cnfn max(long3 x, long y);\n" |
37838 | "ulong3 __ovld __cnfn max(ulong3 x, ulong y);\n" |
37839 | "long4 __ovld __cnfn max(long4 x, long y);\n" |
37840 | "ulong4 __ovld __cnfn max(ulong4 x, ulong y);\n" |
37841 | "long8 __ovld __cnfn max(long8 x, long y);\n" |
37842 | "ulong8 __ovld __cnfn max(ulong8 x, ulong y);\n" |
37843 | "long16 __ovld __cnfn max(long16 x, long y);\n" |
37844 | "ulong16 __ovld __cnfn max(ulong16 x, ulong y);\n" |
37845 | "\n" |
37846 | "/**\n" |
37847 | " * Returns y if y < x, otherwise it returns x.\n" |
37848 | " */\n" |
37849 | "char __ovld __cnfn min(char x, char y);\n" |
37850 | "uchar __ovld __cnfn min(uchar x, uchar y);\n" |
37851 | "char2 __ovld __cnfn min(char2 x, char2 y);\n" |
37852 | "uchar2 __ovld __cnfn min(uchar2 x, uchar2 y);\n" |
37853 | "char3 __ovld __cnfn min(char3 x, char3 y);\n" |
37854 | "uchar3 __ovld __cnfn min(uchar3 x, uchar3 y);\n" |
37855 | "char4 __ovld __cnfn min(char4 x, char4 y);\n" |
37856 | "uchar4 __ovld __cnfn min(uchar4 x, uchar4 y);\n" |
37857 | "char8 __ovld __cnfn min(char8 x, char8 y);\n" |
37858 | "uchar8 __ovld __cnfn min(uchar8 x, uchar8 y);\n" |
37859 | "char16 __ovld __cnfn min(char16 x, char16 y);\n" |
37860 | "uchar16 __ovld __cnfn min(uchar16 x, uchar16 y);\n" |
37861 | "short __ovld __cnfn min(short x, short y);\n" |
37862 | "ushort __ovld __cnfn min(ushort x, ushort y);\n" |
37863 | "short2 __ovld __cnfn min(short2 x, short2 y);\n" |
37864 | "ushort2 __ovld __cnfn min(ushort2 x, ushort2 y);\n" |
37865 | "short3 __ovld __cnfn min(short3 x, short3 y);\n" |
37866 | "ushort3 __ovld __cnfn min(ushort3 x, ushort3 y);\n" |
37867 | "short4 __ovld __cnfn min(short4 x, short4 y);\n" |
37868 | "ushort4 __ovld __cnfn min(ushort4 x, ushort4 y);\n" |
37869 | "short8 __ovld __cnfn min(short8 x, short8 y);\n" |
37870 | "ushort8 __ovld __cnfn min(ushort8 x, ushort8 y);\n" |
37871 | "short16 __ovld __cnfn min(short16 x, short16 y);\n" |
37872 | "ushort16 __ovld __cnfn min(ushort16 x, ushort16 y);\n" |
37873 | "int __ovld __cnfn min(int x, int y);\n" |
37874 | "uint __ovld __cnfn min(uint x, uint y);\n" |
37875 | "int2 __ovld __cnfn min(int2 x, int2 y);\n" |
37876 | "uint2 __ovld __cnfn min(uint2 x, uint2 y);\n" |
37877 | "int3 __ovld __cnfn min(int3 x, int3 y);\n" |
37878 | "uint3 __ovld __cnfn min(uint3 x, uint3 y);\n" |
37879 | "int4 __ovld __cnfn min(int4 x, int4 y);\n" |
37880 | "uint4 __ovld __cnfn min(uint4 x, uint4 y);\n" |
37881 | "int8 __ovld __cnfn min(int8 x, int8 y);\n" |
37882 | "uint8 __ovld __cnfn min(uint8 x, uint8 y);\n" |
37883 | "int16 __ovld __cnfn min(int16 x, int16 y);\n" |
37884 | "uint16 __ovld __cnfn min(uint16 x, uint16 y);\n" |
37885 | "long __ovld __cnfn min(long x, long y);\n" |
37886 | "ulong __ovld __cnfn min(ulong x, ulong y);\n" |
37887 | "long2 __ovld __cnfn min(long2 x, long2 y);\n" |
37888 | "ulong2 __ovld __cnfn min(ulong2 x, ulong2 y);\n" |
37889 | "long3 __ovld __cnfn min(long3 x, long3 y);\n" |
37890 | "ulong3 __ovld __cnfn min(ulong3 x, ulong3 y);\n" |
37891 | "long4 __ovld __cnfn min(long4 x, long4 y);\n" |
37892 | "ulong4 __ovld __cnfn min(ulong4 x, ulong4 y);\n" |
37893 | "long8 __ovld __cnfn min(long8 x, long8 y);\n" |
37894 | "ulong8 __ovld __cnfn min(ulong8 x, ulong8 y);\n" |
37895 | "long16 __ovld __cnfn min(long16 x, long16 y);\n" |
37896 | "ulong16 __ovld __cnfn min(ulong16 x, ulong16 y);\n" |
37897 | "char __ovld __cnfn min(char x, char y);\n" |
37898 | "uchar __ovld __cnfn min(uchar x, uchar y);\n" |
37899 | "char2 __ovld __cnfn min(char2 x, char y);\n" |
37900 | "uchar2 __ovld __cnfn min(uchar2 x, uchar y);\n" |
37901 | "char3 __ovld __cnfn min(char3 x, char y);\n" |
37902 | "uchar3 __ovld __cnfn min(uchar3 x, uchar y);\n" |
37903 | "char4 __ovld __cnfn min(char4 x, char y);\n" |
37904 | "uchar4 __ovld __cnfn min(uchar4 x, uchar y);\n" |
37905 | "char8 __ovld __cnfn min(char8 x, char y);\n" |
37906 | "uchar8 __ovld __cnfn min(uchar8 x, uchar y);\n" |
37907 | "char16 __ovld __cnfn min(char16 x, char y);\n" |
37908 | "uchar16 __ovld __cnfn min(uchar16 x, uchar y);\n" |
37909 | "short __ovld __cnfn min(short x, short y);\n" |
37910 | "ushort __ovld __cnfn min(ushort x, ushort y);\n" |
37911 | "short2 __ovld __cnfn min(short2 x, short y);\n" |
37912 | "ushort2 __ovld __cnfn min(ushort2 x, ushort y);\n" |
37913 | "short3 __ovld __cnfn min(short3 x, short y);\n" |
37914 | "ushort3 __ovld __cnfn min(ushort3 x, ushort y);\n" |
37915 | "short4 __ovld __cnfn min(short4 x, short y);\n" |
37916 | "ushort4 __ovld __cnfn min(ushort4 x, ushort y);\n" |
37917 | "short8 __ovld __cnfn min(short8 x, short y);\n" |
37918 | "ushort8 __ovld __cnfn min(ushort8 x, ushort y);\n" |
37919 | "short16 __ovld __cnfn min(short16 x, short y);\n" |
37920 | "ushort16 __ovld __cnfn min(ushort16 x, ushort y);\n" |
37921 | "int __ovld __cnfn min(int x, int y);\n" |
37922 | "uint __ovld __cnfn min(uint x, uint y);\n" |
37923 | "int2 __ovld __cnfn min(int2 x, int y);\n" |
37924 | "uint2 __ovld __cnfn min(uint2 x, uint y);\n" |
37925 | "int3 __ovld __cnfn min(int3 x, int y);\n" |
37926 | "uint3 __ovld __cnfn min(uint3 x, uint y);\n" |
37927 | "int4 __ovld __cnfn min(int4 x, int y);\n" |
37928 | "uint4 __ovld __cnfn min(uint4 x, uint y);\n" |
37929 | "int8 __ovld __cnfn min(int8 x, int y);\n" |
37930 | "uint8 __ovld __cnfn min(uint8 x, uint y);\n" |
37931 | "int16 __ovld __cnfn min(int16 x, int y);\n" |
37932 | "uint16 __ovld __cnfn min(uint16 x, uint y);\n" |
37933 | "long __ovld __cnfn min(long x, long y);\n" |
37934 | "ulong __ovld __cnfn min(ulong x, ulong y);\n" |
37935 | "long2 __ovld __cnfn min(long2 x, long y);\n" |
37936 | "ulong2 __ovld __cnfn min(ulong2 x, ulong y);\n" |
37937 | "long3 __ovld __cnfn min(long3 x, long y);\n" |
37938 | "ulong3 __ovld __cnfn min(ulong3 x, ulong y);\n" |
37939 | "long4 __ovld __cnfn min(long4 x, long y);\n" |
37940 | "ulong4 __ovld __cnfn min(ulong4 x, ulong y);\n" |
37941 | "long8 __ovld __cnfn min(long8 x, long y);\n" |
37942 | "ulong8 __ovld __cnfn min(ulong8 x, ulong y);\n" |
37943 | "long16 __ovld __cnfn min(long16 x, long y);\n" |
37944 | "ulong16 __ovld __cnfn min(ulong16 x, ulong y);\n" |
37945 | "\n" |
37946 | "/**\n" |
37947 | " * Computes x * y and returns the high half of the\n" |
37948 | " * product of x and y.\n" |
37949 | " */\n" |
37950 | "char __ovld __cnfn mul_hi(char x, char y);\n" |
37951 | "uchar __ovld __cnfn mul_hi(uchar x, uchar y);\n" |
37952 | "char2 __ovld __cnfn mul_hi(char2 x, char2 y);\n" |
37953 | "uchar2 __ovld __cnfn mul_hi(uchar2 x, uchar2 y);\n" |
37954 | "char3 __ovld __cnfn mul_hi(char3 x, char3 y);\n" |
37955 | "uchar3 __ovld __cnfn mul_hi(uchar3 x, uchar3 y);\n" |
37956 | "char4 __ovld __cnfn mul_hi(char4 x, char4 y);\n" |
37957 | "uchar4 __ovld __cnfn mul_hi(uchar4 x, uchar4 y);\n" |
37958 | "char8 __ovld __cnfn mul_hi(char8 x, char8 y);\n" |
37959 | "uchar8 __ovld __cnfn mul_hi(uchar8 x, uchar8 y);\n" |
37960 | "char16 __ovld __cnfn mul_hi(char16 x, char16 y);\n" |
37961 | "uchar16 __ovld __cnfn mul_hi(uchar16 x, uchar16 y);\n" |
37962 | "short __ovld __cnfn mul_hi(short x, short y);\n" |
37963 | "ushort __ovld __cnfn mul_hi(ushort x, ushort y);\n" |
37964 | "short2 __ovld __cnfn mul_hi(short2 x, short2 y);\n" |
37965 | "ushort2 __ovld __cnfn mul_hi(ushort2 x, ushort2 y);\n" |
37966 | "short3 __ovld __cnfn mul_hi(short3 x, short3 y);\n" |
37967 | "ushort3 __ovld __cnfn mul_hi(ushort3 x, ushort3 y);\n" |
37968 | "short4 __ovld __cnfn mul_hi(short4 x, short4 y);\n" |
37969 | "ushort4 __ovld __cnfn mul_hi(ushort4 x, ushort4 y);\n" |
37970 | "short8 __ovld __cnfn mul_hi(short8 x, short8 y);\n" |
37971 | "ushort8 __ovld __cnfn mul_hi(ushort8 x, ushort8 y);\n" |
37972 | "short16 __ovld __cnfn mul_hi(short16 x, short16 y);\n" |
37973 | "ushort16 __ovld __cnfn mul_hi(ushort16 x, ushort16 y);\n" |
37974 | "int __ovld __cnfn mul_hi(int x, int y);\n" |
37975 | "uint __ovld __cnfn mul_hi(uint x, uint y);\n" |
37976 | "int2 __ovld __cnfn mul_hi(int2 x, int2 y);\n" |
37977 | "uint2 __ovld __cnfn mul_hi(uint2 x, uint2 y);\n" |
37978 | "int3 __ovld __cnfn mul_hi(int3 x, int3 y);\n" |
37979 | "uint3 __ovld __cnfn mul_hi(uint3 x, uint3 y);\n" |
37980 | "int4 __ovld __cnfn mul_hi(int4 x, int4 y);\n" |
37981 | "uint4 __ovld __cnfn mul_hi(uint4 x, uint4 y);\n" |
37982 | "int8 __ovld __cnfn mul_hi(int8 x, int8 y);\n" |
37983 | "uint8 __ovld __cnfn mul_hi(uint8 x, uint8 y);\n" |
37984 | "int16 __ovld __cnfn mul_hi(int16 x, int16 y);\n" |
37985 | "uint16 __ovld __cnfn mul_hi(uint16 x, uint16 y);\n" |
37986 | "long __ovld __cnfn mul_hi(long x, long y);\n" |
37987 | "ulong __ovld __cnfn mul_hi(ulong x, ulong y);\n" |
37988 | "long2 __ovld __cnfn mul_hi(long2 x, long2 y);\n" |
37989 | "ulong2 __ovld __cnfn mul_hi(ulong2 x, ulong2 y);\n" |
37990 | "long3 __ovld __cnfn mul_hi(long3 x, long3 y);\n" |
37991 | "ulong3 __ovld __cnfn mul_hi(ulong3 x, ulong3 y);\n" |
37992 | "long4 __ovld __cnfn mul_hi(long4 x, long4 y);\n" |
37993 | "ulong4 __ovld __cnfn mul_hi(ulong4 x, ulong4 y);\n" |
37994 | "long8 __ovld __cnfn mul_hi(long8 x, long8 y);\n" |
37995 | "ulong8 __ovld __cnfn mul_hi(ulong8 x, ulong8 y);\n" |
37996 | "long16 __ovld __cnfn mul_hi(long16 x, long16 y);\n" |
37997 | "ulong16 __ovld __cnfn mul_hi(ulong16 x, ulong16 y);\n" |
37998 | "\n" |
37999 | "/**\n" |
38000 | " * For each element in v, the bits are shifted left by\n" |
38001 | " * the number of bits given by the corresponding\n" |
38002 | " * element in i (subject to usual shift modulo rules\n" |
38003 | " * described in section 6.3). Bits shifted off the left\n" |
38004 | " * side of the element are shifted back in from the\n" |
38005 | " * right.\n" |
38006 | " */\n" |
38007 | "char __ovld __cnfn rotate(char v, char i);\n" |
38008 | "uchar __ovld __cnfn rotate(uchar v, uchar i);\n" |
38009 | "char2 __ovld __cnfn rotate(char2 v, char2 i);\n" |
38010 | "uchar2 __ovld __cnfn rotate(uchar2 v, uchar2 i);\n" |
38011 | "char3 __ovld __cnfn rotate(char3 v, char3 i);\n" |
38012 | "uchar3 __ovld __cnfn rotate(uchar3 v, uchar3 i);\n" |
38013 | "char4 __ovld __cnfn rotate(char4 v, char4 i);\n" |
38014 | "uchar4 __ovld __cnfn rotate(uchar4 v, uchar4 i);\n" |
38015 | "char8 __ovld __cnfn rotate(char8 v, char8 i);\n" |
38016 | "uchar8 __ovld __cnfn rotate(uchar8 v, uchar8 i);\n" |
38017 | "char16 __ovld __cnfn rotate(char16 v, char16 i);\n" |
38018 | "uchar16 __ovld __cnfn rotate(uchar16 v, uchar16 i);\n" |
38019 | "short __ovld __cnfn rotate(short v, short i);\n" |
38020 | "ushort __ovld __cnfn rotate(ushort v, ushort i);\n" |
38021 | "short2 __ovld __cnfn rotate(short2 v, short2 i);\n" |
38022 | "ushort2 __ovld __cnfn rotate(ushort2 v, ushort2 i);\n" |
38023 | "short3 __ovld __cnfn rotate(short3 v, short3 i);\n" |
38024 | "ushort3 __ovld __cnfn rotate(ushort3 v, ushort3 i);\n" |
38025 | "short4 __ovld __cnfn rotate(short4 v, short4 i);\n" |
38026 | "ushort4 __ovld __cnfn rotate(ushort4 v, ushort4 i);\n" |
38027 | "short8 __ovld __cnfn rotate(short8 v, short8 i);\n" |
38028 | "ushort8 __ovld __cnfn rotate(ushort8 v, ushort8 i);\n" |
38029 | "short16 __ovld __cnfn rotate(short16 v, short16 i);\n" |
38030 | "ushort16 __ovld __cnfn rotate(ushort16 v, ushort16 i);\n" |
38031 | "int __ovld __cnfn rotate(int v, int i);\n" |
38032 | "uint __ovld __cnfn rotate(uint v, uint i);\n" |
38033 | "int2 __ovld __cnfn rotate(int2 v, int2 i);\n" |
38034 | "uint2 __ovld __cnfn rotate(uint2 v, uint2 i);\n" |
38035 | "int3 __ovld __cnfn rotate(int3 v, int3 i);\n" |
38036 | "uint3 __ovld __cnfn rotate(uint3 v, uint3 i);\n" |
38037 | "int4 __ovld __cnfn rotate(int4 v, int4 i);\n" |
38038 | "uint4 __ovld __cnfn rotate(uint4 v, uint4 i);\n" |
38039 | "int8 __ovld __cnfn rotate(int8 v, int8 i);\n" |
38040 | "uint8 __ovld __cnfn rotate(uint8 v, uint8 i);\n" |
38041 | "int16 __ovld __cnfn rotate(int16 v, int16 i);\n" |
38042 | "uint16 __ovld __cnfn rotate(uint16 v, uint16 i);\n" |
38043 | "long __ovld __cnfn rotate(long v, long i);\n" |
38044 | "ulong __ovld __cnfn rotate(ulong v, ulong i);\n" |
38045 | "long2 __ovld __cnfn rotate(long2 v, long2 i);\n" |
38046 | "ulong2 __ovld __cnfn rotate(ulong2 v, ulong2 i);\n" |
38047 | "long3 __ovld __cnfn rotate(long3 v, long3 i);\n" |
38048 | "ulong3 __ovld __cnfn rotate(ulong3 v, ulong3 i);\n" |
38049 | "long4 __ovld __cnfn rotate(long4 v, long4 i);\n" |
38050 | "ulong4 __ovld __cnfn rotate(ulong4 v, ulong4 i);\n" |
38051 | "long8 __ovld __cnfn rotate(long8 v, long8 i);\n" |
38052 | "ulong8 __ovld __cnfn rotate(ulong8 v, ulong8 i);\n" |
38053 | "long16 __ovld __cnfn rotate(long16 v, long16 i);\n" |
38054 | "ulong16 __ovld __cnfn rotate(ulong16 v, ulong16 i);\n" |
38055 | "\n" |
38056 | "/**\n" |
38057 | " * Returns x - y and saturates the result.\n" |
38058 | " */\n" |
38059 | "char __ovld __cnfn sub_sat(char x, char y);\n" |
38060 | "uchar __ovld __cnfn sub_sat(uchar x, uchar y);\n" |
38061 | "char2 __ovld __cnfn sub_sat(char2 x, char2 y);\n" |
38062 | "uchar2 __ovld __cnfn sub_sat(uchar2 x, uchar2 y);\n" |
38063 | "char3 __ovld __cnfn sub_sat(char3 x, char3 y);\n" |
38064 | "uchar3 __ovld __cnfn sub_sat(uchar3 x, uchar3 y);\n" |
38065 | "char4 __ovld __cnfn sub_sat(char4 x, char4 y);\n" |
38066 | "uchar4 __ovld __cnfn sub_sat(uchar4 x, uchar4 y);\n" |
38067 | "char8 __ovld __cnfn sub_sat(char8 x, char8 y);\n" |
38068 | "uchar8 __ovld __cnfn sub_sat(uchar8 x, uchar8 y);\n" |
38069 | "char16 __ovld __cnfn sub_sat(char16 x, char16 y);\n" |
38070 | "uchar16 __ovld __cnfn sub_sat(uchar16 x, uchar16 y);\n" |
38071 | "short __ovld __cnfn sub_sat(short x, short y);\n" |
38072 | "ushort __ovld __cnfn sub_sat(ushort x, ushort y);\n" |
38073 | "short2 __ovld __cnfn sub_sat(short2 x, short2 y);\n" |
38074 | "ushort2 __ovld __cnfn sub_sat(ushort2 x, ushort2 y);\n" |
38075 | "short3 __ovld __cnfn sub_sat(short3 x, short3 y);\n" |
38076 | "ushort3 __ovld __cnfn sub_sat(ushort3 x, ushort3 y);\n" |
38077 | "short4 __ovld __cnfn sub_sat(short4 x, short4 y);\n" |
38078 | "ushort4 __ovld __cnfn sub_sat(ushort4 x, ushort4 y);\n" |
38079 | "short8 __ovld __cnfn sub_sat(short8 x, short8 y);\n" |
38080 | "ushort8 __ovld __cnfn sub_sat(ushort8 x, ushort8 y);\n" |
38081 | "short16 __ovld __cnfn sub_sat(short16 x, short16 y);\n" |
38082 | "ushort16 __ovld __cnfn sub_sat(ushort16 x, ushort16 y);\n" |
38083 | "int __ovld __cnfn sub_sat(int x, int y);\n" |
38084 | "uint __ovld __cnfn sub_sat(uint x, uint y);\n" |
38085 | "int2 __ovld __cnfn sub_sat(int2 x, int2 y);\n" |
38086 | "uint2 __ovld __cnfn sub_sat(uint2 x, uint2 y);\n" |
38087 | "int3 __ovld __cnfn sub_sat(int3 x, int3 y);\n" |
38088 | "uint3 __ovld __cnfn sub_sat(uint3 x, uint3 y);\n" |
38089 | "int4 __ovld __cnfn sub_sat(int4 x, int4 y);\n" |
38090 | "uint4 __ovld __cnfn sub_sat(uint4 x, uint4 y);\n" |
38091 | "int8 __ovld __cnfn sub_sat(int8 x, int8 y);\n" |
38092 | "uint8 __ovld __cnfn sub_sat(uint8 x, uint8 y);\n" |
38093 | "int16 __ovld __cnfn sub_sat(int16 x, int16 y);\n" |
38094 | "uint16 __ovld __cnfn sub_sat(uint16 x, uint16 y);\n" |
38095 | "long __ovld __cnfn sub_sat(long x, long y);\n" |
38096 | "ulong __ovld __cnfn sub_sat(ulong x, ulong y);\n" |
38097 | "long2 __ovld __cnfn sub_sat(long2 x, long2 y);\n" |
38098 | "ulong2 __ovld __cnfn sub_sat(ulong2 x, ulong2 y);\n" |
38099 | "long3 __ovld __cnfn sub_sat(long3 x, long3 y);\n" |
38100 | "ulong3 __ovld __cnfn sub_sat(ulong3 x, ulong3 y);\n" |
38101 | "long4 __ovld __cnfn sub_sat(long4 x, long4 y);\n" |
38102 | "ulong4 __ovld __cnfn sub_sat(ulong4 x, ulong4 y);\n" |
38103 | "long8 __ovld __cnfn sub_sat(long8 x, long8 y);\n" |
38104 | "ulong8 __ovld __cnfn sub_sat(ulong8 x, ulong8 y);\n" |
38105 | "long16 __ovld __cnfn sub_sat(long16 x, long16 y);\n" |
38106 | "ulong16 __ovld __cnfn sub_sat(ulong16 x, ulong16 y);\n" |
38107 | "\n" |
38108 | "/**\n" |
38109 | " * result[i] = ((short)hi[i] << 8) | lo[i]\n" |
38110 | " * result[i] = ((ushort)hi[i] << 8) | lo[i]\n" |
38111 | " */\n" |
38112 | "short __ovld __cnfn upsample(char hi, uchar lo);\n" |
38113 | "ushort __ovld __cnfn upsample(uchar hi, uchar lo);\n" |
38114 | "short2 __ovld __cnfn upsample(char2 hi, uchar2 lo);\n" |
38115 | "short3 __ovld __cnfn upsample(char3 hi, uchar3 lo);\n" |
38116 | "short4 __ovld __cnfn upsample(char4 hi, uchar4 lo);\n" |
38117 | "short8 __ovld __cnfn upsample(char8 hi, uchar8 lo);\n" |
38118 | "short16 __ovld __cnfn upsample(char16 hi, uchar16 lo);\n" |
38119 | "ushort2 __ovld __cnfn upsample(uchar2 hi, uchar2 lo);\n" |
38120 | "ushort3 __ovld __cnfn upsample(uchar3 hi, uchar3 lo);\n" |
38121 | "ushort4 __ovld __cnfn upsample(uchar4 hi, uchar4 lo);\n" |
38122 | "ushort8 __ovld __cnfn upsample(uchar8 hi, uchar8 lo);\n" |
38123 | "ushort16 __ovld __cnfn upsample(uchar16 hi, uchar16 lo);\n" |
38124 | "\n" |
38125 | "/**\n" |
38126 | " * result[i] = ((int)hi[i] << 16) | lo[i]\n" |
38127 | " * result[i] = ((uint)hi[i] << 16) | lo[i]\n" |
38128 | " */\n" |
38129 | "int __ovld __cnfn upsample(short hi, ushort lo);\n" |
38130 | "uint __ovld __cnfn upsample(ushort hi, ushort lo);\n" |
38131 | "int2 __ovld __cnfn upsample(short2 hi, ushort2 lo);\n" |
38132 | "int3 __ovld __cnfn upsample(short3 hi, ushort3 lo);\n" |
38133 | "int4 __ovld __cnfn upsample(short4 hi, ushort4 lo);\n" |
38134 | "int8 __ovld __cnfn upsample(short8 hi, ushort8 lo);\n" |
38135 | "int16 __ovld __cnfn upsample(short16 hi, ushort16 lo);\n" |
38136 | "uint2 __ovld __cnfn upsample(ushort2 hi, ushort2 lo);\n" |
38137 | "uint3 __ovld __cnfn upsample(ushort3 hi, ushort3 lo);\n" |
38138 | "uint4 __ovld __cnfn upsample(ushort4 hi, ushort4 lo);\n" |
38139 | "uint8 __ovld __cnfn upsample(ushort8 hi, ushort8 lo);\n" |
38140 | "uint16 __ovld __cnfn upsample(ushort16 hi, ushort16 lo);\n" |
38141 | "/**\n" |
38142 | " * result[i] = ((long)hi[i] << 32) | lo[i]\n" |
38143 | " * result[i] = ((ulong)hi[i] << 32) | lo[i]\n" |
38144 | " */\n" |
38145 | "long __ovld __cnfn upsample(int hi, uint lo);\n" |
38146 | "ulong __ovld __cnfn upsample(uint hi, uint lo);\n" |
38147 | "long2 __ovld __cnfn upsample(int2 hi, uint2 lo);\n" |
38148 | "long3 __ovld __cnfn upsample(int3 hi, uint3 lo);\n" |
38149 | "long4 __ovld __cnfn upsample(int4 hi, uint4 lo);\n" |
38150 | "long8 __ovld __cnfn upsample(int8 hi, uint8 lo);\n" |
38151 | "long16 __ovld __cnfn upsample(int16 hi, uint16 lo);\n" |
38152 | "ulong2 __ovld __cnfn upsample(uint2 hi, uint2 lo);\n" |
38153 | "ulong3 __ovld __cnfn upsample(uint3 hi, uint3 lo);\n" |
38154 | "ulong4 __ovld __cnfn upsample(uint4 hi, uint4 lo);\n" |
38155 | "ulong8 __ovld __cnfn upsample(uint8 hi, uint8 lo);\n" |
38156 | "ulong16 __ovld __cnfn upsample(uint16 hi, uint16 lo);\n" |
38157 | "\n" |
38158 | "/*\n" |
38159 | " * popcount(x): returns the number of set bit in x\n" |
38160 | " */\n" |
38161 | "char __ovld __cnfn popcount(char x);\n" |
38162 | "uchar __ovld __cnfn popcount(uchar x);\n" |
38163 | "char2 __ovld __cnfn popcount(char2 x);\n" |
38164 | "uchar2 __ovld __cnfn popcount(uchar2 x);\n" |
38165 | "char3 __ovld __cnfn popcount(char3 x);\n" |
38166 | "uchar3 __ovld __cnfn popcount(uchar3 x);\n" |
38167 | "char4 __ovld __cnfn popcount(char4 x);\n" |
38168 | "uchar4 __ovld __cnfn popcount(uchar4 x);\n" |
38169 | "char8 __ovld __cnfn popcount(char8 x);\n" |
38170 | "uchar8 __ovld __cnfn popcount(uchar8 x);\n" |
38171 | "char16 __ovld __cnfn popcount(char16 x);\n" |
38172 | "uchar16 __ovld __cnfn popcount(uchar16 x);\n" |
38173 | "short __ovld __cnfn popcount(short x);\n" |
38174 | "ushort __ovld __cnfn popcount(ushort x);\n" |
38175 | "short2 __ovld __cnfn popcount(short2 x);\n" |
38176 | "ushort2 __ovld __cnfn popcount(ushort2 x);\n" |
38177 | "short3 __ovld __cnfn popcount(short3 x);\n" |
38178 | "ushort3 __ovld __cnfn popcount(ushort3 x);\n" |
38179 | "short4 __ovld __cnfn popcount(short4 x);\n" |
38180 | "ushort4 __ovld __cnfn popcount(ushort4 x);\n" |
38181 | "short8 __ovld __cnfn popcount(short8 x);\n" |
38182 | "ushort8 __ovld __cnfn popcount(ushort8 x);\n" |
38183 | "short16 __ovld __cnfn popcount(short16 x);\n" |
38184 | "ushort16 __ovld __cnfn popcount(ushort16 x);\n" |
38185 | "int __ovld __cnfn popcount(int x);\n" |
38186 | "uint __ovld __cnfn popcount(uint x);\n" |
38187 | "int2 __ovld __cnfn popcount(int2 x);\n" |
38188 | "uint2 __ovld __cnfn popcount(uint2 x);\n" |
38189 | "int3 __ovld __cnfn popcount(int3 x);\n" |
38190 | "uint3 __ovld __cnfn popcount(uint3 x);\n" |
38191 | "int4 __ovld __cnfn popcount(int4 x);\n" |
38192 | "uint4 __ovld __cnfn popcount(uint4 x);\n" |
38193 | "int8 __ovld __cnfn popcount(int8 x);\n" |
38194 | "uint8 __ovld __cnfn popcount(uint8 x);\n" |
38195 | "int16 __ovld __cnfn popcount(int16 x);\n" |
38196 | "uint16 __ovld __cnfn popcount(uint16 x);\n" |
38197 | "long __ovld __cnfn popcount(long x);\n" |
38198 | "ulong __ovld __cnfn popcount(ulong x);\n" |
38199 | "long2 __ovld __cnfn popcount(long2 x);\n" |
38200 | "ulong2 __ovld __cnfn popcount(ulong2 x);\n" |
38201 | "long3 __ovld __cnfn popcount(long3 x);\n" |
38202 | "ulong3 __ovld __cnfn popcount(ulong3 x);\n" |
38203 | "long4 __ovld __cnfn popcount(long4 x);\n" |
38204 | "ulong4 __ovld __cnfn popcount(ulong4 x);\n" |
38205 | "long8 __ovld __cnfn popcount(long8 x);\n" |
38206 | "ulong8 __ovld __cnfn popcount(ulong8 x);\n" |
38207 | "long16 __ovld __cnfn popcount(long16 x);\n" |
38208 | "ulong16 __ovld __cnfn popcount(ulong16 x);\n" |
38209 | "\n" |
38210 | "/**\n" |
38211 | " * Multiply two 24-bit integer values x and y and add\n" |
38212 | " * the 32-bit integer result to the 32-bit integer z.\n" |
38213 | " * Refer to definition of mul24 to see how the 24-bit\n" |
38214 | " * integer multiplication is performed.\n" |
38215 | " */\n" |
38216 | "int __ovld __cnfn mad24(int x, int y, int z);\n" |
38217 | "uint __ovld __cnfn mad24(uint x, uint y, uint z);\n" |
38218 | "int2 __ovld __cnfn mad24(int2 x, int2 y, int2 z);\n" |
38219 | "uint2 __ovld __cnfn mad24(uint2 x, uint2 y, uint2 z);\n" |
38220 | "int3 __ovld __cnfn mad24(int3 x, int3 y, int3 z);\n" |
38221 | "uint3 __ovld __cnfn mad24(uint3 x, uint3 y, uint3 z);\n" |
38222 | "int4 __ovld __cnfn mad24(int4 x, int4 y, int4 z);\n" |
38223 | "uint4 __ovld __cnfn mad24(uint4 x, uint4 y, uint4 z);\n" |
38224 | "int8 __ovld __cnfn mad24(int8 x, int8 y, int8 z);\n" |
38225 | "uint8 __ovld __cnfn mad24(uint8 x, uint8 y, uint8 z);\n" |
38226 | "int16 __ovld __cnfn mad24(int16 x, int16 y, int16 z);\n" |
38227 | "uint16 __ovld __cnfn mad24(uint16 x, uint16 y, uint16 z);\n" |
38228 | "\n" |
38229 | "/**\n" |
38230 | " * Multiply two 24-bit integer values x and y. x and y\n" |
38231 | " * are 32-bit integers but only the low 24-bits are used\n" |
38232 | " * to perform the multiplication. mul24 should only\n" |
38233 | " * be used when values in x and y are in the range [-\n" |
38234 | " * 2^23, 2^23-1] if x and y are signed integers and in the\n" |
38235 | " * range [0, 2^24-1] if x and y are unsigned integers. If\n" |
38236 | " * x and y are not in this range, the multiplication\n" |
38237 | " * result is implementation-defined.\n" |
38238 | " */\n" |
38239 | "int __ovld __cnfn mul24(int x, int y);\n" |
38240 | "uint __ovld __cnfn mul24(uint x, uint y);\n" |
38241 | "int2 __ovld __cnfn mul24(int2 x, int2 y);\n" |
38242 | "uint2 __ovld __cnfn mul24(uint2 x, uint2 y);\n" |
38243 | "int3 __ovld __cnfn mul24(int3 x, int3 y);\n" |
38244 | "uint3 __ovld __cnfn mul24(uint3 x, uint3 y);\n" |
38245 | "int4 __ovld __cnfn mul24(int4 x, int4 y);\n" |
38246 | "uint4 __ovld __cnfn mul24(uint4 x, uint4 y);\n" |
38247 | "int8 __ovld __cnfn mul24(int8 x, int8 y);\n" |
38248 | "uint8 __ovld __cnfn mul24(uint8 x, uint8 y);\n" |
38249 | "int16 __ovld __cnfn mul24(int16 x, int16 y);\n" |
38250 | "uint16 __ovld __cnfn mul24(uint16 x, uint16 y);\n" |
38251 | "\n" |
38252 | "// OpenCL v1.1 s6.11.4, v1.2 s6.12.4, v2.0 s6.13.4 - Common Functions\n" |
38253 | "\n" |
38254 | "/**\n" |
38255 | " * Returns fmin(fmax(x, minval), maxval).\n" |
38256 | " * Results are undefined if minval > maxval.\n" |
38257 | " */\n" |
38258 | "float __ovld __cnfn clamp(float x, float minval, float maxval);\n" |
38259 | "float2 __ovld __cnfn clamp(float2 x, float2 minval, float2 maxval);\n" |
38260 | "float3 __ovld __cnfn clamp(float3 x, float3 minval, float3 maxval);\n" |
38261 | "float4 __ovld __cnfn clamp(float4 x, float4 minval, float4 maxval);\n" |
38262 | "float8 __ovld __cnfn clamp(float8 x, float8 minval, float8 maxval);\n" |
38263 | "float16 __ovld __cnfn clamp(float16 x, float16 minval, float16 maxval);\n" |
38264 | "float2 __ovld __cnfn clamp(float2 x, float minval, float maxval);\n" |
38265 | "float3 __ovld __cnfn clamp(float3 x, float minval, float maxval);\n" |
38266 | "float4 __ovld __cnfn clamp(float4 x, float minval, float maxval);\n" |
38267 | "float8 __ovld __cnfn clamp(float8 x, float minval, float maxval);\n" |
38268 | "float16 __ovld __cnfn clamp(float16 x, float minval, float maxval);\n" |
38269 | "#ifdef cl_khr_fp64\n" |
38270 | "double __ovld __cnfn clamp(double x, double minval, double maxval);\n" |
38271 | "double2 __ovld __cnfn clamp(double2 x, double2 minval, double2 maxval);\n" |
38272 | "double3 __ovld __cnfn clamp(double3 x, double3 minval, double3 maxval);\n" |
38273 | "double4 __ovld __cnfn clamp(double4 x, double4 minval, double4 maxval);\n" |
38274 | "double8 __ovld __cnfn clamp(double8 x, double8 minval, double8 maxval);\n" |
38275 | "double16 __ovld __cnfn clamp(double16 x, double16 minval, double16 maxval);\n" |
38276 | "double2 __ovld __cnfn clamp(double2 x, double minval, double maxval);\n" |
38277 | "double3 __ovld __cnfn clamp(double3 x, double minval, double maxval);\n" |
38278 | "double4 __ovld __cnfn clamp(double4 x, double minval, double maxval);\n" |
38279 | "double8 __ovld __cnfn clamp(double8 x, double minval, double maxval);\n" |
38280 | "double16 __ovld __cnfn clamp(double16 x, double minval, double maxval);\n" |
38281 | "#endif //cl_khr_fp64\n" |
38282 | "#ifdef cl_khr_fp16\n" |
38283 | "half __ovld __cnfn clamp(half x, half minval, half maxval);\n" |
38284 | "half2 __ovld __cnfn clamp(half2 x, half2 minval, half2 maxval);\n" |
38285 | "half3 __ovld __cnfn clamp(half3 x, half3 minval, half3 maxval);\n" |
38286 | "half4 __ovld __cnfn clamp(half4 x, half4 minval, half4 maxval);\n" |
38287 | "half8 __ovld __cnfn clamp(half8 x, half8 minval, half8 maxval);\n" |
38288 | "half16 __ovld __cnfn clamp(half16 x, half16 minval, half16 maxval);\n" |
38289 | "half2 __ovld __cnfn clamp(half2 x, half minval, half maxval);\n" |
38290 | "half3 __ovld __cnfn clamp(half3 x, half minval, half maxval);\n" |
38291 | "half4 __ovld __cnfn clamp(half4 x, half minval, half maxval);\n" |
38292 | "half8 __ovld __cnfn clamp(half8 x, half minval, half maxval);\n" |
38293 | "half16 __ovld __cnfn clamp(half16 x, half minval, half maxval);\n" |
38294 | "#endif //cl_khr_fp16\n" |
38295 | "\n" |
38296 | "/**\n" |
38297 | " * Converts radians to degrees, i.e. (180 / PI) *\n" |
38298 | " * radians.\n" |
38299 | " */\n" |
38300 | "float __ovld __cnfn degrees(float radians);\n" |
38301 | "float2 __ovld __cnfn degrees(float2 radians);\n" |
38302 | "float3 __ovld __cnfn degrees(float3 radians);\n" |
38303 | "float4 __ovld __cnfn degrees(float4 radians);\n" |
38304 | "float8 __ovld __cnfn degrees(float8 radians);\n" |
38305 | "float16 __ovld __cnfn degrees(float16 radians);\n" |
38306 | "#ifdef cl_khr_fp64\n" |
38307 | "double __ovld __cnfn degrees(double radians);\n" |
38308 | "double2 __ovld __cnfn degrees(double2 radians);\n" |
38309 | "double3 __ovld __cnfn degrees(double3 radians);\n" |
38310 | "double4 __ovld __cnfn degrees(double4 radians);\n" |
38311 | "double8 __ovld __cnfn degrees(double8 radians);\n" |
38312 | "double16 __ovld __cnfn degrees(double16 radians);\n" |
38313 | "#endif //cl_khr_fp64\n" |
38314 | "#ifdef cl_khr_fp16\n" |
38315 | "half __ovld __cnfn degrees(half radians);\n" |
38316 | "half2 __ovld __cnfn degrees(half2 radians);\n" |
38317 | "half3 __ovld __cnfn degrees(half3 radians);\n" |
38318 | "half4 __ovld __cnfn degrees(half4 radians);\n" |
38319 | "half8 __ovld __cnfn degrees(half8 radians);\n" |
38320 | "half16 __ovld __cnfn degrees(half16 radians);\n" |
38321 | "#endif //cl_khr_fp16\n" |
38322 | "\n" |
38323 | "/**\n" |
38324 | " * Returns y if x < y, otherwise it returns x. If x and y\n" |
38325 | " * are infinite or NaN, the return values are undefined.\n" |
38326 | " */\n" |
38327 | "float __ovld __cnfn max(float x, float y);\n" |
38328 | "float2 __ovld __cnfn max(float2 x, float2 y);\n" |
38329 | "float3 __ovld __cnfn max(float3 x, float3 y);\n" |
38330 | "float4 __ovld __cnfn max(float4 x, float4 y);\n" |
38331 | "float8 __ovld __cnfn max(float8 x, float8 y);\n" |
38332 | "float16 __ovld __cnfn max(float16 x, float16 y);\n" |
38333 | "float2 __ovld __cnfn max(float2 x, float y);\n" |
38334 | "float3 __ovld __cnfn max(float3 x, float y);\n" |
38335 | "float4 __ovld __cnfn max(float4 x, float y);\n" |
38336 | "float8 __ovld __cnfn max(float8 x, float y);\n" |
38337 | "float16 __ovld __cnfn max(float16 x, float y);\n" |
38338 | "#ifdef cl_khr_fp64\n" |
38339 | "double __ovld __cnfn max(double x, double y);\n" |
38340 | "double2 __ovld __cnfn max(double2 x, double2 y);\n" |
38341 | "double3 __ovld __cnfn max(double3 x, double3 y);\n" |
38342 | "double4 __ovld __cnfn max(double4 x, double4 y);\n" |
38343 | "double8 __ovld __cnfn max(double8 x, double8 y);\n" |
38344 | "double16 __ovld __cnfn max(double16 x, double16 y);\n" |
38345 | "double2 __ovld __cnfn max(double2 x, double y);\n" |
38346 | "double3 __ovld __cnfn max(double3 x, double y);\n" |
38347 | "double4 __ovld __cnfn max(double4 x, double y);\n" |
38348 | "double8 __ovld __cnfn max(double8 x, double y);\n" |
38349 | "double16 __ovld __cnfn max(double16 x, double y);\n" |
38350 | "#endif //cl_khr_fp64\n" |
38351 | "#ifdef cl_khr_fp16\n" |
38352 | "half __ovld __cnfn max(half x, half y);\n" |
38353 | "half2 __ovld __cnfn max(half2 x, half2 y);\n" |
38354 | "half3 __ovld __cnfn max(half3 x, half3 y);\n" |
38355 | "half4 __ovld __cnfn max(half4 x, half4 y);\n" |
38356 | "half8 __ovld __cnfn max(half8 x, half8 y);\n" |
38357 | "half16 __ovld __cnfn max(half16 x, half16 y);\n" |
38358 | "half2 __ovld __cnfn max(half2 x, half y);\n" |
38359 | "half3 __ovld __cnfn max(half3 x, half y);\n" |
38360 | "half4 __ovld __cnfn max(half4 x, half y);\n" |
38361 | "half8 __ovld __cnfn max(half8 x, half y);\n" |
38362 | "half16 __ovld __cnfn max(half16 x, half y);\n" |
38363 | "#endif //cl_khr_fp16\n" |
38364 | "\n" |
38365 | "/**\n" |
38366 | " * Returns y if y < x, otherwise it returns x. If x and y\n" |
38367 | " * are infinite or NaN, the return values are undefined.\n" |
38368 | " */\n" |
38369 | "float __ovld __cnfn min(float x, float y);\n" |
38370 | "float2 __ovld __cnfn min(float2 x, float2 y);\n" |
38371 | "float3 __ovld __cnfn min(float3 x, float3 y);\n" |
38372 | "float4 __ovld __cnfn min(float4 x, float4 y);\n" |
38373 | "float8 __ovld __cnfn min(float8 x, float8 y);\n" |
38374 | "float16 __ovld __cnfn min(float16 x, float16 y);\n" |
38375 | "float2 __ovld __cnfn min(float2 x, float y);\n" |
38376 | "float3 __ovld __cnfn min(float3 x, float y);\n" |
38377 | "float4 __ovld __cnfn min(float4 x, float y);\n" |
38378 | "float8 __ovld __cnfn min(float8 x, float y);\n" |
38379 | "float16 __ovld __cnfn min(float16 x, float y);\n" |
38380 | "#ifdef cl_khr_fp64\n" |
38381 | "double __ovld __cnfn min(double x, double y);\n" |
38382 | "double2 __ovld __cnfn min(double2 x, double2 y);\n" |
38383 | "double3 __ovld __cnfn min(double3 x, double3 y);\n" |
38384 | "double4 __ovld __cnfn min(double4 x, double4 y);\n" |
38385 | "double8 __ovld __cnfn min(double8 x, double8 y);\n" |
38386 | "double16 __ovld __cnfn min(double16 x, double16 y);\n" |
38387 | "double2 __ovld __cnfn min(double2 x, double y);\n" |
38388 | "double3 __ovld __cnfn min(double3 x, double y);\n" |
38389 | "double4 __ovld __cnfn min(double4 x, double y);\n" |
38390 | "double8 __ovld __cnfn min(double8 x, double y);\n" |
38391 | "double16 __ovld __cnfn min(double16 x, double y);\n" |
38392 | "#endif //cl_khr_fp64\n" |
38393 | "#ifdef cl_khr_fp16\n" |
38394 | "half __ovld __cnfn min(half x, half y);\n" |
38395 | "half2 __ovld __cnfn min(half2 x, half2 y);\n" |
38396 | "half3 __ovld __cnfn min(half3 x, half3 y);\n" |
38397 | "half4 __ovld __cnfn min(half4 x, half4 y);\n" |
38398 | "half8 __ovld __cnfn min(half8 x, half8 y);\n" |
38399 | "half16 __ovld __cnfn min(half16 x, half16 y);\n" |
38400 | "half2 __ovld __cnfn min(half2 x, half y);\n" |
38401 | "half3 __ovld __cnfn min(half3 x, half y);\n" |
38402 | "half4 __ovld __cnfn min(half4 x, half y);\n" |
38403 | "half8 __ovld __cnfn min(half8 x, half y);\n" |
38404 | "half16 __ovld __cnfn min(half16 x, half y);\n" |
38405 | "#endif //cl_khr_fp16\n" |
38406 | "\n" |
38407 | "/**\n" |
38408 | " * Returns the linear blend of x & y implemented as:\n" |
38409 | " * x + (y - x) * a\n" |
38410 | " * a must be a value in the range 0.0 ... 1.0. If a is not\n" |
38411 | " * in the range 0.0 ... 1.0, the return values are\n" |
38412 | " * undefined.\n" |
38413 | " */\n" |
38414 | "float __ovld __cnfn mix(float x, float y, float a);\n" |
38415 | "float2 __ovld __cnfn mix(float2 x, float2 y, float2 a);\n" |
38416 | "float3 __ovld __cnfn mix(float3 x, float3 y, float3 a);\n" |
38417 | "float4 __ovld __cnfn mix(float4 x, float4 y, float4 a);\n" |
38418 | "float8 __ovld __cnfn mix(float8 x, float8 y, float8 a);\n" |
38419 | "float16 __ovld __cnfn mix(float16 x, float16 y, float16 a);\n" |
38420 | "float2 __ovld __cnfn mix(float2 x, float2 y, float a);\n" |
38421 | "float3 __ovld __cnfn mix(float3 x, float3 y, float a);\n" |
38422 | "float4 __ovld __cnfn mix(float4 x, float4 y, float a);\n" |
38423 | "float8 __ovld __cnfn mix(float8 x, float8 y, float a);\n" |
38424 | "float16 __ovld __cnfn mix(float16 x, float16 y, float a);\n" |
38425 | "#ifdef cl_khr_fp64\n" |
38426 | "double __ovld __cnfn mix(double x, double y, double a);\n" |
38427 | "double2 __ovld __cnfn mix(double2 x, double2 y, double2 a);\n" |
38428 | "double3 __ovld __cnfn mix(double3 x, double3 y, double3 a);\n" |
38429 | "double4 __ovld __cnfn mix(double4 x, double4 y, double4 a);\n" |
38430 | "double8 __ovld __cnfn mix(double8 x, double8 y, double8 a);\n" |
38431 | "double16 __ovld __cnfn mix(double16 x, double16 y, double16 a);\n" |
38432 | "double2 __ovld __cnfn mix(double2 x, double2 y, double a);\n" |
38433 | "double3 __ovld __cnfn mix(double3 x, double3 y, double a);\n" |
38434 | "double4 __ovld __cnfn mix(double4 x, double4 y, double a);\n" |
38435 | "double8 __ovld __cnfn mix(double8 x, double8 y, double a);\n" |
38436 | "double16 __ovld __cnfn mix(double16 x, double16 y, double a);\n" |
38437 | "#endif //cl_khr_fp64\n" |
38438 | "#ifdef cl_khr_fp16\n" |
38439 | "half __ovld __cnfn mix(half x, half y, half a);\n" |
38440 | "half2 __ovld __cnfn mix(half2 x, half2 y, half2 a);\n" |
38441 | "half3 __ovld __cnfn mix(half3 x, half3 y, half3 a);\n" |
38442 | "half4 __ovld __cnfn mix(half4 x, half4 y, half4 a);\n" |
38443 | "half8 __ovld __cnfn mix(half8 x, half8 y, half8 a);\n" |
38444 | "half16 __ovld __cnfn mix(half16 x, half16 y, half16 a);\n" |
38445 | "half2 __ovld __cnfn mix(half2 x, half2 y, half a);\n" |
38446 | "half3 __ovld __cnfn mix(half3 x, half3 y, half a);\n" |
38447 | "half4 __ovld __cnfn mix(half4 x, half4 y, half a);\n" |
38448 | "half8 __ovld __cnfn mix(half8 x, half8 y, half a);\n" |
38449 | "half16 __ovld __cnfn mix(half16 x, half16 y, half a);\n" |
38450 | "#endif //cl_khr_fp16\n" |
38451 | "\n" |
38452 | "/**\n" |
38453 | " * Converts degrees to radians, i.e. (PI / 180) *\n" |
38454 | " * degrees.\n" |
38455 | " */\n" |
38456 | "float __ovld __cnfn radians(float degrees);\n" |
38457 | "float2 __ovld __cnfn radians(float2 degrees);\n" |
38458 | "float3 __ovld __cnfn radians(float3 degrees);\n" |
38459 | "float4 __ovld __cnfn radians(float4 degrees);\n" |
38460 | "float8 __ovld __cnfn radians(float8 degrees);\n" |
38461 | "float16 __ovld __cnfn radians(float16 degrees);\n" |
38462 | "#ifdef cl_khr_fp64\n" |
38463 | "double __ovld __cnfn radians(double degrees);\n" |
38464 | "double2 __ovld __cnfn radians(double2 degrees);\n" |
38465 | "double3 __ovld __cnfn radians(double3 degrees);\n" |
38466 | "double4 __ovld __cnfn radians(double4 degrees);\n" |
38467 | "double8 __ovld __cnfn radians(double8 degrees);\n" |
38468 | "double16 __ovld __cnfn radians(double16 degrees);\n" |
38469 | "#endif //cl_khr_fp64\n" |
38470 | "#ifdef cl_khr_fp16\n" |
38471 | "half __ovld __cnfn radians(half degrees);\n" |
38472 | "half2 __ovld __cnfn radians(half2 degrees);\n" |
38473 | "half3 __ovld __cnfn radians(half3 degrees);\n" |
38474 | "half4 __ovld __cnfn radians(half4 degrees);\n" |
38475 | "half8 __ovld __cnfn radians(half8 degrees);\n" |
38476 | "half16 __ovld __cnfn radians(half16 degrees);\n" |
38477 | "#endif //cl_khr_fp16\n" |
38478 | "\n" |
38479 | "/**\n" |
38480 | " * Returns 0.0 if x < edge, otherwise it returns 1.0.\n" |
38481 | " */\n" |
38482 | "float __ovld __cnfn step(float edge, float x);\n" |
38483 | "float2 __ovld __cnfn step(float2 edge, float2 x);\n" |
38484 | "float3 __ovld __cnfn step(float3 edge, float3 x);\n" |
38485 | "float4 __ovld __cnfn step(float4 edge, float4 x);\n" |
38486 | "float8 __ovld __cnfn step(float8 edge, float8 x);\n" |
38487 | "float16 __ovld __cnfn step(float16 edge, float16 x);\n" |
38488 | "float2 __ovld __cnfn step(float edge, float2 x);\n" |
38489 | "float3 __ovld __cnfn step(float edge, float3 x);\n" |
38490 | "float4 __ovld __cnfn step(float edge, float4 x);\n" |
38491 | "float8 __ovld __cnfn step(float edge, float8 x);\n" |
38492 | "float16 __ovld __cnfn step(float edge, float16 x);\n" |
38493 | "#ifdef cl_khr_fp64\n" |
38494 | "double __ovld __cnfn step(double edge, double x);\n" |
38495 | "double2 __ovld __cnfn step(double2 edge, double2 x);\n" |
38496 | "double3 __ovld __cnfn step(double3 edge, double3 x);\n" |
38497 | "double4 __ovld __cnfn step(double4 edge, double4 x);\n" |
38498 | "double8 __ovld __cnfn step(double8 edge, double8 x);\n" |
38499 | "double16 __ovld __cnfn step(double16 edge, double16 x);\n" |
38500 | "double2 __ovld __cnfn step(double edge, double2 x);\n" |
38501 | "double3 __ovld __cnfn step(double edge, double3 x);\n" |
38502 | "double4 __ovld __cnfn step(double edge, double4 x);\n" |
38503 | "double8 __ovld __cnfn step(double edge, double8 x);\n" |
38504 | "double16 __ovld __cnfn step(double edge, double16 x);\n" |
38505 | "#endif //cl_khr_fp64\n" |
38506 | "#ifdef cl_khr_fp16\n" |
38507 | "half __ovld __cnfn step(half edge, half x);\n" |
38508 | "half2 __ovld __cnfn step(half2 edge, half2 x);\n" |
38509 | "half3 __ovld __cnfn step(half3 edge, half3 x);\n" |
38510 | "half4 __ovld __cnfn step(half4 edge, half4 x);\n" |
38511 | "half8 __ovld __cnfn step(half8 edge, half8 x);\n" |
38512 | "half16 __ovld __cnfn step(half16 edge, half16 x);\n" |
38513 | "half __ovld __cnfn step(half edge, half x);\n" |
38514 | "half2 __ovld __cnfn step(half edge, half2 x);\n" |
38515 | "half3 __ovld __cnfn step(half edge, half3 x);\n" |
38516 | "half4 __ovld __cnfn step(half edge, half4 x);\n" |
38517 | "half8 __ovld __cnfn step(half edge, half8 x);\n" |
38518 | "half16 __ovld __cnfn step(half edge, half16 x);\n" |
38519 | "#endif //cl_khr_fp16\n" |
38520 | "\n" |
38521 | "/**\n" |
38522 | " * Returns 0.0 if x <= edge0 and 1.0 if x >= edge1 and\n" |
38523 | " * performs smooth Hermite interpolation between 0\n" |
38524 | " * and 1when edge0 < x < edge1. This is useful in\n" |
38525 | " * cases where you would want a threshold function\n" |
38526 | " * with a smooth transition.\n" |
38527 | " * This is equivalent to:\n" |
38528 | " * gentype t;\n" |
38529 | " * t = clamp ((x - edge0) / (edge1 - edge0), 0, 1);\n" |
38530 | " * return t * t * (3 - 2 * t);\n" |
38531 | " * Results are undefined if edge0 >= edge1 or if x,\n" |
38532 | " * edge0 or edge1 is a NaN.\n" |
38533 | " */\n" |
38534 | "float __ovld __cnfn smoothstep(float edge0, float edge1, float x);\n" |
38535 | "float2 __ovld __cnfn smoothstep(float2 edge0, float2 edge1, float2 x);\n" |
38536 | "float3 __ovld __cnfn smoothstep(float3 edge0, float3 edge1, float3 x);\n" |
38537 | "float4 __ovld __cnfn smoothstep(float4 edge0, float4 edge1, float4 x);\n" |
38538 | "float8 __ovld __cnfn smoothstep(float8 edge0, float8 edge1, float8 x);\n" |
38539 | "float16 __ovld __cnfn smoothstep(float16 edge0, float16 edge1, float16 x);\n" |
38540 | "float2 __ovld __cnfn smoothstep(float edge0, float edge1, float2 x);\n" |
38541 | "float3 __ovld __cnfn smoothstep(float edge0, float edge1, float3 x);\n" |
38542 | "float4 __ovld __cnfn smoothstep(float edge0, float edge1, float4 x);\n" |
38543 | "float8 __ovld __cnfn smoothstep(float edge0, float edge1, float8 x);\n" |
38544 | "float16 __ovld __cnfn smoothstep(float edge0, float edge1, float16 x);\n" |
38545 | "#ifdef cl_khr_fp64\n" |
38546 | "double __ovld __cnfn smoothstep(double edge0, double edge1, double x);\n" |
38547 | "double2 __ovld __cnfn smoothstep(double2 edge0, double2 edge1, double2 x);\n" |
38548 | "double3 __ovld __cnfn smoothstep(double3 edge0, double3 edge1, double3 x);\n" |
38549 | "double4 __ovld __cnfn smoothstep(double4 edge0, double4 edge1, double4 x);\n" |
38550 | "double8 __ovld __cnfn smoothstep(double8 edge0, double8 edge1, double8 x);\n" |
38551 | "double16 __ovld __cnfn smoothstep(double16 edge0, double16 edge1, double16 x);\n" |
38552 | "double2 __ovld __cnfn smoothstep(double edge0, double edge1, double2 x);\n" |
38553 | "double3 __ovld __cnfn smoothstep(double edge0, double edge1, double3 x);\n" |
38554 | "double4 __ovld __cnfn smoothstep(double edge0, double edge1, double4 x);\n" |
38555 | "double8 __ovld __cnfn smoothstep(double edge0, double edge1, double8 x);\n" |
38556 | "double16 __ovld __cnfn smoothstep(double edge0, double edge1, double16 x);\n" |
38557 | "#endif //cl_khr_fp64\n" |
38558 | "#ifdef cl_khr_fp16\n" |
38559 | "half __ovld __cnfn smoothstep(half edge0, half edge1, half x);\n" |
38560 | "half2 __ovld __cnfn smoothstep(half2 edge0, half2 edge1, half2 x);\n" |
38561 | "half3 __ovld __cnfn smoothstep(half3 edge0, half3 edge1, half3 x);\n" |
38562 | "half4 __ovld __cnfn smoothstep(half4 edge0, half4 edge1, half4 x);\n" |
38563 | "half8 __ovld __cnfn smoothstep(half8 edge0, half8 edge1, half8 x);\n" |
38564 | "half16 __ovld __cnfn smoothstep(half16 edge0, half16 edge1, half16 x);\n" |
38565 | "half __ovld __cnfn smoothstep(half edge0, half edge1, half x);\n" |
38566 | "half2 __ovld __cnfn smoothstep(half edge0, half edge1, half2 x);\n" |
38567 | "half3 __ovld __cnfn smoothstep(half edge0, half edge1, half3 x);\n" |
38568 | "half4 __ovld __cnfn smoothstep(half edge0, half edge1, half4 x);\n" |
38569 | "half8 __ovld __cnfn smoothstep(half edge0, half edge1, half8 x);\n" |
38570 | "half16 __ovld __cnfn smoothstep(half edge0, half edge1, half16 x);\n" |
38571 | "#endif //cl_khr_fp16\n" |
38572 | "\n" |
38573 | "/**\n" |
38574 | " * Returns 1.0 if x > 0, -0.0 if x = -0.0, +0.0 if x =\n" |
38575 | " * +0.0, or -1.0 if x < 0. Returns 0.0 if x is a NaN.\n" |
38576 | " */\n" |
38577 | "float __ovld __cnfn sign(float x);\n" |
38578 | "float2 __ovld __cnfn sign(float2 x);\n" |
38579 | "float3 __ovld __cnfn sign(float3 x);\n" |
38580 | "float4 __ovld __cnfn sign(float4 x);\n" |
38581 | "float8 __ovld __cnfn sign(float8 x);\n" |
38582 | "float16 __ovld __cnfn sign(float16 x);\n" |
38583 | "#ifdef cl_khr_fp64\n" |
38584 | "double __ovld __cnfn sign(double x);\n" |
38585 | "double2 __ovld __cnfn sign(double2 x);\n" |
38586 | "double3 __ovld __cnfn sign(double3 x);\n" |
38587 | "double4 __ovld __cnfn sign(double4 x);\n" |
38588 | "double8 __ovld __cnfn sign(double8 x);\n" |
38589 | "double16 __ovld __cnfn sign(double16 x);\n" |
38590 | "#endif //cl_khr_fp64\n" |
38591 | "#ifdef cl_khr_fp16\n" |
38592 | "half __ovld __cnfn sign(half x);\n" |
38593 | "half2 __ovld __cnfn sign(half2 x);\n" |
38594 | "half3 __ovld __cnfn sign(half3 x);\n" |
38595 | "half4 __ovld __cnfn sign(half4 x);\n" |
38596 | "half8 __ovld __cnfn sign(half8 x);\n" |
38597 | "half16 __ovld __cnfn sign(half16 x);\n" |
38598 | "#endif //cl_khr_fp16\n" |
38599 | "\n" |
38600 | "// OpenCL v1.1 s6.11.5, v1.2 s6.12.5, v2.0 s6.13.5 - Geometric Functions\n" |
38601 | "\n" |
38602 | "/**\n" |
38603 | " * Returns the cross product of p0.xyz and p1.xyz. The\n" |
38604 | " * w component of float4 result returned will be 0.0.\n" |
38605 | " */\n" |
38606 | "float4 __ovld __cnfn cross(float4 p0, float4 p1);\n" |
38607 | "float3 __ovld __cnfn cross(float3 p0, float3 p1);\n" |
38608 | "#ifdef cl_khr_fp64\n" |
38609 | "double4 __ovld __cnfn cross(double4 p0, double4 p1);\n" |
38610 | "double3 __ovld __cnfn cross(double3 p0, double3 p1);\n" |
38611 | "#endif //cl_khr_fp64\n" |
38612 | "#ifdef cl_khr_fp16\n" |
38613 | "half4 __ovld __cnfn cross(half4 p0, half4 p1);\n" |
38614 | "half3 __ovld __cnfn cross(half3 p0, half3 p1);\n" |
38615 | "#endif //cl_khr_fp16\n" |
38616 | "\n" |
38617 | "/**\n" |
38618 | " * Compute dot product.\n" |
38619 | " */\n" |
38620 | "float __ovld __cnfn dot(float p0, float p1);\n" |
38621 | "float __ovld __cnfn dot(float2 p0, float2 p1);\n" |
38622 | "float __ovld __cnfn dot(float3 p0, float3 p1);\n" |
38623 | "float __ovld __cnfn dot(float4 p0, float4 p1);\n" |
38624 | "#ifdef cl_khr_fp64\n" |
38625 | "double __ovld __cnfn dot(double p0, double p1);\n" |
38626 | "double __ovld __cnfn dot(double2 p0, double2 p1);\n" |
38627 | "double __ovld __cnfn dot(double3 p0, double3 p1);\n" |
38628 | "double __ovld __cnfn dot(double4 p0, double4 p1);\n" |
38629 | "#endif //cl_khr_fp64\n" |
38630 | "#ifdef cl_khr_fp16\n" |
38631 | "half __ovld __cnfn dot(half p0, half p1);\n" |
38632 | "half __ovld __cnfn dot(half2 p0, half2 p1);\n" |
38633 | "half __ovld __cnfn dot(half3 p0, half3 p1);\n" |
38634 | "half __ovld __cnfn dot(half4 p0, half4 p1);\n" |
38635 | "#endif //cl_khr_fp16\n" |
38636 | "\n" |
38637 | "/**\n" |
38638 | " * Returns the distance between p0 and p1. This is\n" |
38639 | " * calculated as length(p0 - p1).\n" |
38640 | " */\n" |
38641 | "float __ovld __cnfn distance(float p0, float p1);\n" |
38642 | "float __ovld __cnfn distance(float2 p0, float2 p1);\n" |
38643 | "float __ovld __cnfn distance(float3 p0, float3 p1);\n" |
38644 | "float __ovld __cnfn distance(float4 p0, float4 p1);\n" |
38645 | "#ifdef cl_khr_fp64\n" |
38646 | "double __ovld __cnfn distance(double p0, double p1);\n" |
38647 | "double __ovld __cnfn distance(double2 p0, double2 p1);\n" |
38648 | "double __ovld __cnfn distance(double3 p0, double3 p1);\n" |
38649 | "double __ovld __cnfn distance(double4 p0, double4 p1);\n" |
38650 | "#endif //cl_khr_fp64\n" |
38651 | "#ifdef cl_khr_fp16\n" |
38652 | "half __ovld __cnfn distance(half p0, half p1);\n" |
38653 | "half __ovld __cnfn distance(half2 p0, half2 p1);\n" |
38654 | "half __ovld __cnfn distance(half3 p0, half3 p1);\n" |
38655 | "half __ovld __cnfn distance(half4 p0, half4 p1);\n" |
38656 | "#endif //cl_khr_fp16\n" |
38657 | "\n" |
38658 | "/**\n" |
38659 | " * Return the length of vector p, i.e.,\n" |
38660 | " * sqrt(p.x2 + p.y 2 + ...)\n" |
38661 | " */\n" |
38662 | "float __ovld __cnfn length(float p);\n" |
38663 | "float __ovld __cnfn length(float2 p);\n" |
38664 | "float __ovld __cnfn length(float3 p);\n" |
38665 | "float __ovld __cnfn length(float4 p);\n" |
38666 | "#ifdef cl_khr_fp64\n" |
38667 | "double __ovld __cnfn length(double p);\n" |
38668 | "double __ovld __cnfn length(double2 p);\n" |
38669 | "double __ovld __cnfn length(double3 p);\n" |
38670 | "double __ovld __cnfn length(double4 p);\n" |
38671 | "#endif //cl_khr_fp64\n" |
38672 | "#ifdef cl_khr_fp16\n" |
38673 | "half __ovld __cnfn length(half p);\n" |
38674 | "half __ovld __cnfn length(half2 p);\n" |
38675 | "half __ovld __cnfn length(half3 p);\n" |
38676 | "half __ovld __cnfn length(half4 p);\n" |
38677 | "#endif //cl_khr_fp16\n" |
38678 | "\n" |
38679 | "/**\n" |
38680 | " * Returns a vector in the same direction as p but with a\n" |
38681 | " * length of 1.\n" |
38682 | " */\n" |
38683 | "float __ovld __cnfn normalize(float p);\n" |
38684 | "float2 __ovld __cnfn normalize(float2 p);\n" |
38685 | "float3 __ovld __cnfn normalize(float3 p);\n" |
38686 | "float4 __ovld __cnfn normalize(float4 p);\n" |
38687 | "#ifdef cl_khr_fp64\n" |
38688 | "double __ovld __cnfn normalize(double p);\n" |
38689 | "double2 __ovld __cnfn normalize(double2 p);\n" |
38690 | "double3 __ovld __cnfn normalize(double3 p);\n" |
38691 | "double4 __ovld __cnfn normalize(double4 p);\n" |
38692 | "#endif //cl_khr_fp64\n" |
38693 | "#ifdef cl_khr_fp16\n" |
38694 | "half __ovld __cnfn normalize(half p);\n" |
38695 | "half2 __ovld __cnfn normalize(half2 p);\n" |
38696 | "half3 __ovld __cnfn normalize(half3 p);\n" |
38697 | "half4 __ovld __cnfn normalize(half4 p);\n" |
38698 | "#endif //cl_khr_fp16\n" |
38699 | "\n" |
38700 | "/**\n" |
38701 | " * Returns fast_length(p0 - p1).\n" |
38702 | " */\n" |
38703 | "float __ovld __cnfn fast_distance(float p0, float p1);\n" |
38704 | "float __ovld __cnfn fast_distance(float2 p0, float2 p1);\n" |
38705 | "float __ovld __cnfn fast_distance(float3 p0, float3 p1);\n" |
38706 | "float __ovld __cnfn fast_distance(float4 p0, float4 p1);\n" |
38707 | "#ifdef cl_khr_fp16\n" |
38708 | "half __ovld __cnfn fast_distance(half p0, half p1);\n" |
38709 | "half __ovld __cnfn fast_distance(half2 p0, half2 p1);\n" |
38710 | "half __ovld __cnfn fast_distance(half3 p0, half3 p1);\n" |
38711 | "half __ovld __cnfn fast_distance(half4 p0, half4 p1);\n" |
38712 | "#endif //cl_khr_fp16\n" |
38713 | "\n" |
38714 | "/**\n" |
38715 | " * Returns the length of vector p computed as:\n" |
38716 | " * half_sqrt(p.x2 + p.y2 + ...)\n" |
38717 | " */\n" |
38718 | "float __ovld __cnfn fast_length(float p);\n" |
38719 | "float __ovld __cnfn fast_length(float2 p);\n" |
38720 | "float __ovld __cnfn fast_length(float3 p);\n" |
38721 | "float __ovld __cnfn fast_length(float4 p);\n" |
38722 | "#ifdef cl_khr_fp16\n" |
38723 | "half __ovld __cnfn fast_length(half p);\n" |
38724 | "half __ovld __cnfn fast_length(half2 p);\n" |
38725 | "half __ovld __cnfn fast_length(half3 p);\n" |
38726 | "half __ovld __cnfn fast_length(half4 p);\n" |
38727 | "#endif //cl_khr_fp16\n" |
38728 | "\n" |
38729 | "/**\n" |
38730 | " * Returns a vector in the same direction as p but with a\n" |
38731 | " * length of 1. fast_normalize is computed as:\n" |
38732 | " * p * half_rsqrt (p.x^2 + p.y^2 + ... )\n" |
38733 | " * The result shall be within 8192 ulps error from the\n" |
38734 | " * infinitely precise result of\n" |
38735 | " * if (all(p == 0.0f))\n" |
38736 | " * result = p;\n" |
38737 | " * else\n" |
38738 | " * result = p / sqrt (p.x^2 + p.y^2 + ...);\n" |
38739 | " * with the following exceptions:\n" |
38740 | " * 1) If the sum of squares is greater than FLT_MAX\n" |
38741 | " * then the value of the floating-point values in the\n" |
38742 | " * result vector are undefined.\n" |
38743 | " * 2) If the sum of squares is less than FLT_MIN then\n" |
38744 | " * the implementation may return back p.\n" |
38745 | " * 3) If the device is in \"denorms are flushed to zero\"\n" |
38746 | " * mode, individual operand elements with magnitude\n" |
38747 | " * less than sqrt(FLT_MIN) may be flushed to zero\n" |
38748 | " * before proceeding with the calculation.\n" |
38749 | " */\n" |
38750 | "float __ovld __cnfn fast_normalize(float p);\n" |
38751 | "float2 __ovld __cnfn fast_normalize(float2 p);\n" |
38752 | "float3 __ovld __cnfn fast_normalize(float3 p);\n" |
38753 | "float4 __ovld __cnfn fast_normalize(float4 p);\n" |
38754 | "#ifdef cl_khr_fp16\n" |
38755 | "half __ovld __cnfn fast_normalize(half p);\n" |
38756 | "half2 __ovld __cnfn fast_normalize(half2 p);\n" |
38757 | "half3 __ovld __cnfn fast_normalize(half3 p);\n" |
38758 | "half4 __ovld __cnfn fast_normalize(half4 p);\n" |
38759 | "#endif //cl_khr_fp16\n" |
38760 | "\n" |
38761 | "// OpenCL v1.1 s6.11.6, v1.2 s6.12.6, v2.0 s6.13.6 - Relational Functions\n" |
38762 | "\n" |
38763 | "/**\n" |
38764 | " * intn isequal (floatn x, floatn y)\n" |
38765 | " * Returns the component-wise compare of x == y.\n" |
38766 | " */\n" |
38767 | "int __ovld __cnfn isequal(float x, float y);\n" |
38768 | "int2 __ovld __cnfn isequal(float2 x, float2 y);\n" |
38769 | "int3 __ovld __cnfn isequal(float3 x, float3 y);\n" |
38770 | "int4 __ovld __cnfn isequal(float4 x, float4 y);\n" |
38771 | "int8 __ovld __cnfn isequal(float8 x, float8 y);\n" |
38772 | "int16 __ovld __cnfn isequal(float16 x, float16 y);\n" |
38773 | "#ifdef cl_khr_fp64\n" |
38774 | "int __ovld __cnfn isequal(double x, double y);\n" |
38775 | "long2 __ovld __cnfn isequal(double2 x, double2 y);\n" |
38776 | "long3 __ovld __cnfn isequal(double3 x, double3 y);\n" |
38777 | "long4 __ovld __cnfn isequal(double4 x, double4 y);\n" |
38778 | "long8 __ovld __cnfn isequal(double8 x, double8 y);\n" |
38779 | "long16 __ovld __cnfn isequal(double16 x, double16 y);\n" |
38780 | "#endif //cl_khr_fp64\n" |
38781 | "#ifdef cl_khr_fp16\n" |
38782 | "int __ovld __cnfn isequal(half x, half y);\n" |
38783 | "short2 __ovld __cnfn isequal(half2 x, half2 y);\n" |
38784 | "short3 __ovld __cnfn isequal(half3 x, half3 y);\n" |
38785 | "short4 __ovld __cnfn isequal(half4 x, half4 y);\n" |
38786 | "short8 __ovld __cnfn isequal(half8 x, half8 y);\n" |
38787 | "short16 __ovld __cnfn isequal(half16 x, half16 y);\n" |
38788 | "#endif //cl_khr_fp16\n" |
38789 | "\n" |
38790 | "/**\n" |
38791 | " * Returns the component-wise compare of x != y.\n" |
38792 | " */\n" |
38793 | "int __ovld __cnfn isnotequal(float x, float y);\n" |
38794 | "int2 __ovld __cnfn isnotequal(float2 x, float2 y);\n" |
38795 | "int3 __ovld __cnfn isnotequal(float3 x, float3 y);\n" |
38796 | "int4 __ovld __cnfn isnotequal(float4 x, float4 y);\n" |
38797 | "int8 __ovld __cnfn isnotequal(float8 x, float8 y);\n" |
38798 | "int16 __ovld __cnfn isnotequal(float16 x, float16 y);\n" |
38799 | "#ifdef cl_khr_fp64\n" |
38800 | "int __ovld __cnfn isnotequal(double x, double y);\n" |
38801 | "long2 __ovld __cnfn isnotequal(double2 x, double2 y);\n" |
38802 | "long3 __ovld __cnfn isnotequal(double3 x, double3 y);\n" |
38803 | "long4 __ovld __cnfn isnotequal(double4 x, double4 y);\n" |
38804 | "long8 __ovld __cnfn isnotequal(double8 x, double8 y);\n" |
38805 | "long16 __ovld __cnfn isnotequal(double16 x, double16 y);\n" |
38806 | "#endif //cl_khr_fp64\n" |
38807 | "#ifdef cl_khr_fp16\n" |
38808 | "int __ovld __cnfn isnotequal(half x, half y);\n" |
38809 | "short2 __ovld __cnfn isnotequal(half2 x, half2 y);\n" |
38810 | "short3 __ovld __cnfn isnotequal(half3 x, half3 y);\n" |
38811 | "short4 __ovld __cnfn isnotequal(half4 x, half4 y);\n" |
38812 | "short8 __ovld __cnfn isnotequal(half8 x, half8 y);\n" |
38813 | "short16 __ovld __cnfn isnotequal(half16 x, half16 y);\n" |
38814 | "#endif //cl_khr_fp16\n" |
38815 | "\n" |
38816 | "/**\n" |
38817 | " * Returns the component-wise compare of x > y.\n" |
38818 | " */\n" |
38819 | "int __ovld __cnfn isgreater(float x, float y);\n" |
38820 | "int2 __ovld __cnfn isgreater(float2 x, float2 y);\n" |
38821 | "int3 __ovld __cnfn isgreater(float3 x, float3 y);\n" |
38822 | "int4 __ovld __cnfn isgreater(float4 x, float4 y);\n" |
38823 | "int8 __ovld __cnfn isgreater(float8 x, float8 y);\n" |
38824 | "int16 __ovld __cnfn isgreater(float16 x, float16 y);\n" |
38825 | "#ifdef cl_khr_fp64\n" |
38826 | "int __ovld __cnfn isgreater(double x, double y);\n" |
38827 | "long2 __ovld __cnfn isgreater(double2 x, double2 y);\n" |
38828 | "long3 __ovld __cnfn isgreater(double3 x, double3 y);\n" |
38829 | "long4 __ovld __cnfn isgreater(double4 x, double4 y);\n" |
38830 | "long8 __ovld __cnfn isgreater(double8 x, double8 y);\n" |
38831 | "long16 __ovld __cnfn isgreater(double16 x, double16 y);\n" |
38832 | "#endif //cl_khr_fp64\n" |
38833 | "#ifdef cl_khr_fp16\n" |
38834 | "int __ovld __cnfn isgreater(half x, half y);\n" |
38835 | "short2 __ovld __cnfn isgreater(half2 x, half2 y);\n" |
38836 | "short3 __ovld __cnfn isgreater(half3 x, half3 y);\n" |
38837 | "short4 __ovld __cnfn isgreater(half4 x, half4 y);\n" |
38838 | "short8 __ovld __cnfn isgreater(half8 x, half8 y);\n" |
38839 | "short16 __ovld __cnfn isgreater(half16 x, half16 y);\n" |
38840 | "#endif //cl_khr_fp16\n" |
38841 | "\n" |
38842 | "/**\n" |
38843 | " * Returns the component-wise compare of x >= y.\n" |
38844 | " */\n" |
38845 | "int __ovld __cnfn isgreaterequal(float x, float y);\n" |
38846 | "int2 __ovld __cnfn isgreaterequal(float2 x, float2 y);\n" |
38847 | "int3 __ovld __cnfn isgreaterequal(float3 x, float3 y);\n" |
38848 | "int4 __ovld __cnfn isgreaterequal(float4 x, float4 y);\n" |
38849 | "int8 __ovld __cnfn isgreaterequal(float8 x, float8 y);\n" |
38850 | "int16 __ovld __cnfn isgreaterequal(float16 x, float16 y);\n" |
38851 | "#ifdef cl_khr_fp64\n" |
38852 | "int __ovld __cnfn isgreaterequal(double x, double y);\n" |
38853 | "long2 __ovld __cnfn isgreaterequal(double2 x, double2 y);\n" |
38854 | "long3 __ovld __cnfn isgreaterequal(double3 x, double3 y);\n" |
38855 | "long4 __ovld __cnfn isgreaterequal(double4 x, double4 y);\n" |
38856 | "long8 __ovld __cnfn isgreaterequal(double8 x, double8 y);\n" |
38857 | "long16 __ovld __cnfn isgreaterequal(double16 x, double16 y);\n" |
38858 | "#endif //cl_khr_fp64\n" |
38859 | "#ifdef cl_khr_fp16\n" |
38860 | "int __ovld __cnfn isgreaterequal(half x, half y);\n" |
38861 | "short2 __ovld __cnfn isgreaterequal(half2 x, half2 y);\n" |
38862 | "short3 __ovld __cnfn isgreaterequal(half3 x, half3 y);\n" |
38863 | "short4 __ovld __cnfn isgreaterequal(half4 x, half4 y);\n" |
38864 | "short8 __ovld __cnfn isgreaterequal(half8 x, half8 y);\n" |
38865 | "short16 __ovld __cnfn isgreaterequal(half16 x, half16 y);\n" |
38866 | "#endif //cl_khr_fp16\n" |
38867 | "\n" |
38868 | "/**\n" |
38869 | " * Returns the component-wise compare of x < y.\n" |
38870 | " */\n" |
38871 | "int __ovld __cnfn isless(float x, float y);\n" |
38872 | "int2 __ovld __cnfn isless(float2 x, float2 y);\n" |
38873 | "int3 __ovld __cnfn isless(float3 x, float3 y);\n" |
38874 | "int4 __ovld __cnfn isless(float4 x, float4 y);\n" |
38875 | "int8 __ovld __cnfn isless(float8 x, float8 y);\n" |
38876 | "int16 __ovld __cnfn isless(float16 x, float16 y);\n" |
38877 | "#ifdef cl_khr_fp64\n" |
38878 | "int __ovld __cnfn isless(double x, double y);\n" |
38879 | "long2 __ovld __cnfn isless(double2 x, double2 y);\n" |
38880 | "long3 __ovld __cnfn isless(double3 x, double3 y);\n" |
38881 | "long4 __ovld __cnfn isless(double4 x, double4 y);\n" |
38882 | "long8 __ovld __cnfn isless(double8 x, double8 y);\n" |
38883 | "long16 __ovld __cnfn isless(double16 x, double16 y);\n" |
38884 | "#endif //cl_khr_fp64\n" |
38885 | "#ifdef cl_khr_fp16\n" |
38886 | "int __ovld __cnfn isless(half x, half y);\n" |
38887 | "short2 __ovld __cnfn isless(half2 x, half2 y);\n" |
38888 | "short3 __ovld __cnfn isless(half3 x, half3 y);\n" |
38889 | "short4 __ovld __cnfn isless(half4 x, half4 y);\n" |
38890 | "short8 __ovld __cnfn isless(half8 x, half8 y);\n" |
38891 | "short16 __ovld __cnfn isless(half16 x, half16 y);\n" |
38892 | "#endif //cl_khr_fp16\n" |
38893 | "\n" |
38894 | "/**\n" |
38895 | " * Returns the component-wise compare of x <= y.\n" |
38896 | " */\n" |
38897 | "int __ovld __cnfn islessequal(float x, float y);\n" |
38898 | "int2 __ovld __cnfn islessequal(float2 x, float2 y);\n" |
38899 | "int3 __ovld __cnfn islessequal(float3 x, float3 y);\n" |
38900 | "int4 __ovld __cnfn islessequal(float4 x, float4 y);\n" |
38901 | "int8 __ovld __cnfn islessequal(float8 x, float8 y);\n" |
38902 | "int16 __ovld __cnfn islessequal(float16 x, float16 y);\n" |
38903 | "#ifdef cl_khr_fp64\n" |
38904 | "int __ovld __cnfn islessequal(double x, double y);\n" |
38905 | "long2 __ovld __cnfn islessequal(double2 x, double2 y);\n" |
38906 | "long3 __ovld __cnfn islessequal(double3 x, double3 y);\n" |
38907 | "long4 __ovld __cnfn islessequal(double4 x, double4 y);\n" |
38908 | "long8 __ovld __cnfn islessequal(double8 x, double8 y);\n" |
38909 | "long16 __ovld __cnfn islessequal(double16 x, double16 y);\n" |
38910 | "#endif //cl_khr_fp64\n" |
38911 | "#ifdef cl_khr_fp16\n" |
38912 | "int __ovld __cnfn islessequal(half x, half y);\n" |
38913 | "short2 __ovld __cnfn islessequal(half2 x, half2 y);\n" |
38914 | "short3 __ovld __cnfn islessequal(half3 x, half3 y);\n" |
38915 | "short4 __ovld __cnfn islessequal(half4 x, half4 y);\n" |
38916 | "short8 __ovld __cnfn islessequal(half8 x, half8 y);\n" |
38917 | "short16 __ovld __cnfn islessequal(half16 x, half16 y);\n" |
38918 | "#endif //cl_khr_fp16\n" |
38919 | "\n" |
38920 | "/**\n" |
38921 | " * Returns the component-wise compare of\n" |
38922 | " * (x < y) || (x > y) .\n" |
38923 | " */\n" |
38924 | "int __ovld __cnfn islessgreater(float x, float y);\n" |
38925 | "int2 __ovld __cnfn islessgreater(float2 x, float2 y);\n" |
38926 | "int3 __ovld __cnfn islessgreater(float3 x, float3 y);\n" |
38927 | "int4 __ovld __cnfn islessgreater(float4 x, float4 y);\n" |
38928 | "int8 __ovld __cnfn islessgreater(float8 x, float8 y);\n" |
38929 | "int16 __ovld __cnfn islessgreater(float16 x, float16 y);\n" |
38930 | "#ifdef cl_khr_fp64\n" |
38931 | "int __ovld __cnfn islessgreater(double x, double y);\n" |
38932 | "long2 __ovld __cnfn islessgreater(double2 x, double2 y);\n" |
38933 | "long3 __ovld __cnfn islessgreater(double3 x, double3 y);\n" |
38934 | "long4 __ovld __cnfn islessgreater(double4 x, double4 y);\n" |
38935 | "long8 __ovld __cnfn islessgreater(double8 x, double8 y);\n" |
38936 | "long16 __ovld __cnfn islessgreater(double16 x, double16 y);\n" |
38937 | "#endif //cl_khr_fp64\n" |
38938 | "#ifdef cl_khr_fp16\n" |
38939 | "int __ovld __cnfn islessgreater(half x, half y);\n" |
38940 | "short2 __ovld __cnfn islessgreater(half2 x, half2 y);\n" |
38941 | "short3 __ovld __cnfn islessgreater(half3 x, half3 y);\n" |
38942 | "short4 __ovld __cnfn islessgreater(half4 x, half4 y);\n" |
38943 | "short8 __ovld __cnfn islessgreater(half8 x, half8 y);\n" |
38944 | "short16 __ovld __cnfn islessgreater(half16 x, half16 y);\n" |
38945 | "#endif //cl_khr_fp16\n" |
38946 | "\n" |
38947 | "/**\n" |
38948 | " * Test for finite value.\n" |
38949 | " */\n" |
38950 | "int __ovld __cnfn isfinite(float);\n" |
38951 | "int2 __ovld __cnfn isfinite(float2);\n" |
38952 | "int3 __ovld __cnfn isfinite(float3);\n" |
38953 | "int4 __ovld __cnfn isfinite(float4);\n" |
38954 | "int8 __ovld __cnfn isfinite(float8);\n" |
38955 | "int16 __ovld __cnfn isfinite(float16);\n" |
38956 | "#ifdef cl_khr_fp64\n" |
38957 | "int __ovld __cnfn isfinite(double);\n" |
38958 | "long2 __ovld __cnfn isfinite(double2);\n" |
38959 | "long3 __ovld __cnfn isfinite(double3);\n" |
38960 | "long4 __ovld __cnfn isfinite(double4);\n" |
38961 | "long8 __ovld __cnfn isfinite(double8);\n" |
38962 | "long16 __ovld __cnfn isfinite(double16);\n" |
38963 | "#endif //cl_khr_fp64\n" |
38964 | "#ifdef cl_khr_fp16\n" |
38965 | "int __ovld __cnfn isfinite(half);\n" |
38966 | "short2 __ovld __cnfn isfinite(half2);\n" |
38967 | "short3 __ovld __cnfn isfinite(half3);\n" |
38968 | "short4 __ovld __cnfn isfinite(half4);\n" |
38969 | "short8 __ovld __cnfn isfinite(half8);\n" |
38970 | "short16 __ovld __cnfn isfinite(half16);\n" |
38971 | "#endif //cl_khr_fp16\n" |
38972 | "\n" |
38973 | "/**\n" |
38974 | " * Test for infinity value (+ve or -ve) .\n" |
38975 | " */\n" |
38976 | "int __ovld __cnfn isinf(float);\n" |
38977 | "int2 __ovld __cnfn isinf(float2);\n" |
38978 | "int3 __ovld __cnfn isinf(float3);\n" |
38979 | "int4 __ovld __cnfn isinf(float4);\n" |
38980 | "int8 __ovld __cnfn isinf(float8);\n" |
38981 | "int16 __ovld __cnfn isinf(float16);\n" |
38982 | "#ifdef cl_khr_fp64\n" |
38983 | "int __ovld __cnfn isinf(double);\n" |
38984 | "long2 __ovld __cnfn isinf(double2);\n" |
38985 | "long3 __ovld __cnfn isinf(double3);\n" |
38986 | "long4 __ovld __cnfn isinf(double4);\n" |
38987 | "long8 __ovld __cnfn isinf(double8);\n" |
38988 | "long16 __ovld __cnfn isinf(double16);\n" |
38989 | "#endif //cl_khr_fp64\n" |
38990 | "#ifdef cl_khr_fp16\n" |
38991 | "int __ovld __cnfn isinf(half);\n" |
38992 | "short2 __ovld __cnfn isinf(half2);\n" |
38993 | "short3 __ovld __cnfn isinf(half3);\n" |
38994 | "short4 __ovld __cnfn isinf(half4);\n" |
38995 | "short8 __ovld __cnfn isinf(half8);\n" |
38996 | "short16 __ovld __cnfn isinf(half16);\n" |
38997 | "#endif //cl_khr_fp16\n" |
38998 | "\n" |
38999 | "/**\n" |
39000 | " * Test for a NaN.\n" |
39001 | " */\n" |
39002 | "int __ovld __cnfn isnan(float);\n" |
39003 | "int2 __ovld __cnfn isnan(float2);\n" |
39004 | "int3 __ovld __cnfn isnan(float3);\n" |
39005 | "int4 __ovld __cnfn isnan(float4);\n" |
39006 | "int8 __ovld __cnfn isnan(float8);\n" |
39007 | "int16 __ovld __cnfn isnan(float16);\n" |
39008 | "#ifdef cl_khr_fp64\n" |
39009 | "int __ovld __cnfn isnan(double);\n" |
39010 | "long2 __ovld __cnfn isnan(double2);\n" |
39011 | "long3 __ovld __cnfn isnan(double3);\n" |
39012 | "long4 __ovld __cnfn isnan(double4);\n" |
39013 | "long8 __ovld __cnfn isnan(double8);\n" |
39014 | "long16 __ovld __cnfn isnan(double16);\n" |
39015 | "#endif //cl_khr_fp64\n" |
39016 | "#ifdef cl_khr_fp16\n" |
39017 | "int __ovld __cnfn isnan(half);\n" |
39018 | "short2 __ovld __cnfn isnan(half2);\n" |
39019 | "short3 __ovld __cnfn isnan(half3);\n" |
39020 | "short4 __ovld __cnfn isnan(half4);\n" |
39021 | "short8 __ovld __cnfn isnan(half8);\n" |
39022 | "short16 __ovld __cnfn isnan(half16);\n" |
39023 | "#endif //cl_khr_fp16\n" |
39024 | "\n" |
39025 | "/**\n" |
39026 | " * Test for a normal value.\n" |
39027 | " */\n" |
39028 | "int __ovld __cnfn isnormal(float);\n" |
39029 | "int2 __ovld __cnfn isnormal(float2);\n" |
39030 | "int3 __ovld __cnfn isnormal(float3);\n" |
39031 | "int4 __ovld __cnfn isnormal(float4);\n" |
39032 | "int8 __ovld __cnfn isnormal(float8);\n" |
39033 | "int16 __ovld __cnfn isnormal(float16);\n" |
39034 | "#ifdef cl_khr_fp64\n" |
39035 | "int __ovld __cnfn isnormal(double);\n" |
39036 | "long2 __ovld __cnfn isnormal(double2);\n" |
39037 | "long3 __ovld __cnfn isnormal(double3);\n" |
39038 | "long4 __ovld __cnfn isnormal(double4);\n" |
39039 | "long8 __ovld __cnfn isnormal(double8);\n" |
39040 | "long16 __ovld __cnfn isnormal(double16);\n" |
39041 | "#endif //cl_khr_fp64\n" |
39042 | "#ifdef cl_khr_fp16\n" |
39043 | "int __ovld __cnfn isnormal(half);\n" |
39044 | "short2 __ovld __cnfn isnormal(half2);\n" |
39045 | "short3 __ovld __cnfn isnormal(half3);\n" |
39046 | "short4 __ovld __cnfn isnormal(half4);\n" |
39047 | "short8 __ovld __cnfn isnormal(half8);\n" |
39048 | "short16 __ovld __cnfn isnormal(half16);\n" |
39049 | "#endif //cl_khr_fp16\n" |
39050 | "\n" |
39051 | "/**\n" |
39052 | " * Test if arguments are ordered. isordered() takes\n" |
39053 | " * arguments x and y, and returns the result\n" |
39054 | " * isequal(x, x) && isequal(y, y).\n" |
39055 | " */\n" |
39056 | "int __ovld __cnfn isordered(float x, float y);\n" |
39057 | "int2 __ovld __cnfn isordered(float2 x, float2 y);\n" |
39058 | "int3 __ovld __cnfn isordered(float3 x, float3 y);\n" |
39059 | "int4 __ovld __cnfn isordered(float4 x, float4 y);\n" |
39060 | "int8 __ovld __cnfn isordered(float8 x, float8 y);\n" |
39061 | "int16 __ovld __cnfn isordered(float16 x, float16 y);\n" |
39062 | "#ifdef cl_khr_fp64\n" |
39063 | "int __ovld __cnfn isordered(double x, double y);\n" |
39064 | "long2 __ovld __cnfn isordered(double2 x, double2 y);\n" |
39065 | "long3 __ovld __cnfn isordered(double3 x, double3 y);\n" |
39066 | "long4 __ovld __cnfn isordered(double4 x, double4 y);\n" |
39067 | "long8 __ovld __cnfn isordered(double8 x, double8 y);\n" |
39068 | "long16 __ovld __cnfn isordered(double16 x, double16 y);\n" |
39069 | "#endif //cl_khr_fp64\n" |
39070 | "#ifdef cl_khr_fp16\n" |
39071 | "int __ovld __cnfn isordered(half x, half y);\n" |
39072 | "short2 __ovld __cnfn isordered(half2 x, half2 y);\n" |
39073 | "short3 __ovld __cnfn isordered(half3 x, half3 y);\n" |
39074 | "short4 __ovld __cnfn isordered(half4 x, half4 y);\n" |
39075 | "short8 __ovld __cnfn isordered(half8 x, half8 y);\n" |
39076 | "short16 __ovld __cnfn isordered(half16 x, half16 y);\n" |
39077 | "#endif //cl_khr_fp16\n" |
39078 | "\n" |
39079 | "/**\n" |
39080 | " * Test if arguments are unordered. isunordered()\n" |
39081 | " * takes arguments x and y, returning non-zero if x or y\n" |
39082 | " * is NaN, and zero otherwise.\n" |
39083 | " */\n" |
39084 | "int __ovld __cnfn isunordered(float x, float y);\n" |
39085 | "int2 __ovld __cnfn isunordered(float2 x, float2 y);\n" |
39086 | "int3 __ovld __cnfn isunordered(float3 x, float3 y);\n" |
39087 | "int4 __ovld __cnfn isunordered(float4 x, float4 y);\n" |
39088 | "int8 __ovld __cnfn isunordered(float8 x, float8 y);\n" |
39089 | "int16 __ovld __cnfn isunordered(float16 x, float16 y);\n" |
39090 | "#ifdef cl_khr_fp64\n" |
39091 | "int __ovld __cnfn isunordered(double x, double y);\n" |
39092 | "long2 __ovld __cnfn isunordered(double2 x, double2 y);\n" |
39093 | "long3 __ovld __cnfn isunordered(double3 x, double3 y);\n" |
39094 | "long4 __ovld __cnfn isunordered(double4 x, double4 y);\n" |
39095 | "long8 __ovld __cnfn isunordered(double8 x, double8 y);\n" |
39096 | "long16 __ovld __cnfn isunordered(double16 x, double16 y);\n" |
39097 | "#endif //cl_khr_fp64\n" |
39098 | "#ifdef cl_khr_fp16\n" |
39099 | "int __ovld __cnfn isunordered(half x, half y);\n" |
39100 | "short2 __ovld __cnfn isunordered(half2 x, half2 y);\n" |
39101 | "short3 __ovld __cnfn isunordered(half3 x, half3 y);\n" |
39102 | "short4 __ovld __cnfn isunordered(half4 x, half4 y);\n" |
39103 | "short8 __ovld __cnfn isunordered(half8 x, half8 y);\n" |
39104 | "short16 __ovld __cnfn isunordered(half16 x, half16 y);\n" |
39105 | "#endif //cl_khr_fp16\n" |
39106 | "\n" |
39107 | "/**\n" |
39108 | " * Test for sign bit. The scalar version of the function\n" |
39109 | " * returns a 1 if the sign bit in the float is set else returns\n" |
39110 | " * 0. The vector version of the function returns the\n" |
39111 | " * following for each component in floatn: a -1 if the\n" |
39112 | " * sign bit in the float is set else returns 0.\n" |
39113 | " */\n" |
39114 | "int __ovld __cnfn signbit(float);\n" |
39115 | "int2 __ovld __cnfn signbit(float2);\n" |
39116 | "int3 __ovld __cnfn signbit(float3);\n" |
39117 | "int4 __ovld __cnfn signbit(float4);\n" |
39118 | "int8 __ovld __cnfn signbit(float8);\n" |
39119 | "int16 __ovld __cnfn signbit(float16);\n" |
39120 | "#ifdef cl_khr_fp64\n" |
39121 | "int __ovld __cnfn signbit(double);\n" |
39122 | "long2 __ovld __cnfn signbit(double2);\n" |
39123 | "long3 __ovld __cnfn signbit(double3);\n" |
39124 | "long4 __ovld __cnfn signbit(double4);\n" |
39125 | "long8 __ovld __cnfn signbit(double8);\n" |
39126 | "long16 __ovld __cnfn signbit(double16);\n" |
39127 | "#endif //cl_khr_fp64\n" |
39128 | "#ifdef cl_khr_fp16\n" |
39129 | "int __ovld __cnfn signbit(half);\n" |
39130 | "short2 __ovld __cnfn signbit(half2);\n" |
39131 | "short3 __ovld __cnfn signbit(half3);\n" |
39132 | "short4 __ovld __cnfn signbit(half4);\n" |
39133 | "short8 __ovld __cnfn signbit(half8);\n" |
39134 | "short16 __ovld __cnfn signbit(half16);\n" |
39135 | "#endif //cl_khr_fp16\n" |
39136 | "\n" |
39137 | "/**\n" |
39138 | " * Returns 1 if the most significant bit in any component\n" |
39139 | " * of x is set; otherwise returns 0.\n" |
39140 | " */\n" |
39141 | "int __ovld __cnfn any(char x);\n" |
39142 | "int __ovld __cnfn any(char2 x);\n" |
39143 | "int __ovld __cnfn any(char3 x);\n" |
39144 | "int __ovld __cnfn any(char4 x);\n" |
39145 | "int __ovld __cnfn any(char8 x);\n" |
39146 | "int __ovld __cnfn any(char16 x);\n" |
39147 | "int __ovld __cnfn any(short x);\n" |
39148 | "int __ovld __cnfn any(short2 x);\n" |
39149 | "int __ovld __cnfn any(short3 x);\n" |
39150 | "int __ovld __cnfn any(short4 x);\n" |
39151 | "int __ovld __cnfn any(short8 x);\n" |
39152 | "int __ovld __cnfn any(short16 x);\n" |
39153 | "int __ovld __cnfn any(int x);\n" |
39154 | "int __ovld __cnfn any(int2 x);\n" |
39155 | "int __ovld __cnfn any(int3 x);\n" |
39156 | "int __ovld __cnfn any(int4 x);\n" |
39157 | "int __ovld __cnfn any(int8 x);\n" |
39158 | "int __ovld __cnfn any(int16 x);\n" |
39159 | "int __ovld __cnfn any(long x);\n" |
39160 | "int __ovld __cnfn any(long2 x);\n" |
39161 | "int __ovld __cnfn any(long3 x);\n" |
39162 | "int __ovld __cnfn any(long4 x);\n" |
39163 | "int __ovld __cnfn any(long8 x);\n" |
39164 | "int __ovld __cnfn any(long16 x);\n" |
39165 | "\n" |
39166 | "/**\n" |
39167 | " * Returns 1 if the most significant bit in all components\n" |
39168 | " * of x is set; otherwise returns 0.\n" |
39169 | " */\n" |
39170 | "int __ovld __cnfn all(char x);\n" |
39171 | "int __ovld __cnfn all(char2 x);\n" |
39172 | "int __ovld __cnfn all(char3 x);\n" |
39173 | "int __ovld __cnfn all(char4 x);\n" |
39174 | "int __ovld __cnfn all(char8 x);\n" |
39175 | "int __ovld __cnfn all(char16 x);\n" |
39176 | "int __ovld __cnfn all(short x);\n" |
39177 | "int __ovld __cnfn all(short2 x);\n" |
39178 | "int __ovld __cnfn all(short3 x);\n" |
39179 | "int __ovld __cnfn all(short4 x);\n" |
39180 | "int __ovld __cnfn all(short8 x);\n" |
39181 | "int __ovld __cnfn all(short16 x);\n" |
39182 | "int __ovld __cnfn all(int x);\n" |
39183 | "int __ovld __cnfn all(int2 x);\n" |
39184 | "int __ovld __cnfn all(int3 x);\n" |
39185 | "int __ovld __cnfn all(int4 x);\n" |
39186 | "int __ovld __cnfn all(int8 x);\n" |
39187 | "int __ovld __cnfn all(int16 x);\n" |
39188 | "int __ovld __cnfn all(long x);\n" |
39189 | "int __ovld __cnfn all(long2 x);\n" |
39190 | "int __ovld __cnfn all(long3 x);\n" |
39191 | "int __ovld __cnfn all(long4 x);\n" |
39192 | "int __ovld __cnfn all(long8 x);\n" |
39193 | "int __ovld __cnfn all(long16 x);\n" |
39194 | "\n" |
39195 | "/**\n" |
39196 | " * Each bit of the result is the corresponding bit of a if\n" |
39197 | " * the corresponding bit of c is 0. Otherwise it is the\n" |
39198 | " * corresponding bit of b.\n" |
39199 | " */\n" |
39200 | "char __ovld __cnfn bitselect(char a, char b, char c);\n" |
39201 | "uchar __ovld __cnfn bitselect(uchar a, uchar b, uchar c);\n" |
39202 | "char2 __ovld __cnfn bitselect(char2 a, char2 b, char2 c);\n" |
39203 | "uchar2 __ovld __cnfn bitselect(uchar2 a, uchar2 b, uchar2 c);\n" |
39204 | "char3 __ovld __cnfn bitselect(char3 a, char3 b, char3 c);\n" |
39205 | "uchar3 __ovld __cnfn bitselect(uchar3 a, uchar3 b, uchar3 c);\n" |
39206 | "char4 __ovld __cnfn bitselect(char4 a, char4 b, char4 c);\n" |
39207 | "uchar4 __ovld __cnfn bitselect(uchar4 a, uchar4 b, uchar4 c);\n" |
39208 | "char8 __ovld __cnfn bitselect(char8 a, char8 b, char8 c);\n" |
39209 | "uchar8 __ovld __cnfn bitselect(uchar8 a, uchar8 b, uchar8 c);\n" |
39210 | "char16 __ovld __cnfn bitselect(char16 a, char16 b, char16 c);\n" |
39211 | "uchar16 __ovld __cnfn bitselect(uchar16 a, uchar16 b, uchar16 c);\n" |
39212 | "short __ovld __cnfn bitselect(short a, short b, short c);\n" |
39213 | "ushort __ovld __cnfn bitselect(ushort a, ushort b, ushort c);\n" |
39214 | "short2 __ovld __cnfn bitselect(short2 a, short2 b, short2 c);\n" |
39215 | "ushort2 __ovld __cnfn bitselect(ushort2 a, ushort2 b, ushort2 c);\n" |
39216 | "short3 __ovld __cnfn bitselect(short3 a, short3 b, short3 c);\n" |
39217 | "ushort3 __ovld __cnfn bitselect(ushort3 a, ushort3 b, ushort3 c);\n" |
39218 | "short4 __ovld __cnfn bitselect(short4 a, short4 b, short4 c);\n" |
39219 | "ushort4 __ovld __cnfn bitselect(ushort4 a, ushort4 b, ushort4 c);\n" |
39220 | "short8 __ovld __cnfn bitselect(short8 a, short8 b, short8 c);\n" |
39221 | "ushort8 __ovld __cnfn bitselect(ushort8 a, ushort8 b, ushort8 c);\n" |
39222 | "short16 __ovld __cnfn bitselect(short16 a, short16 b, short16 c);\n" |
39223 | "ushort16 __ovld __cnfn bitselect(ushort16 a, ushort16 b, ushort16 c);\n" |
39224 | "int __ovld __cnfn bitselect(int a, int b, int c);\n" |
39225 | "uint __ovld __cnfn bitselect(uint a, uint b, uint c);\n" |
39226 | "int2 __ovld __cnfn bitselect(int2 a, int2 b, int2 c);\n" |
39227 | "uint2 __ovld __cnfn bitselect(uint2 a, uint2 b, uint2 c);\n" |
39228 | "int3 __ovld __cnfn bitselect(int3 a, int3 b, int3 c);\n" |
39229 | "uint3 __ovld __cnfn bitselect(uint3 a, uint3 b, uint3 c);\n" |
39230 | "int4 __ovld __cnfn bitselect(int4 a, int4 b, int4 c);\n" |
39231 | "uint4 __ovld __cnfn bitselect(uint4 a, uint4 b, uint4 c);\n" |
39232 | "int8 __ovld __cnfn bitselect(int8 a, int8 b, int8 c);\n" |
39233 | "uint8 __ovld __cnfn bitselect(uint8 a, uint8 b, uint8 c);\n" |
39234 | "int16 __ovld __cnfn bitselect(int16 a, int16 b, int16 c);\n" |
39235 | "uint16 __ovld __cnfn bitselect(uint16 a, uint16 b, uint16 c);\n" |
39236 | "long __ovld __cnfn bitselect(long a, long b, long c);\n" |
39237 | "ulong __ovld __cnfn bitselect(ulong a, ulong b, ulong c);\n" |
39238 | "long2 __ovld __cnfn bitselect(long2 a, long2 b, long2 c);\n" |
39239 | "ulong2 __ovld __cnfn bitselect(ulong2 a, ulong2 b, ulong2 c);\n" |
39240 | "long3 __ovld __cnfn bitselect(long3 a, long3 b, long3 c);\n" |
39241 | "ulong3 __ovld __cnfn bitselect(ulong3 a, ulong3 b, ulong3 c);\n" |
39242 | "long4 __ovld __cnfn bitselect(long4 a, long4 b, long4 c);\n" |
39243 | "ulong4 __ovld __cnfn bitselect(ulong4 a, ulong4 b, ulong4 c);\n" |
39244 | "long8 __ovld __cnfn bitselect(long8 a, long8 b, long8 c);\n" |
39245 | "ulong8 __ovld __cnfn bitselect(ulong8 a, ulong8 b, ulong8 c);\n" |
39246 | "long16 __ovld __cnfn bitselect(long16 a, long16 b, long16 c);\n" |
39247 | "ulong16 __ovld __cnfn bitselect(ulong16 a, ulong16 b, ulong16 c);\n" |
39248 | "float __ovld __cnfn bitselect(float a, float b, float c);\n" |
39249 | "float2 __ovld __cnfn bitselect(float2 a, float2 b, float2 c);\n" |
39250 | "float3 __ovld __cnfn bitselect(float3 a, float3 b, float3 c);\n" |
39251 | "float4 __ovld __cnfn bitselect(float4 a, float4 b, float4 c);\n" |
39252 | "float8 __ovld __cnfn bitselect(float8 a, float8 b, float8 c);\n" |
39253 | "float16 __ovld __cnfn bitselect(float16 a, float16 b, float16 c);\n" |
39254 | "#ifdef cl_khr_fp64\n" |
39255 | "double __ovld __cnfn bitselect(double a, double b, double c);\n" |
39256 | "double2 __ovld __cnfn bitselect(double2 a, double2 b, double2 c);\n" |
39257 | "double3 __ovld __cnfn bitselect(double3 a, double3 b, double3 c);\n" |
39258 | "double4 __ovld __cnfn bitselect(double4 a, double4 b, double4 c);\n" |
39259 | "double8 __ovld __cnfn bitselect(double8 a, double8 b, double8 c);\n" |
39260 | "double16 __ovld __cnfn bitselect(double16 a, double16 b, double16 c);\n" |
39261 | "#endif //cl_khr_fp64\n" |
39262 | "#ifdef cl_khr_fp16\n" |
39263 | "half __ovld __cnfn bitselect(half a, half b, half c);\n" |
39264 | "half2 __ovld __cnfn bitselect(half2 a, half2 b, half2 c);\n" |
39265 | "half3 __ovld __cnfn bitselect(half3 a, half3 b, half3 c);\n" |
39266 | "half4 __ovld __cnfn bitselect(half4 a, half4 b, half4 c);\n" |
39267 | "half8 __ovld __cnfn bitselect(half8 a, half8 b, half8 c);\n" |
39268 | "half16 __ovld __cnfn bitselect(half16 a, half16 b, half16 c);\n" |
39269 | "#endif //cl_khr_fp16\n" |
39270 | "\n" |
39271 | "/**\n" |
39272 | " * For each component of a vector type,\n" |
39273 | " * result[i] = if MSB of c[i] is set ? b[i] : a[i].\n" |
39274 | " * For a scalar type, result = c ? b : a.\n" |
39275 | " * b and a must have the same type.\n" |
39276 | " * c must have the same number of elements and bits as a.\n" |
39277 | " */\n" |
39278 | "char __ovld __cnfn select(char a, char b, char c);\n" |
39279 | "uchar __ovld __cnfn select(uchar a, uchar b, char c);\n" |
39280 | "char2 __ovld __cnfn select(char2 a, char2 b, char2 c);\n" |
39281 | "uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, char2 c);\n" |
39282 | "char3 __ovld __cnfn select(char3 a, char3 b, char3 c);\n" |
39283 | "uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, char3 c);\n" |
39284 | "char4 __ovld __cnfn select(char4 a, char4 b, char4 c);\n" |
39285 | "uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, char4 c);\n" |
39286 | "char8 __ovld __cnfn select(char8 a, char8 b, char8 c);\n" |
39287 | "uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, char8 c);\n" |
39288 | "char16 __ovld __cnfn select(char16 a, char16 b, char16 c);\n" |
39289 | "uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, char16 c);\n" |
39290 | "\n" |
39291 | "short __ovld __cnfn select(short a, short b, short c);\n" |
39292 | "ushort __ovld __cnfn select(ushort a, ushort b, short c);\n" |
39293 | "short2 __ovld __cnfn select(short2 a, short2 b, short2 c);\n" |
39294 | "ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, short2 c);\n" |
39295 | "short3 __ovld __cnfn select(short3 a, short3 b, short3 c);\n" |
39296 | "ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, short3 c);\n" |
39297 | "short4 __ovld __cnfn select(short4 a, short4 b, short4 c);\n" |
39298 | "ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, short4 c);\n" |
39299 | "short8 __ovld __cnfn select(short8 a, short8 b, short8 c);\n" |
39300 | "ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, short8 c);\n" |
39301 | "short16 __ovld __cnfn select(short16 a, short16 b, short16 c);\n" |
39302 | "ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, short16 c);\n" |
39303 | "\n" |
39304 | "int __ovld __cnfn select(int a, int b, int c);\n" |
39305 | "uint __ovld __cnfn select(uint a, uint b, int c);\n" |
39306 | "int2 __ovld __cnfn select(int2 a, int2 b, int2 c);\n" |
39307 | "uint2 __ovld __cnfn select(uint2 a, uint2 b, int2 c);\n" |
39308 | "int3 __ovld __cnfn select(int3 a, int3 b, int3 c);\n" |
39309 | "uint3 __ovld __cnfn select(uint3 a, uint3 b, int3 c);\n" |
39310 | "int4 __ovld __cnfn select(int4 a, int4 b, int4 c);\n" |
39311 | "uint4 __ovld __cnfn select(uint4 a, uint4 b, int4 c);\n" |
39312 | "int8 __ovld __cnfn select(int8 a, int8 b, int8 c);\n" |
39313 | "uint8 __ovld __cnfn select(uint8 a, uint8 b, int8 c);\n" |
39314 | "int16 __ovld __cnfn select(int16 a, int16 b, int16 c);\n" |
39315 | "uint16 __ovld __cnfn select(uint16 a, uint16 b, int16 c);\n" |
39316 | "float __ovld __cnfn select(float a, float b, int c);\n" |
39317 | "float2 __ovld __cnfn select(float2 a, float2 b, int2 c);\n" |
39318 | "float3 __ovld __cnfn select(float3 a, float3 b, int3 c);\n" |
39319 | "float4 __ovld __cnfn select(float4 a, float4 b, int4 c);\n" |
39320 | "float8 __ovld __cnfn select(float8 a, float8 b, int8 c);\n" |
39321 | "float16 __ovld __cnfn select(float16 a, float16 b, int16 c);\n" |
39322 | "\n" |
39323 | "long __ovld __cnfn select(long a, long b, long c);\n" |
39324 | "ulong __ovld __cnfn select(ulong a, ulong b, long c);\n" |
39325 | "long2 __ovld __cnfn select(long2 a, long2 b, long2 c);\n" |
39326 | "ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, long2 c);\n" |
39327 | "long3 __ovld __cnfn select(long3 a, long3 b, long3 c);\n" |
39328 | "ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, long3 c);\n" |
39329 | "long4 __ovld __cnfn select(long4 a, long4 b, long4 c);\n" |
39330 | "ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, long4 c);\n" |
39331 | "long8 __ovld __cnfn select(long8 a, long8 b, long8 c);\n" |
39332 | "ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, long8 c);\n" |
39333 | "long16 __ovld __cnfn select(long16 a, long16 b, long16 c);\n" |
39334 | "ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, long16 c);\n" |
39335 | "\n" |
39336 | "char __ovld __cnfn select(char a, char b, uchar c);\n" |
39337 | "uchar __ovld __cnfn select(uchar a, uchar b, uchar c);\n" |
39338 | "char2 __ovld __cnfn select(char2 a, char2 b, uchar2 c);\n" |
39339 | "uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, uchar2 c);\n" |
39340 | "char3 __ovld __cnfn select(char3 a, char3 b, uchar3 c);\n" |
39341 | "uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, uchar3 c);\n" |
39342 | "char4 __ovld __cnfn select(char4 a, char4 b, uchar4 c);\n" |
39343 | "uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, uchar4 c);\n" |
39344 | "char8 __ovld __cnfn select(char8 a, char8 b, uchar8 c);\n" |
39345 | "uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, uchar8 c);\n" |
39346 | "char16 __ovld __cnfn select(char16 a, char16 b, uchar16 c);\n" |
39347 | "uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, uchar16 c);\n" |
39348 | "\n" |
39349 | "short __ovld __cnfn select(short a, short b, ushort c);\n" |
39350 | "ushort __ovld __cnfn select(ushort a, ushort b, ushort c);\n" |
39351 | "short2 __ovld __cnfn select(short2 a, short2 b, ushort2 c);\n" |
39352 | "ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, ushort2 c);\n" |
39353 | "short3 __ovld __cnfn select(short3 a, short3 b, ushort3 c);\n" |
39354 | "ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, ushort3 c);\n" |
39355 | "short4 __ovld __cnfn select(short4 a, short4 b, ushort4 c);\n" |
39356 | "ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, ushort4 c);\n" |
39357 | "short8 __ovld __cnfn select(short8 a, short8 b, ushort8 c);\n" |
39358 | "ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, ushort8 c);\n" |
39359 | "short16 __ovld __cnfn select(short16 a, short16 b, ushort16 c);\n" |
39360 | "ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, ushort16 c);\n" |
39361 | "\n" |
39362 | "int __ovld __cnfn select(int a, int b, uint c);\n" |
39363 | "uint __ovld __cnfn select(uint a, uint b, uint c);\n" |
39364 | "int2 __ovld __cnfn select(int2 a, int2 b, uint2 c);\n" |
39365 | "uint2 __ovld __cnfn select(uint2 a, uint2 b, uint2 c);\n" |
39366 | "int3 __ovld __cnfn select(int3 a, int3 b, uint3 c);\n" |
39367 | "uint3 __ovld __cnfn select(uint3 a, uint3 b, uint3 c);\n" |
39368 | "int4 __ovld __cnfn select(int4 a, int4 b, uint4 c);\n" |
39369 | "uint4 __ovld __cnfn select(uint4 a, uint4 b, uint4 c);\n" |
39370 | "int8 __ovld __cnfn select(int8 a, int8 b, uint8 c);\n" |
39371 | "uint8 __ovld __cnfn select(uint8 a, uint8 b, uint8 c);\n" |
39372 | "int16 __ovld __cnfn select(int16 a, int16 b, uint16 c);\n" |
39373 | "uint16 __ovld __cnfn select(uint16 a, uint16 b, uint16 c);\n" |
39374 | "float __ovld __cnfn select(float a, float b, uint c);\n" |
39375 | "float2 __ovld __cnfn select(float2 a, float2 b, uint2 c);\n" |
39376 | "float3 __ovld __cnfn select(float3 a, float3 b, uint3 c);\n" |
39377 | "float4 __ovld __cnfn select(float4 a, float4 b, uint4 c);\n" |
39378 | "float8 __ovld __cnfn select(float8 a, float8 b, uint8 c);\n" |
39379 | "float16 __ovld __cnfn select(float16 a, float16 b, uint16 c);\n" |
39380 | "\n" |
39381 | "long __ovld __cnfn select(long a, long b, ulong c);\n" |
39382 | "ulong __ovld __cnfn select(ulong a, ulong b, ulong c);\n" |
39383 | "long2 __ovld __cnfn select(long2 a, long2 b, ulong2 c);\n" |
39384 | "ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, ulong2 c);\n" |
39385 | "long3 __ovld __cnfn select(long3 a, long3 b, ulong3 c);\n" |
39386 | "ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, ulong3 c);\n" |
39387 | "long4 __ovld __cnfn select(long4 a, long4 b, ulong4 c);\n" |
39388 | "ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, ulong4 c);\n" |
39389 | "long8 __ovld __cnfn select(long8 a, long8 b, ulong8 c);\n" |
39390 | "ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c);\n" |
39391 | "long16 __ovld __cnfn select(long16 a, long16 b, ulong16 c);\n" |
39392 | "ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ulong16 c);\n" |
39393 | "\n" |
39394 | "#ifdef cl_khr_fp64\n" |
39395 | "double __ovld __cnfn select(double a, double b, long c);\n" |
39396 | "double2 __ovld __cnfn select(double2 a, double2 b, long2 c);\n" |
39397 | "double3 __ovld __cnfn select(double3 a, double3 b, long3 c);\n" |
39398 | "double4 __ovld __cnfn select(double4 a, double4 b, long4 c);\n" |
39399 | "double8 __ovld __cnfn select(double8 a, double8 b, long8 c);\n" |
39400 | "double16 __ovld __cnfn select(double16 a, double16 b, long16 c);\n" |
39401 | "double __ovld __cnfn select(double a, double b, ulong c);\n" |
39402 | "double2 __ovld __cnfn select(double2 a, double2 b, ulong2 c);\n" |
39403 | "double3 __ovld __cnfn select(double3 a, double3 b, ulong3 c);\n" |
39404 | "double4 __ovld __cnfn select(double4 a, double4 b, ulong4 c);\n" |
39405 | "double8 __ovld __cnfn select(double8 a, double8 b, ulong8 c);\n" |
39406 | "double16 __ovld __cnfn select(double16 a, double16 b, ulong16 c);\n" |
39407 | "#endif //cl_khr_fp64\n" |
39408 | "#ifdef cl_khr_fp16\n" |
39409 | "half __ovld __cnfn select(half a, half b, short c);\n" |
39410 | "half2 __ovld __cnfn select(half2 a, half2 b, short2 c);\n" |
39411 | "half3 __ovld __cnfn select(half3 a, half3 b, short3 c);\n" |
39412 | "half4 __ovld __cnfn select(half4 a, half4 b, short4 c);\n" |
39413 | "half8 __ovld __cnfn select(half8 a, half8 b, short8 c);\n" |
39414 | "half16 __ovld __cnfn select(half16 a, half16 b, short16 c);\n" |
39415 | "half __ovld __cnfn select(half a, half b, ushort c);\n" |
39416 | "half2 __ovld __cnfn select(half2 a, half2 b, ushort2 c);\n" |
39417 | "half3 __ovld __cnfn select(half3 a, half3 b, ushort3 c);\n" |
39418 | "half4 __ovld __cnfn select(half4 a, half4 b, ushort4 c);\n" |
39419 | "half8 __ovld __cnfn select(half8 a, half8 b, ushort8 c);\n" |
39420 | "half16 __ovld __cnfn select(half16 a, half16 b, ushort16 c);\n" |
39421 | "#endif //cl_khr_fp16\n" |
39422 | "\n" |
39423 | "// OpenCL v1.1 s6.11.7, v1.2 s6.12.7, v2.0 s6.13.7 - Vector Data Load and Store Functions\n" |
39424 | "// OpenCL extensions v1.1 s9.6.6, v1.2 s9.5.6, v2.0 s9.4.6 - Vector Data Load and Store Functions for Half Type\n" |
39425 | "/**\n" |
39426 | " * Use generic type gentype to indicate the built-in data types\n" |
39427 | " * char, uchar, short, ushort, int, uint, long, ulong, float,\n" |
39428 | " * double or half.\n" |
39429 | " *\n" |
39430 | " * vloadn return sizeof (gentypen) bytes of data read from address (p + (offset * n)).\n" |
39431 | " *\n" |
39432 | " * vstoren write sizeof (gentypen) bytes given by data to address (p + (offset * n)).\n" |
39433 | " *\n" |
39434 | " * The address computed as (p + (offset * n)) must be\n" |
39435 | " * 8-bit aligned if gentype is char, uchar;\n" |
39436 | " * 16-bit aligned if gentype is short, ushort, half;\n" |
39437 | " * 32-bit aligned if gentype is int, uint, float;\n" |
39438 | " * 64-bit aligned if gentype is long, ulong, double.\n" |
39439 | " */\n" |
39440 | "\n" |
39441 | "char2 __ovld vload2(size_t offset, const __constant char *p);\n" |
39442 | "uchar2 __ovld vload2(size_t offset, const __constant uchar *p);\n" |
39443 | "short2 __ovld vload2(size_t offset, const __constant short *p);\n" |
39444 | "ushort2 __ovld vload2(size_t offset, const __constant ushort *p);\n" |
39445 | "int2 __ovld vload2(size_t offset, const __constant int *p);\n" |
39446 | "uint2 __ovld vload2(size_t offset, const __constant uint *p);\n" |
39447 | "long2 __ovld vload2(size_t offset, const __constant long *p);\n" |
39448 | "ulong2 __ovld vload2(size_t offset, const __constant ulong *p);\n" |
39449 | "float2 __ovld vload2(size_t offset, const __constant float *p);\n" |
39450 | "char3 __ovld vload3(size_t offset, const __constant char *p);\n" |
39451 | "uchar3 __ovld vload3(size_t offset, const __constant uchar *p);\n" |
39452 | "short3 __ovld vload3(size_t offset, const __constant short *p);\n" |
39453 | "ushort3 __ovld vload3(size_t offset, const __constant ushort *p);\n" |
39454 | "int3 __ovld vload3(size_t offset, const __constant int *p);\n" |
39455 | "uint3 __ovld vload3(size_t offset, const __constant uint *p);\n" |
39456 | "long3 __ovld vload3(size_t offset, const __constant long *p);\n" |
39457 | "ulong3 __ovld vload3(size_t offset, const __constant ulong *p);\n" |
39458 | "float3 __ovld vload3(size_t offset, const __constant float *p);\n" |
39459 | "char4 __ovld vload4(size_t offset, const __constant char *p);\n" |
39460 | "uchar4 __ovld vload4(size_t offset, const __constant uchar *p);\n" |
39461 | "short4 __ovld vload4(size_t offset, const __constant short *p);\n" |
39462 | "ushort4 __ovld vload4(size_t offset, const __constant ushort *p);\n" |
39463 | "int4 __ovld vload4(size_t offset, const __constant int *p);\n" |
39464 | "uint4 __ovld vload4(size_t offset, const __constant uint *p);\n" |
39465 | "long4 __ovld vload4(size_t offset, const __constant long *p);\n" |
39466 | "ulong4 __ovld vload4(size_t offset, const __constant ulong *p);\n" |
39467 | "float4 __ovld vload4(size_t offset, const __constant float *p);\n" |
39468 | "char8 __ovld vload8(size_t offset, const __constant char *p);\n" |
39469 | "uchar8 __ovld vload8(size_t offset, const __constant uchar *p);\n" |
39470 | "short8 __ovld vload8(size_t offset, const __constant short *p);\n" |
39471 | "ushort8 __ovld vload8(size_t offset, const __constant ushort *p);\n" |
39472 | "int8 __ovld vload8(size_t offset, const __constant int *p);\n" |
39473 | "uint8 __ovld vload8(size_t offset, const __constant uint *p);\n" |
39474 | "long8 __ovld vload8(size_t offset, const __constant long *p);\n" |
39475 | "ulong8 __ovld vload8(size_t offset, const __constant ulong *p);\n" |
39476 | "float8 __ovld vload8(size_t offset, const __constant float *p);\n" |
39477 | "char16 __ovld vload16(size_t offset, const __constant char *p);\n" |
39478 | "uchar16 __ovld vload16(size_t offset, const __constant uchar *p);\n" |
39479 | "short16 __ovld vload16(size_t offset, const __constant short *p);\n" |
39480 | "ushort16 __ovld vload16(size_t offset, const __constant ushort *p);\n" |
39481 | "int16 __ovld vload16(size_t offset, const __constant int *p);\n" |
39482 | "uint16 __ovld vload16(size_t offset, const __constant uint *p);\n" |
39483 | "long16 __ovld vload16(size_t offset, const __constant long *p);\n" |
39484 | "ulong16 __ovld vload16(size_t offset, const __constant ulong *p);\n" |
39485 | "float16 __ovld vload16(size_t offset, const __constant float *p);\n" |
39486 | "#ifdef cl_khr_fp64\n" |
39487 | "double2 __ovld vload2(size_t offset, const __constant double *p);\n" |
39488 | "double3 __ovld vload3(size_t offset, const __constant double *p);\n" |
39489 | "double4 __ovld vload4(size_t offset, const __constant double *p);\n" |
39490 | "double8 __ovld vload8(size_t offset, const __constant double *p);\n" |
39491 | "double16 __ovld vload16(size_t offset, const __constant double *p);\n" |
39492 | "#endif //cl_khr_fp64\n" |
39493 | "\n" |
39494 | "#ifdef cl_khr_fp16\n" |
39495 | "half __ovld vload(size_t offset, const __constant half *p);\n" |
39496 | "half2 __ovld vload2(size_t offset, const __constant half *p);\n" |
39497 | "half3 __ovld vload3(size_t offset, const __constant half *p);\n" |
39498 | "half4 __ovld vload4(size_t offset, const __constant half *p);\n" |
39499 | "half8 __ovld vload8(size_t offset, const __constant half *p);\n" |
39500 | "half16 __ovld vload16(size_t offset, const __constant half *p);\n" |
39501 | "#endif //cl_khr_fp16\n" |
39502 | "\n" |
39503 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
39504 | "char2 __ovld vload2(size_t offset, const char *p);\n" |
39505 | "uchar2 __ovld vload2(size_t offset, const uchar *p);\n" |
39506 | "short2 __ovld vload2(size_t offset, const short *p);\n" |
39507 | "ushort2 __ovld vload2(size_t offset, const ushort *p);\n" |
39508 | "int2 __ovld vload2(size_t offset, const int *p);\n" |
39509 | "uint2 __ovld vload2(size_t offset, const uint *p);\n" |
39510 | "long2 __ovld vload2(size_t offset, const long *p);\n" |
39511 | "ulong2 __ovld vload2(size_t offset, const ulong *p);\n" |
39512 | "float2 __ovld vload2(size_t offset, const float *p);\n" |
39513 | "char3 __ovld vload3(size_t offset, const char *p);\n" |
39514 | "uchar3 __ovld vload3(size_t offset, const uchar *p);\n" |
39515 | "short3 __ovld vload3(size_t offset, const short *p);\n" |
39516 | "ushort3 __ovld vload3(size_t offset, const ushort *p);\n" |
39517 | "int3 __ovld vload3(size_t offset, const int *p);\n" |
39518 | "uint3 __ovld vload3(size_t offset, const uint *p);\n" |
39519 | "long3 __ovld vload3(size_t offset, const long *p);\n" |
39520 | "ulong3 __ovld vload3(size_t offset, const ulong *p);\n" |
39521 | "float3 __ovld vload3(size_t offset, const float *p);\n" |
39522 | "char4 __ovld vload4(size_t offset, const char *p);\n" |
39523 | "uchar4 __ovld vload4(size_t offset, const uchar *p);\n" |
39524 | "short4 __ovld vload4(size_t offset, const short *p);\n" |
39525 | "ushort4 __ovld vload4(size_t offset, const ushort *p);\n" |
39526 | "int4 __ovld vload4(size_t offset, const int *p);\n" |
39527 | "uint4 __ovld vload4(size_t offset, const uint *p);\n" |
39528 | "long4 __ovld vload4(size_t offset, const long *p);\n" |
39529 | "ulong4 __ovld vload4(size_t offset, const ulong *p);\n" |
39530 | "float4 __ovld vload4(size_t offset, const float *p);\n" |
39531 | "char8 __ovld vload8(size_t offset, const char *p);\n" |
39532 | "uchar8 __ovld vload8(size_t offset, const uchar *p);\n" |
39533 | "short8 __ovld vload8(size_t offset, const short *p);\n" |
39534 | "ushort8 __ovld vload8(size_t offset, const ushort *p);\n" |
39535 | "int8 __ovld vload8(size_t offset, const int *p);\n" |
39536 | "uint8 __ovld vload8(size_t offset, const uint *p);\n" |
39537 | "long8 __ovld vload8(size_t offset, const long *p);\n" |
39538 | "ulong8 __ovld vload8(size_t offset, const ulong *p);\n" |
39539 | "float8 __ovld vload8(size_t offset, const float *p);\n" |
39540 | "char16 __ovld vload16(size_t offset, const char *p);\n" |
39541 | "uchar16 __ovld vload16(size_t offset, const uchar *p);\n" |
39542 | "short16 __ovld vload16(size_t offset, const short *p);\n" |
39543 | "ushort16 __ovld vload16(size_t offset, const ushort *p);\n" |
39544 | "int16 __ovld vload16(size_t offset, const int *p);\n" |
39545 | "uint16 __ovld vload16(size_t offset, const uint *p);\n" |
39546 | "long16 __ovld vload16(size_t offset, const long *p);\n" |
39547 | "ulong16 __ovld vload16(size_t offset, const ulong *p);\n" |
39548 | "float16 __ovld vload16(size_t offset, const float *p);\n" |
39549 | "\n" |
39550 | "#ifdef cl_khr_fp64\n" |
39551 | "double2 __ovld vload2(size_t offset, const double *p);\n" |
39552 | "double3 __ovld vload3(size_t offset, const double *p);\n" |
39553 | "double4 __ovld vload4(size_t offset, const double *p);\n" |
39554 | "double8 __ovld vload8(size_t offset, const double *p);\n" |
39555 | "double16 __ovld vload16(size_t offset, const double *p);\n" |
39556 | "#endif //cl_khr_fp64\n" |
39557 | "\n" |
39558 | "#ifdef cl_khr_fp16\n" |
39559 | "half __ovld vload(size_t offset, const half *p);\n" |
39560 | "half2 __ovld vload2(size_t offset, const half *p);\n" |
39561 | "half3 __ovld vload3(size_t offset, const half *p);\n" |
39562 | "half4 __ovld vload4(size_t offset, const half *p);\n" |
39563 | "half8 __ovld vload8(size_t offset, const half *p);\n" |
39564 | "half16 __ovld vload16(size_t offset, const half *p);\n" |
39565 | "#endif //cl_khr_fp16\n" |
39566 | "#else\n" |
39567 | "char2 __ovld vload2(size_t offset, const __global char *p);\n" |
39568 | "uchar2 __ovld vload2(size_t offset, const __global uchar *p);\n" |
39569 | "short2 __ovld vload2(size_t offset, const __global short *p);\n" |
39570 | "ushort2 __ovld vload2(size_t offset, const __global ushort *p);\n" |
39571 | "int2 __ovld vload2(size_t offset, const __global int *p);\n" |
39572 | "uint2 __ovld vload2(size_t offset, const __global uint *p);\n" |
39573 | "long2 __ovld vload2(size_t offset, const __global long *p);\n" |
39574 | "ulong2 __ovld vload2(size_t offset, const __global ulong *p);\n" |
39575 | "float2 __ovld vload2(size_t offset, const __global float *p);\n" |
39576 | "char3 __ovld vload3(size_t offset, const __global char *p);\n" |
39577 | "uchar3 __ovld vload3(size_t offset, const __global uchar *p);\n" |
39578 | "short3 __ovld vload3(size_t offset, const __global short *p);\n" |
39579 | "ushort3 __ovld vload3(size_t offset, const __global ushort *p);\n" |
39580 | "int3 __ovld vload3(size_t offset, const __global int *p);\n" |
39581 | "uint3 __ovld vload3(size_t offset, const __global uint *p);\n" |
39582 | "long3 __ovld vload3(size_t offset, const __global long *p);\n" |
39583 | "ulong3 __ovld vload3(size_t offset, const __global ulong *p);\n" |
39584 | "float3 __ovld vload3(size_t offset, const __global float *p);\n" |
39585 | "char4 __ovld vload4(size_t offset, const __global char *p);\n" |
39586 | "uchar4 __ovld vload4(size_t offset, const __global uchar *p);\n" |
39587 | "short4 __ovld vload4(size_t offset, const __global short *p);\n" |
39588 | "ushort4 __ovld vload4(size_t offset, const __global ushort *p);\n" |
39589 | "int4 __ovld vload4(size_t offset, const __global int *p);\n" |
39590 | "uint4 __ovld vload4(size_t offset, const __global uint *p);\n" |
39591 | "long4 __ovld vload4(size_t offset, const __global long *p);\n" |
39592 | "ulong4 __ovld vload4(size_t offset, const __global ulong *p);\n" |
39593 | "float4 __ovld vload4(size_t offset, const __global float *p);\n" |
39594 | "char8 __ovld vload8(size_t offset, const __global char *p);\n" |
39595 | "uchar8 __ovld vload8(size_t offset, const __global uchar *p);\n" |
39596 | "short8 __ovld vload8(size_t offset, const __global short *p);\n" |
39597 | "ushort8 __ovld vload8(size_t offset, const __global ushort *p);\n" |
39598 | "int8 __ovld vload8(size_t offset, const __global int *p);\n" |
39599 | "uint8 __ovld vload8(size_t offset, const __global uint *p);\n" |
39600 | "long8 __ovld vload8(size_t offset, const __global long *p);\n" |
39601 | "ulong8 __ovld vload8(size_t offset, const __global ulong *p);\n" |
39602 | "float8 __ovld vload8(size_t offset, const __global float *p);\n" |
39603 | "char16 __ovld vload16(size_t offset, const __global char *p);\n" |
39604 | "uchar16 __ovld vload16(size_t offset, const __global uchar *p);\n" |
39605 | "short16 __ovld vload16(size_t offset, const __global short *p);\n" |
39606 | "ushort16 __ovld vload16(size_t offset, const __global ushort *p);\n" |
39607 | "int16 __ovld vload16(size_t offset, const __global int *p);\n" |
39608 | "uint16 __ovld vload16(size_t offset, const __global uint *p);\n" |
39609 | "long16 __ovld vload16(size_t offset, const __global long *p);\n" |
39610 | "ulong16 __ovld vload16(size_t offset, const __global ulong *p);\n" |
39611 | "float16 __ovld vload16(size_t offset, const __global float *p);\n" |
39612 | "char2 __ovld vload2(size_t offset, const __local char *p);\n" |
39613 | "uchar2 __ovld vload2(size_t offset, const __local uchar *p);\n" |
39614 | "short2 __ovld vload2(size_t offset, const __local short *p);\n" |
39615 | "ushort2 __ovld vload2(size_t offset, const __local ushort *p);\n" |
39616 | "int2 __ovld vload2(size_t offset, const __local int *p);\n" |
39617 | "uint2 __ovld vload2(size_t offset, const __local uint *p);\n" |
39618 | "long2 __ovld vload2(size_t offset, const __local long *p);\n" |
39619 | "ulong2 __ovld vload2(size_t offset, const __local ulong *p);\n" |
39620 | "float2 __ovld vload2(size_t offset, const __local float *p);\n" |
39621 | "char3 __ovld vload3(size_t offset, const __local char *p);\n" |
39622 | "uchar3 __ovld vload3(size_t offset, const __local uchar *p);\n" |
39623 | "short3 __ovld vload3(size_t offset, const __local short *p);\n" |
39624 | "ushort3 __ovld vload3(size_t offset, const __local ushort *p);\n" |
39625 | "int3 __ovld vload3(size_t offset, const __local int *p);\n" |
39626 | "uint3 __ovld vload3(size_t offset, const __local uint *p);\n" |
39627 | "long3 __ovld vload3(size_t offset, const __local long *p);\n" |
39628 | "ulong3 __ovld vload3(size_t offset, const __local ulong *p);\n" |
39629 | "float3 __ovld vload3(size_t offset, const __local float *p);\n" |
39630 | "char4 __ovld vload4(size_t offset, const __local char *p);\n" |
39631 | "uchar4 __ovld vload4(size_t offset, const __local uchar *p);\n" |
39632 | "short4 __ovld vload4(size_t offset, const __local short *p);\n" |
39633 | "ushort4 __ovld vload4(size_t offset, const __local ushort *p);\n" |
39634 | "int4 __ovld vload4(size_t offset, const __local int *p);\n" |
39635 | "uint4 __ovld vload4(size_t offset, const __local uint *p);\n" |
39636 | "long4 __ovld vload4(size_t offset, const __local long *p);\n" |
39637 | "ulong4 __ovld vload4(size_t offset, const __local ulong *p);\n" |
39638 | "float4 __ovld vload4(size_t offset, const __local float *p);\n" |
39639 | "char8 __ovld vload8(size_t offset, const __local char *p);\n" |
39640 | "uchar8 __ovld vload8(size_t offset, const __local uchar *p);\n" |
39641 | "short8 __ovld vload8(size_t offset, const __local short *p);\n" |
39642 | "ushort8 __ovld vload8(size_t offset, const __local ushort *p);\n" |
39643 | "int8 __ovld vload8(size_t offset, const __local int *p);\n" |
39644 | "uint8 __ovld vload8(size_t offset, const __local uint *p);\n" |
39645 | "long8 __ovld vload8(size_t offset, const __local long *p);\n" |
39646 | "ulong8 __ovld vload8(size_t offset, const __local ulong *p);\n" |
39647 | "float8 __ovld vload8(size_t offset, const __local float *p);\n" |
39648 | "char16 __ovld vload16(size_t offset, const __local char *p);\n" |
39649 | "uchar16 __ovld vload16(size_t offset, const __local uchar *p);\n" |
39650 | "short16 __ovld vload16(size_t offset, const __local short *p);\n" |
39651 | "ushort16 __ovld vload16(size_t offset, const __local ushort *p);\n" |
39652 | "int16 __ovld vload16(size_t offset, const __local int *p);\n" |
39653 | "uint16 __ovld vload16(size_t offset, const __local uint *p);\n" |
39654 | "long16 __ovld vload16(size_t offset, const __local long *p);\n" |
39655 | "ulong16 __ovld vload16(size_t offset, const __local ulong *p);\n" |
39656 | "float16 __ovld vload16(size_t offset, const __local float *p);\n" |
39657 | "char2 __ovld vload2(size_t offset, const __private char *p);\n" |
39658 | "uchar2 __ovld vload2(size_t offset, const __private uchar *p);\n" |
39659 | "short2 __ovld vload2(size_t offset, const __private short *p);\n" |
39660 | "ushort2 __ovld vload2(size_t offset, const __private ushort *p);\n" |
39661 | "int2 __ovld vload2(size_t offset, const __private int *p);\n" |
39662 | "uint2 __ovld vload2(size_t offset, const __private uint *p);\n" |
39663 | "long2 __ovld vload2(size_t offset, const __private long *p);\n" |
39664 | "ulong2 __ovld vload2(size_t offset, const __private ulong *p);\n" |
39665 | "float2 __ovld vload2(size_t offset, const __private float *p);\n" |
39666 | "char3 __ovld vload3(size_t offset, const __private char *p);\n" |
39667 | "uchar3 __ovld vload3(size_t offset, const __private uchar *p);\n" |
39668 | "short3 __ovld vload3(size_t offset, const __private short *p);\n" |
39669 | "ushort3 __ovld vload3(size_t offset, const __private ushort *p);\n" |
39670 | "int3 __ovld vload3(size_t offset, const __private int *p);\n" |
39671 | "uint3 __ovld vload3(size_t offset, const __private uint *p);\n" |
39672 | "long3 __ovld vload3(size_t offset, const __private long *p);\n" |
39673 | "ulong3 __ovld vload3(size_t offset, const __private ulong *p);\n" |
39674 | "float3 __ovld vload3(size_t offset, const __private float *p);\n" |
39675 | "char4 __ovld vload4(size_t offset, const __private char *p);\n" |
39676 | "uchar4 __ovld vload4(size_t offset, const __private uchar *p);\n" |
39677 | "short4 __ovld vload4(size_t offset, const __private short *p);\n" |
39678 | "ushort4 __ovld vload4(size_t offset, const __private ushort *p);\n" |
39679 | "int4 __ovld vload4(size_t offset, const __private int *p);\n" |
39680 | "uint4 __ovld vload4(size_t offset, const __private uint *p);\n" |
39681 | "long4 __ovld vload4(size_t offset, const __private long *p);\n" |
39682 | "ulong4 __ovld vload4(size_t offset, const __private ulong *p);\n" |
39683 | "float4 __ovld vload4(size_t offset, const __private float *p);\n" |
39684 | "char8 __ovld vload8(size_t offset, const __private char *p);\n" |
39685 | "uchar8 __ovld vload8(size_t offset, const __private uchar *p);\n" |
39686 | "short8 __ovld vload8(size_t offset, const __private short *p);\n" |
39687 | "ushort8 __ovld vload8(size_t offset, const __private ushort *p);\n" |
39688 | "int8 __ovld vload8(size_t offset, const __private int *p);\n" |
39689 | "uint8 __ovld vload8(size_t offset, const __private uint *p);\n" |
39690 | "long8 __ovld vload8(size_t offset, const __private long *p);\n" |
39691 | "ulong8 __ovld vload8(size_t offset, const __private ulong *p);\n" |
39692 | "float8 __ovld vload8(size_t offset, const __private float *p);\n" |
39693 | "char16 __ovld vload16(size_t offset, const __private char *p);\n" |
39694 | "uchar16 __ovld vload16(size_t offset, const __private uchar *p);\n" |
39695 | "short16 __ovld vload16(size_t offset, const __private short *p);\n" |
39696 | "ushort16 __ovld vload16(size_t offset, const __private ushort *p);\n" |
39697 | "int16 __ovld vload16(size_t offset, const __private int *p);\n" |
39698 | "uint16 __ovld vload16(size_t offset, const __private uint *p);\n" |
39699 | "long16 __ovld vload16(size_t offset, const __private long *p);\n" |
39700 | "ulong16 __ovld vload16(size_t offset, const __private ulong *p);\n" |
39701 | "float16 __ovld vload16(size_t offset, const __private float *p);\n" |
39702 | "\n" |
39703 | "#ifdef cl_khr_fp64\n" |
39704 | "double2 __ovld vload2(size_t offset, const __global double *p);\n" |
39705 | "double3 __ovld vload3(size_t offset, const __global double *p);\n" |
39706 | "double4 __ovld vload4(size_t offset, const __global double *p);\n" |
39707 | "double8 __ovld vload8(size_t offset, const __global double *p);\n" |
39708 | "double16 __ovld vload16(size_t offset, const __global double *p);\n" |
39709 | "double2 __ovld vload2(size_t offset, const __local double *p);\n" |
39710 | "double3 __ovld vload3(size_t offset, const __local double *p);\n" |
39711 | "double4 __ovld vload4(size_t offset, const __local double *p);\n" |
39712 | "double8 __ovld vload8(size_t offset, const __local double *p);\n" |
39713 | "double16 __ovld vload16(size_t offset, const __local double *p);\n" |
39714 | "double2 __ovld vload2(size_t offset, const __private double *p);\n" |
39715 | "double3 __ovld vload3(size_t offset, const __private double *p);\n" |
39716 | "double4 __ovld vload4(size_t offset, const __private double *p);\n" |
39717 | "double8 __ovld vload8(size_t offset, const __private double *p);\n" |
39718 | "double16 __ovld vload16(size_t offset, const __private double *p);\n" |
39719 | "#endif //cl_khr_fp64\n" |
39720 | "\n" |
39721 | "#ifdef cl_khr_fp16\n" |
39722 | "half __ovld vload(size_t offset, const __global half *p);\n" |
39723 | "half2 __ovld vload2(size_t offset, const __global half *p);\n" |
39724 | "half3 __ovld vload3(size_t offset, const __global half *p);\n" |
39725 | "half4 __ovld vload4(size_t offset, const __global half *p);\n" |
39726 | "half8 __ovld vload8(size_t offset, const __global half *p);\n" |
39727 | "half16 __ovld vload16(size_t offset, const __global half *p);\n" |
39728 | "half __ovld vload(size_t offset, const __local half *p);\n" |
39729 | "half2 __ovld vload2(size_t offset, const __local half *p);\n" |
39730 | "half3 __ovld vload3(size_t offset, const __local half *p);\n" |
39731 | "half4 __ovld vload4(size_t offset, const __local half *p);\n" |
39732 | "half8 __ovld vload8(size_t offset, const __local half *p);\n" |
39733 | "half16 __ovld vload16(size_t offset, const __local half *p);\n" |
39734 | "half __ovld vload(size_t offset, const __private half *p);\n" |
39735 | "half2 __ovld vload2(size_t offset, const __private half *p);\n" |
39736 | "half3 __ovld vload3(size_t offset, const __private half *p);\n" |
39737 | "half4 __ovld vload4(size_t offset, const __private half *p);\n" |
39738 | "half8 __ovld vload8(size_t offset, const __private half *p);\n" |
39739 | "half16 __ovld vload16(size_t offset, const __private half *p);\n" |
39740 | "#endif //cl_khr_fp16\n" |
39741 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
39742 | "\n" |
39743 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
39744 | "void __ovld vstore2(char2 data, size_t offset, char *p);\n" |
39745 | "void __ovld vstore2(uchar2 data, size_t offset, uchar *p);\n" |
39746 | "void __ovld vstore2(short2 data, size_t offset, short *p);\n" |
39747 | "void __ovld vstore2(ushort2 data, size_t offset, ushort *p);\n" |
39748 | "void __ovld vstore2(int2 data, size_t offset, int *p);\n" |
39749 | "void __ovld vstore2(uint2 data, size_t offset, uint *p);\n" |
39750 | "void __ovld vstore2(long2 data, size_t offset, long *p);\n" |
39751 | "void __ovld vstore2(ulong2 data, size_t offset, ulong *p);\n" |
39752 | "void __ovld vstore2(float2 data, size_t offset, float *p);\n" |
39753 | "void __ovld vstore3(char3 data, size_t offset, char *p);\n" |
39754 | "void __ovld vstore3(uchar3 data, size_t offset, uchar *p);\n" |
39755 | "void __ovld vstore3(short3 data, size_t offset, short *p);\n" |
39756 | "void __ovld vstore3(ushort3 data, size_t offset, ushort *p);\n" |
39757 | "void __ovld vstore3(int3 data, size_t offset, int *p);\n" |
39758 | "void __ovld vstore3(uint3 data, size_t offset, uint *p);\n" |
39759 | "void __ovld vstore3(long3 data, size_t offset, long *p);\n" |
39760 | "void __ovld vstore3(ulong3 data, size_t offset, ulong *p);\n" |
39761 | "void __ovld vstore3(float3 data, size_t offset, float *p);\n" |
39762 | "void __ovld vstore4(char4 data, size_t offset, char *p);\n" |
39763 | "void __ovld vstore4(uchar4 data, size_t offset, uchar *p);\n" |
39764 | "void __ovld vstore4(short4 data, size_t offset, short *p);\n" |
39765 | "void __ovld vstore4(ushort4 data, size_t offset, ushort *p);\n" |
39766 | "void __ovld vstore4(int4 data, size_t offset, int *p);\n" |
39767 | "void __ovld vstore4(uint4 data, size_t offset, uint *p);\n" |
39768 | "void __ovld vstore4(long4 data, size_t offset, long *p);\n" |
39769 | "void __ovld vstore4(ulong4 data, size_t offset, ulong *p);\n" |
39770 | "void __ovld vstore4(float4 data, size_t offset, float *p);\n" |
39771 | "void __ovld vstore8(char8 data, size_t offset, char *p);\n" |
39772 | "void __ovld vstore8(uchar8 data, size_t offset, uchar *p);\n" |
39773 | "void __ovld vstore8(short8 data, size_t offset, short *p);\n" |
39774 | "void __ovld vstore8(ushort8 data, size_t offset, ushort *p);\n" |
39775 | "void __ovld vstore8(int8 data, size_t offset, int *p);\n" |
39776 | "void __ovld vstore8(uint8 data, size_t offset, uint *p);\n" |
39777 | "void __ovld vstore8(long8 data, size_t offset, long *p);\n" |
39778 | "void __ovld vstore8(ulong8 data, size_t offset, ulong *p);\n" |
39779 | "void __ovld vstore8(float8 data, size_t offset, float *p);\n" |
39780 | "void __ovld vstore16(char16 data, size_t offset, char *p);\n" |
39781 | "void __ovld vstore16(uchar16 data, size_t offset, uchar *p);\n" |
39782 | "void __ovld vstore16(short16 data, size_t offset, short *p);\n" |
39783 | "void __ovld vstore16(ushort16 data, size_t offset, ushort *p);\n" |
39784 | "void __ovld vstore16(int16 data, size_t offset, int *p);\n" |
39785 | "void __ovld vstore16(uint16 data, size_t offset, uint *p);\n" |
39786 | "void __ovld vstore16(long16 data, size_t offset, long *p);\n" |
39787 | "void __ovld vstore16(ulong16 data, size_t offset, ulong *p);\n" |
39788 | "void __ovld vstore16(float16 data, size_t offset, float *p);\n" |
39789 | "#ifdef cl_khr_fp64\n" |
39790 | "void __ovld vstore2(double2 data, size_t offset, double *p);\n" |
39791 | "void __ovld vstore3(double3 data, size_t offset, double *p);\n" |
39792 | "void __ovld vstore4(double4 data, size_t offset, double *p);\n" |
39793 | "void __ovld vstore8(double8 data, size_t offset, double *p);\n" |
39794 | "void __ovld vstore16(double16 data, size_t offset, double *p);\n" |
39795 | "#endif //cl_khr_fp64\n" |
39796 | "#ifdef cl_khr_fp16\n" |
39797 | "void __ovld vstore(half data, size_t offset, half *p);\n" |
39798 | "void __ovld vstore2(half2 data, size_t offset, half *p);\n" |
39799 | "void __ovld vstore3(half3 data, size_t offset, half *p);\n" |
39800 | "void __ovld vstore4(half4 data, size_t offset, half *p);\n" |
39801 | "void __ovld vstore8(half8 data, size_t offset, half *p);\n" |
39802 | "void __ovld vstore16(half16 data, size_t offset, half *p);\n" |
39803 | "#endif //cl_khr_fp16\n" |
39804 | "#else\n" |
39805 | "void __ovld vstore2(char2 data, size_t offset, __global char *p);\n" |
39806 | "void __ovld vstore2(uchar2 data, size_t offset, __global uchar *p);\n" |
39807 | "void __ovld vstore2(short2 data, size_t offset, __global short *p);\n" |
39808 | "void __ovld vstore2(ushort2 data, size_t offset, __global ushort *p);\n" |
39809 | "void __ovld vstore2(int2 data, size_t offset, __global int *p);\n" |
39810 | "void __ovld vstore2(uint2 data, size_t offset, __global uint *p);\n" |
39811 | "void __ovld vstore2(long2 data, size_t offset, __global long *p);\n" |
39812 | "void __ovld vstore2(ulong2 data, size_t offset, __global ulong *p);\n" |
39813 | "void __ovld vstore2(float2 data, size_t offset, __global float *p);\n" |
39814 | "void __ovld vstore3(char3 data, size_t offset, __global char *p);\n" |
39815 | "void __ovld vstore3(uchar3 data, size_t offset, __global uchar *p);\n" |
39816 | "void __ovld vstore3(short3 data, size_t offset, __global short *p);\n" |
39817 | "void __ovld vstore3(ushort3 data, size_t offset, __global ushort *p);\n" |
39818 | "void __ovld vstore3(int3 data, size_t offset, __global int *p);\n" |
39819 | "void __ovld vstore3(uint3 data, size_t offset, __global uint *p);\n" |
39820 | "void __ovld vstore3(long3 data, size_t offset, __global long *p);\n" |
39821 | "void __ovld vstore3(ulong3 data, size_t offset, __global ulong *p);\n" |
39822 | "void __ovld vstore3(float3 data, size_t offset, __global float *p);\n" |
39823 | "void __ovld vstore4(char4 data, size_t offset, __global char *p);\n" |
39824 | "void __ovld vstore4(uchar4 data, size_t offset, __global uchar *p);\n" |
39825 | "void __ovld vstore4(short4 data, size_t offset, __global short *p);\n" |
39826 | "void __ovld vstore4(ushort4 data, size_t offset, __global ushort *p);\n" |
39827 | "void __ovld vstore4(int4 data, size_t offset, __global int *p);\n" |
39828 | "void __ovld vstore4(uint4 data, size_t offset, __global uint *p);\n" |
39829 | "void __ovld vstore4(long4 data, size_t offset, __global long *p);\n" |
39830 | "void __ovld vstore4(ulong4 data, size_t offset, __global ulong *p);\n" |
39831 | "void __ovld vstore4(float4 data, size_t offset, __global float *p);\n" |
39832 | "void __ovld vstore8(char8 data, size_t offset, __global char *p);\n" |
39833 | "void __ovld vstore8(uchar8 data, size_t offset, __global uchar *p);\n" |
39834 | "void __ovld vstore8(short8 data, size_t offset, __global short *p);\n" |
39835 | "void __ovld vstore8(ushort8 data, size_t offset, __global ushort *p);\n" |
39836 | "void __ovld vstore8(int8 data, size_t offset, __global int *p);\n" |
39837 | "void __ovld vstore8(uint8 data, size_t offset, __global uint *p);\n" |
39838 | "void __ovld vstore8(long8 data, size_t offset, __global long *p);\n" |
39839 | "void __ovld vstore8(ulong8 data, size_t offset, __global ulong *p);\n" |
39840 | "void __ovld vstore8(float8 data, size_t offset, __global float *p);\n" |
39841 | "void __ovld vstore16(char16 data, size_t offset, __global char *p);\n" |
39842 | "void __ovld vstore16(uchar16 data, size_t offset, __global uchar *p);\n" |
39843 | "void __ovld vstore16(short16 data, size_t offset, __global short *p);\n" |
39844 | "void __ovld vstore16(ushort16 data, size_t offset, __global ushort *p);\n" |
39845 | "void __ovld vstore16(int16 data, size_t offset, __global int *p);\n" |
39846 | "void __ovld vstore16(uint16 data, size_t offset, __global uint *p);\n" |
39847 | "void __ovld vstore16(long16 data, size_t offset, __global long *p);\n" |
39848 | "void __ovld vstore16(ulong16 data, size_t offset, __global ulong *p);\n" |
39849 | "void __ovld vstore16(float16 data, size_t offset, __global float *p);\n" |
39850 | "void __ovld vstore2(char2 data, size_t offset, __local char *p);\n" |
39851 | "void __ovld vstore2(uchar2 data, size_t offset, __local uchar *p);\n" |
39852 | "void __ovld vstore2(short2 data, size_t offset, __local short *p);\n" |
39853 | "void __ovld vstore2(ushort2 data, size_t offset, __local ushort *p);\n" |
39854 | "void __ovld vstore2(int2 data, size_t offset, __local int *p);\n" |
39855 | "void __ovld vstore2(uint2 data, size_t offset, __local uint *p);\n" |
39856 | "void __ovld vstore2(long2 data, size_t offset, __local long *p);\n" |
39857 | "void __ovld vstore2(ulong2 data, size_t offset, __local ulong *p);\n" |
39858 | "void __ovld vstore2(float2 data, size_t offset, __local float *p);\n" |
39859 | "void __ovld vstore3(char3 data, size_t offset, __local char *p);\n" |
39860 | "void __ovld vstore3(uchar3 data, size_t offset, __local uchar *p);\n" |
39861 | "void __ovld vstore3(short3 data, size_t offset, __local short *p);\n" |
39862 | "void __ovld vstore3(ushort3 data, size_t offset, __local ushort *p);\n" |
39863 | "void __ovld vstore3(int3 data, size_t offset, __local int *p);\n" |
39864 | "void __ovld vstore3(uint3 data, size_t offset, __local uint *p);\n" |
39865 | "void __ovld vstore3(long3 data, size_t offset, __local long *p);\n" |
39866 | "void __ovld vstore3(ulong3 data, size_t offset, __local ulong *p);\n" |
39867 | "void __ovld vstore3(float3 data, size_t offset, __local float *p);\n" |
39868 | "void __ovld vstore4(char4 data, size_t offset, __local char *p);\n" |
39869 | "void __ovld vstore4(uchar4 data, size_t offset, __local uchar *p);\n" |
39870 | "void __ovld vstore4(short4 data, size_t offset, __local short *p);\n" |
39871 | "void __ovld vstore4(ushort4 data, size_t offset, __local ushort *p);\n" |
39872 | "void __ovld vstore4(int4 data, size_t offset, __local int *p);\n" |
39873 | "void __ovld vstore4(uint4 data, size_t offset, __local uint *p);\n" |
39874 | "void __ovld vstore4(long4 data, size_t offset, __local long *p);\n" |
39875 | "void __ovld vstore4(ulong4 data, size_t offset, __local ulong *p);\n" |
39876 | "void __ovld vstore4(float4 data, size_t offset, __local float *p);\n" |
39877 | "void __ovld vstore8(char8 data, size_t offset, __local char *p);\n" |
39878 | "void __ovld vstore8(uchar8 data, size_t offset, __local uchar *p);\n" |
39879 | "void __ovld vstore8(short8 data, size_t offset, __local short *p);\n" |
39880 | "void __ovld vstore8(ushort8 data, size_t offset, __local ushort *p);\n" |
39881 | "void __ovld vstore8(int8 data, size_t offset, __local int *p);\n" |
39882 | "void __ovld vstore8(uint8 data, size_t offset, __local uint *p);\n" |
39883 | "void __ovld vstore8(long8 data, size_t offset, __local long *p);\n" |
39884 | "void __ovld vstore8(ulong8 data, size_t offset, __local ulong *p);\n" |
39885 | "void __ovld vstore8(float8 data, size_t offset, __local float *p);\n" |
39886 | "void __ovld vstore16(char16 data, size_t offset, __local char *p);\n" |
39887 | "void __ovld vstore16(uchar16 data, size_t offset, __local uchar *p);\n" |
39888 | "void __ovld vstore16(short16 data, size_t offset, __local short *p);\n" |
39889 | "void __ovld vstore16(ushort16 data, size_t offset, __local ushort *p);\n" |
39890 | "void __ovld vstore16(int16 data, size_t offset, __local int *p);\n" |
39891 | "void __ovld vstore16(uint16 data, size_t offset, __local uint *p);\n" |
39892 | "void __ovld vstore16(long16 data, size_t offset, __local long *p);\n" |
39893 | "void __ovld vstore16(ulong16 data, size_t offset, __local ulong *p);\n" |
39894 | "void __ovld vstore16(float16 data, size_t offset, __local float *p);\n" |
39895 | "void __ovld vstore2(char2 data, size_t offset, __private char *p);\n" |
39896 | "void __ovld vstore2(uchar2 data, size_t offset, __private uchar *p);\n" |
39897 | "void __ovld vstore2(short2 data, size_t offset, __private short *p);\n" |
39898 | "void __ovld vstore2(ushort2 data, size_t offset, __private ushort *p);\n" |
39899 | "void __ovld vstore2(int2 data, size_t offset, __private int *p);\n" |
39900 | "void __ovld vstore2(uint2 data, size_t offset, __private uint *p);\n" |
39901 | "void __ovld vstore2(long2 data, size_t offset, __private long *p);\n" |
39902 | "void __ovld vstore2(ulong2 data, size_t offset, __private ulong *p);\n" |
39903 | "void __ovld vstore2(float2 data, size_t offset, __private float *p);\n" |
39904 | "void __ovld vstore3(char3 data, size_t offset, __private char *p);\n" |
39905 | "void __ovld vstore3(uchar3 data, size_t offset, __private uchar *p);\n" |
39906 | "void __ovld vstore3(short3 data, size_t offset, __private short *p);\n" |
39907 | "void __ovld vstore3(ushort3 data, size_t offset, __private ushort *p);\n" |
39908 | "void __ovld vstore3(int3 data, size_t offset, __private int *p);\n" |
39909 | "void __ovld vstore3(uint3 data, size_t offset, __private uint *p);\n" |
39910 | "void __ovld vstore3(long3 data, size_t offset, __private long *p);\n" |
39911 | "void __ovld vstore3(ulong3 data, size_t offset, __private ulong *p);\n" |
39912 | "void __ovld vstore3(float3 data, size_t offset, __private float *p);\n" |
39913 | "void __ovld vstore4(char4 data, size_t offset, __private char *p);\n" |
39914 | "void __ovld vstore4(uchar4 data, size_t offset, __private uchar *p);\n" |
39915 | "void __ovld vstore4(short4 data, size_t offset, __private short *p);\n" |
39916 | "void __ovld vstore4(ushort4 data, size_t offset, __private ushort *p);\n" |
39917 | "void __ovld vstore4(int4 data, size_t offset, __private int *p);\n" |
39918 | "void __ovld vstore4(uint4 data, size_t offset, __private uint *p);\n" |
39919 | "void __ovld vstore4(long4 data, size_t offset, __private long *p);\n" |
39920 | "void __ovld vstore4(ulong4 data, size_t offset, __private ulong *p);\n" |
39921 | "void __ovld vstore4(float4 data, size_t offset, __private float *p);\n" |
39922 | "void __ovld vstore8(char8 data, size_t offset, __private char *p);\n" |
39923 | "void __ovld vstore8(uchar8 data, size_t offset, __private uchar *p);\n" |
39924 | "void __ovld vstore8(short8 data, size_t offset, __private short *p);\n" |
39925 | "void __ovld vstore8(ushort8 data, size_t offset, __private ushort *p);\n" |
39926 | "void __ovld vstore8(int8 data, size_t offset, __private int *p);\n" |
39927 | "void __ovld vstore8(uint8 data, size_t offset, __private uint *p);\n" |
39928 | "void __ovld vstore8(long8 data, size_t offset, __private long *p);\n" |
39929 | "void __ovld vstore8(ulong8 data, size_t offset, __private ulong *p);\n" |
39930 | "void __ovld vstore8(float8 data, size_t offset, __private float *p);\n" |
39931 | "void __ovld vstore16(char16 data, size_t offset, __private char *p);\n" |
39932 | "void __ovld vstore16(uchar16 data, size_t offset, __private uchar *p);\n" |
39933 | "void __ovld vstore16(short16 data, size_t offset, __private short *p);\n" |
39934 | "void __ovld vstore16(ushort16 data, size_t offset, __private ushort *p);\n" |
39935 | "void __ovld vstore16(int16 data, size_t offset, __private int *p);\n" |
39936 | "void __ovld vstore16(uint16 data, size_t offset, __private uint *p);\n" |
39937 | "void __ovld vstore16(long16 data, size_t offset, __private long *p);\n" |
39938 | "void __ovld vstore16(ulong16 data, size_t offset, __private ulong *p);\n" |
39939 | "void __ovld vstore16(float16 data, size_t offset, __private float *p);\n" |
39940 | "#ifdef cl_khr_fp64\n" |
39941 | "void __ovld vstore2(double2 data, size_t offset, __global double *p);\n" |
39942 | "void __ovld vstore3(double3 data, size_t offset, __global double *p);\n" |
39943 | "void __ovld vstore4(double4 data, size_t offset, __global double *p);\n" |
39944 | "void __ovld vstore8(double8 data, size_t offset, __global double *p);\n" |
39945 | "void __ovld vstore16(double16 data, size_t offset, __global double *p);\n" |
39946 | "void __ovld vstore2(double2 data, size_t offset, __local double *p);\n" |
39947 | "void __ovld vstore3(double3 data, size_t offset, __local double *p);\n" |
39948 | "void __ovld vstore4(double4 data, size_t offset, __local double *p);\n" |
39949 | "void __ovld vstore8(double8 data, size_t offset, __local double *p);\n" |
39950 | "void __ovld vstore16(double16 data, size_t offset, __local double *p);\n" |
39951 | "void __ovld vstore2(double2 data, size_t offset, __private double *p);\n" |
39952 | "void __ovld vstore3(double3 data, size_t offset, __private double *p);\n" |
39953 | "void __ovld vstore4(double4 data, size_t offset, __private double *p);\n" |
39954 | "void __ovld vstore8(double8 data, size_t offset, __private double *p);\n" |
39955 | "void __ovld vstore16(double16 data, size_t offset, __private double *p);\n" |
39956 | "#endif //cl_khr_fp64\n" |
39957 | "#ifdef cl_khr_fp16\n" |
39958 | "void __ovld vstore(half data, size_t offset, __global half *p);\n" |
39959 | "void __ovld vstore2(half2 data, size_t offset, __global half *p);\n" |
39960 | "void __ovld vstore3(half3 data, size_t offset, __global half *p);\n" |
39961 | "void __ovld vstore4(half4 data, size_t offset, __global half *p);\n" |
39962 | "void __ovld vstore8(half8 data, size_t offset, __global half *p);\n" |
39963 | "void __ovld vstore16(half16 data, size_t offset, __global half *p);\n" |
39964 | "void __ovld vstore(half data, size_t offset, __local half *p);\n" |
39965 | "void __ovld vstore2(half2 data, size_t offset, __local half *p);\n" |
39966 | "void __ovld vstore3(half3 data, size_t offset, __local half *p);\n" |
39967 | "void __ovld vstore4(half4 data, size_t offset, __local half *p);\n" |
39968 | "void __ovld vstore8(half8 data, size_t offset, __local half *p);\n" |
39969 | "void __ovld vstore16(half16 data, size_t offset, __local half *p);\n" |
39970 | "void __ovld vstore(half data, size_t offset, __private half *p);\n" |
39971 | "void __ovld vstore2(half2 data, size_t offset, __private half *p);\n" |
39972 | "void __ovld vstore3(half3 data, size_t offset, __private half *p);\n" |
39973 | "void __ovld vstore4(half4 data, size_t offset, __private half *p);\n" |
39974 | "void __ovld vstore8(half8 data, size_t offset, __private half *p);\n" |
39975 | "void __ovld vstore16(half16 data, size_t offset, __private half *p);\n" |
39976 | "#endif //cl_khr_fp16\n" |
39977 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
39978 | "\n" |
39979 | "/**\n" |
39980 | " * Read sizeof (half) bytes of data from address\n" |
39981 | " * (p + offset). The data read is interpreted as a\n" |
39982 | " * half value. The half value is converted to a\n" |
39983 | " * float value and the float value is returned.\n" |
39984 | " * The read address computed as (p + offset)\n" |
39985 | " * must be 16-bit aligned.\n" |
39986 | " */\n" |
39987 | "float __ovld vload_half(size_t offset, const __constant half *p);\n" |
39988 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
39989 | "float __ovld vload_half(size_t offset, const half *p);\n" |
39990 | "#else\n" |
39991 | "float __ovld vload_half(size_t offset, const __global half *p);\n" |
39992 | "float __ovld vload_half(size_t offset, const __local half *p);\n" |
39993 | "float __ovld vload_half(size_t offset, const __private half *p);\n" |
39994 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
39995 | "\n" |
39996 | "/**\n" |
39997 | " * Read sizeof (halfn) bytes of data from address\n" |
39998 | " * (p + (offset * n)). The data read is interpreted\n" |
39999 | " * as a halfn value. The halfn value read is\n" |
40000 | " * converted to a floatn value and the floatn\n" |
40001 | " * value is returned. The read address computed\n" |
40002 | " * as (p + (offset * n)) must be 16-bit aligned.\n" |
40003 | " */\n" |
40004 | "float2 __ovld vload_half2(size_t offset, const __constant half *p);\n" |
40005 | "float3 __ovld vload_half3(size_t offset, const __constant half *p);\n" |
40006 | "float4 __ovld vload_half4(size_t offset, const __constant half *p);\n" |
40007 | "float8 __ovld vload_half8(size_t offset, const __constant half *p);\n" |
40008 | "float16 __ovld vload_half16(size_t offset, const __constant half *p);\n" |
40009 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40010 | "float2 __ovld vload_half2(size_t offset, const half *p);\n" |
40011 | "float3 __ovld vload_half3(size_t offset, const half *p);\n" |
40012 | "float4 __ovld vload_half4(size_t offset, const half *p);\n" |
40013 | "float8 __ovld vload_half8(size_t offset, const half *p);\n" |
40014 | "float16 __ovld vload_half16(size_t offset, const half *p);\n" |
40015 | "#else\n" |
40016 | "float2 __ovld vload_half2(size_t offset, const __global half *p);\n" |
40017 | "float3 __ovld vload_half3(size_t offset, const __global half *p);\n" |
40018 | "float4 __ovld vload_half4(size_t offset, const __global half *p);\n" |
40019 | "float8 __ovld vload_half8(size_t offset, const __global half *p);\n" |
40020 | "float16 __ovld vload_half16(size_t offset, const __global half *p);\n" |
40021 | "float2 __ovld vload_half2(size_t offset, const __local half *p);\n" |
40022 | "float3 __ovld vload_half3(size_t offset, const __local half *p);\n" |
40023 | "float4 __ovld vload_half4(size_t offset, const __local half *p);\n" |
40024 | "float8 __ovld vload_half8(size_t offset, const __local half *p);\n" |
40025 | "float16 __ovld vload_half16(size_t offset, const __local half *p);\n" |
40026 | "float2 __ovld vload_half2(size_t offset, const __private half *p);\n" |
40027 | "float3 __ovld vload_half3(size_t offset, const __private half *p);\n" |
40028 | "float4 __ovld vload_half4(size_t offset, const __private half *p);\n" |
40029 | "float8 __ovld vload_half8(size_t offset, const __private half *p);\n" |
40030 | "float16 __ovld vload_half16(size_t offset, const __private half *p);\n" |
40031 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40032 | "\n" |
40033 | "/**\n" |
40034 | " * The float value given by data is first\n" |
40035 | " * converted to a half value using the appropriate\n" |
40036 | " * rounding mode. The half value is then written\n" |
40037 | " * to address computed as (p + offset). The\n" |
40038 | " * address computed as (p + offset) must be 16-\n" |
40039 | " * bit aligned.\n" |
40040 | " * vstore_half use the current rounding mode.\n" |
40041 | " * The default current rounding mode is round to\n" |
40042 | " * nearest even.\n" |
40043 | " */\n" |
40044 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40045 | "void __ovld vstore_half(float data, size_t offset, half *p);\n" |
40046 | "void __ovld vstore_half_rte(float data, size_t offset, half *p);\n" |
40047 | "void __ovld vstore_half_rtz(float data, size_t offset, half *p);\n" |
40048 | "void __ovld vstore_half_rtp(float data, size_t offset, half *p);\n" |
40049 | "void __ovld vstore_half_rtn(float data, size_t offset, half *p);\n" |
40050 | "#ifdef cl_khr_fp64\n" |
40051 | "void __ovld vstore_half(double data, size_t offset, half *p);\n" |
40052 | "void __ovld vstore_half_rte(double data, size_t offset, half *p);\n" |
40053 | "void __ovld vstore_half_rtz(double data, size_t offset, half *p);\n" |
40054 | "void __ovld vstore_half_rtp(double data, size_t offset, half *p);\n" |
40055 | "void __ovld vstore_half_rtn(double data, size_t offset, half *p);\n" |
40056 | "#endif //cl_khr_fp64\n" |
40057 | "#else\n" |
40058 | "void __ovld vstore_half(float data, size_t offset, __global half *p);\n" |
40059 | "void __ovld vstore_half_rte(float data, size_t offset, __global half *p);\n" |
40060 | "void __ovld vstore_half_rtz(float data, size_t offset, __global half *p);\n" |
40061 | "void __ovld vstore_half_rtp(float data, size_t offset, __global half *p);\n" |
40062 | "void __ovld vstore_half_rtn(float data, size_t offset, __global half *p);\n" |
40063 | "void __ovld vstore_half(float data, size_t offset, __local half *p);\n" |
40064 | "void __ovld vstore_half_rte(float data, size_t offset, __local half *p);\n" |
40065 | "void __ovld vstore_half_rtz(float data, size_t offset, __local half *p);\n" |
40066 | "void __ovld vstore_half_rtp(float data, size_t offset, __local half *p);\n" |
40067 | "void __ovld vstore_half_rtn(float data, size_t offset, __local half *p);\n" |
40068 | "void __ovld vstore_half(float data, size_t offset, __private half *p);\n" |
40069 | "void __ovld vstore_half_rte(float data, size_t offset, __private half *p);\n" |
40070 | "void __ovld vstore_half_rtz(float data, size_t offset, __private half *p);\n" |
40071 | "void __ovld vstore_half_rtp(float data, size_t offset, __private half *p);\n" |
40072 | "void __ovld vstore_half_rtn(float data, size_t offset, __private half *p);\n" |
40073 | "#ifdef cl_khr_fp64\n" |
40074 | "void __ovld vstore_half(double data, size_t offset, __global half *p);\n" |
40075 | "void __ovld vstore_half_rte(double data, size_t offset, __global half *p);\n" |
40076 | "void __ovld vstore_half_rtz(double data, size_t offset, __global half *p);\n" |
40077 | "void __ovld vstore_half_rtp(double data, size_t offset, __global half *p);\n" |
40078 | "void __ovld vstore_half_rtn(double data, size_t offset, __global half *p);\n" |
40079 | "void __ovld vstore_half(double data, size_t offset, __local half *p);\n" |
40080 | "void __ovld vstore_half_rte(double data, size_t offset, __local half *p);\n" |
40081 | "void __ovld vstore_half_rtz(double data, size_t offset, __local half *p);\n" |
40082 | "void __ovld vstore_half_rtp(double data, size_t offset, __local half *p);\n" |
40083 | "void __ovld vstore_half_rtn(double data, size_t offset, __local half *p);\n" |
40084 | "void __ovld vstore_half(double data, size_t offset, __private half *p);\n" |
40085 | "void __ovld vstore_half_rte(double data, size_t offset, __private half *p);\n" |
40086 | "void __ovld vstore_half_rtz(double data, size_t offset, __private half *p);\n" |
40087 | "void __ovld vstore_half_rtp(double data, size_t offset, __private half *p);\n" |
40088 | "void __ovld vstore_half_rtn(double data, size_t offset, __private half *p);\n" |
40089 | "#endif //cl_khr_fp64\n" |
40090 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40091 | "\n" |
40092 | "/**\n" |
40093 | " * The floatn value given by data is converted to\n" |
40094 | " * a halfn value using the appropriate rounding\n" |
40095 | " * mode. The halfn value is then written to\n" |
40096 | " * address computed as (p + (offset * n)). The\n" |
40097 | " * address computed as (p + (offset * n)) must be\n" |
40098 | " * 16-bit aligned.\n" |
40099 | " * vstore_halfn uses the current rounding mode.\n" |
40100 | " * The default current rounding mode is round to\n" |
40101 | " * nearest even.\n" |
40102 | " */\n" |
40103 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40104 | "void __ovld vstore_half2(float2 data, size_t offset, half *p);\n" |
40105 | "void __ovld vstore_half3(float3 data, size_t offset, half *p);\n" |
40106 | "void __ovld vstore_half4(float4 data, size_t offset, half *p);\n" |
40107 | "void __ovld vstore_half8(float8 data, size_t offset, half *p);\n" |
40108 | "void __ovld vstore_half16(float16 data, size_t offset, half *p);\n" |
40109 | "void __ovld vstore_half2_rte(float2 data, size_t offset, half *p);\n" |
40110 | "void __ovld vstore_half3_rte(float3 data, size_t offset, half *p);\n" |
40111 | "void __ovld vstore_half4_rte(float4 data, size_t offset, half *p);\n" |
40112 | "void __ovld vstore_half8_rte(float8 data, size_t offset, half *p);\n" |
40113 | "void __ovld vstore_half16_rte(float16 data, size_t offset, half *p);\n" |
40114 | "void __ovld vstore_half2_rtz(float2 data, size_t offset, half *p);\n" |
40115 | "void __ovld vstore_half3_rtz(float3 data, size_t offset, half *p);\n" |
40116 | "void __ovld vstore_half4_rtz(float4 data, size_t offset, half *p);\n" |
40117 | "void __ovld vstore_half8_rtz(float8 data, size_t offset, half *p);\n" |
40118 | "void __ovld vstore_half16_rtz(float16 data, size_t offset, half *p);\n" |
40119 | "void __ovld vstore_half2_rtp(float2 data, size_t offset, half *p);\n" |
40120 | "void __ovld vstore_half3_rtp(float3 data, size_t offset, half *p);\n" |
40121 | "void __ovld vstore_half4_rtp(float4 data, size_t offset, half *p);\n" |
40122 | "void __ovld vstore_half8_rtp(float8 data, size_t offset, half *p);\n" |
40123 | "void __ovld vstore_half16_rtp(float16 data, size_t offset, half *p);\n" |
40124 | "void __ovld vstore_half2_rtn(float2 data, size_t offset, half *p);\n" |
40125 | "void __ovld vstore_half3_rtn(float3 data, size_t offset, half *p);\n" |
40126 | "void __ovld vstore_half4_rtn(float4 data, size_t offset, half *p);\n" |
40127 | "void __ovld vstore_half8_rtn(float8 data, size_t offset, half *p);\n" |
40128 | "void __ovld vstore_half16_rtn(float16 data, size_t offset, half *p);\n" |
40129 | "#ifdef cl_khr_fp64\n" |
40130 | "void __ovld vstore_half2(double2 data, size_t offset, half *p);\n" |
40131 | "void __ovld vstore_half3(double3 data, size_t offset, half *p);\n" |
40132 | "void __ovld vstore_half4(double4 data, size_t offset, half *p);\n" |
40133 | "void __ovld vstore_half8(double8 data, size_t offset, half *p);\n" |
40134 | "void __ovld vstore_half16(double16 data, size_t offset, half *p);\n" |
40135 | "void __ovld vstore_half2_rte(double2 data, size_t offset, half *p);\n" |
40136 | "void __ovld vstore_half3_rte(double3 data, size_t offset, half *p);\n" |
40137 | "void __ovld vstore_half4_rte(double4 data, size_t offset, half *p);\n" |
40138 | "void __ovld vstore_half8_rte(double8 data, size_t offset, half *p);\n" |
40139 | "void __ovld vstore_half16_rte(double16 data, size_t offset, half *p);\n" |
40140 | "void __ovld vstore_half2_rtz(double2 data, size_t offset, half *p);\n" |
40141 | "void __ovld vstore_half3_rtz(double3 data, size_t offset, half *p);\n" |
40142 | "void __ovld vstore_half4_rtz(double4 data, size_t offset, half *p);\n" |
40143 | "void __ovld vstore_half8_rtz(double8 data, size_t offset, half *p);\n" |
40144 | "void __ovld vstore_half16_rtz(double16 data, size_t offset, half *p);\n" |
40145 | "void __ovld vstore_half2_rtp(double2 data, size_t offset, half *p);\n" |
40146 | "void __ovld vstore_half3_rtp(double3 data, size_t offset, half *p);\n" |
40147 | "void __ovld vstore_half4_rtp(double4 data, size_t offset, half *p);\n" |
40148 | "void __ovld vstore_half8_rtp(double8 data, size_t offset, half *p);\n" |
40149 | "void __ovld vstore_half16_rtp(double16 data, size_t offset, half *p);\n" |
40150 | "void __ovld vstore_half2_rtn(double2 data, size_t offset, half *p);\n" |
40151 | "void __ovld vstore_half3_rtn(double3 data, size_t offset, half *p);\n" |
40152 | "void __ovld vstore_half4_rtn(double4 data, size_t offset, half *p);\n" |
40153 | "void __ovld vstore_half8_rtn(double8 data, size_t offset, half *p);\n" |
40154 | "void __ovld vstore_half16_rtn(double16 data, size_t offset, half *p);\n" |
40155 | "#endif //cl_khr_fp64\n" |
40156 | "#else\n" |
40157 | "void __ovld vstore_half2(float2 data, size_t offset, __global half *p);\n" |
40158 | "void __ovld vstore_half3(float3 data, size_t offset, __global half *p);\n" |
40159 | "void __ovld vstore_half4(float4 data, size_t offset, __global half *p);\n" |
40160 | "void __ovld vstore_half8(float8 data, size_t offset, __global half *p);\n" |
40161 | "void __ovld vstore_half16(float16 data, size_t offset, __global half *p);\n" |
40162 | "void __ovld vstore_half2_rte(float2 data, size_t offset, __global half *p);\n" |
40163 | "void __ovld vstore_half3_rte(float3 data, size_t offset, __global half *p);\n" |
40164 | "void __ovld vstore_half4_rte(float4 data, size_t offset, __global half *p);\n" |
40165 | "void __ovld vstore_half8_rte(float8 data, size_t offset, __global half *p);\n" |
40166 | "void __ovld vstore_half16_rte(float16 data, size_t offset, __global half *p);\n" |
40167 | "void __ovld vstore_half2_rtz(float2 data, size_t offset, __global half *p);\n" |
40168 | "void __ovld vstore_half3_rtz(float3 data, size_t offset, __global half *p);\n" |
40169 | "void __ovld vstore_half4_rtz(float4 data, size_t offset, __global half *p);\n" |
40170 | "void __ovld vstore_half8_rtz(float8 data, size_t offset, __global half *p);\n" |
40171 | "void __ovld vstore_half16_rtz(float16 data, size_t offset, __global half *p);\n" |
40172 | "void __ovld vstore_half2_rtp(float2 data, size_t offset, __global half *p);\n" |
40173 | "void __ovld vstore_half3_rtp(float3 data, size_t offset, __global half *p);\n" |
40174 | "void __ovld vstore_half4_rtp(float4 data, size_t offset, __global half *p);\n" |
40175 | "void __ovld vstore_half8_rtp(float8 data, size_t offset, __global half *p);\n" |
40176 | "void __ovld vstore_half16_rtp(float16 data, size_t offset, __global half *p);\n" |
40177 | "void __ovld vstore_half2_rtn(float2 data, size_t offset, __global half *p);\n" |
40178 | "void __ovld vstore_half3_rtn(float3 data, size_t offset, __global half *p);\n" |
40179 | "void __ovld vstore_half4_rtn(float4 data, size_t offset, __global half *p);\n" |
40180 | "void __ovld vstore_half8_rtn(float8 data, size_t offset, __global half *p);\n" |
40181 | "void __ovld vstore_half16_rtn(float16 data, size_t offset, __global half *p);\n" |
40182 | "void __ovld vstore_half2(float2 data, size_t offset, __local half *p);\n" |
40183 | "void __ovld vstore_half3(float3 data, size_t offset, __local half *p);\n" |
40184 | "void __ovld vstore_half4(float4 data, size_t offset, __local half *p);\n" |
40185 | "void __ovld vstore_half8(float8 data, size_t offset, __local half *p);\n" |
40186 | "void __ovld vstore_half16(float16 data, size_t offset, __local half *p);\n" |
40187 | "void __ovld vstore_half2_rte(float2 data, size_t offset, __local half *p);\n" |
40188 | "void __ovld vstore_half3_rte(float3 data, size_t offset, __local half *p);\n" |
40189 | "void __ovld vstore_half4_rte(float4 data, size_t offset, __local half *p);\n" |
40190 | "void __ovld vstore_half8_rte(float8 data, size_t offset, __local half *p);\n" |
40191 | "void __ovld vstore_half16_rte(float16 data, size_t offset, __local half *p);\n" |
40192 | "void __ovld vstore_half2_rtz(float2 data, size_t offset, __local half *p);\n" |
40193 | "void __ovld vstore_half3_rtz(float3 data, size_t offset, __local half *p);\n" |
40194 | "void __ovld vstore_half4_rtz(float4 data, size_t offset, __local half *p);\n" |
40195 | "void __ovld vstore_half8_rtz(float8 data, size_t offset, __local half *p);\n" |
40196 | "void __ovld vstore_half16_rtz(float16 data, size_t offset, __local half *p);\n" |
40197 | "void __ovld vstore_half2_rtp(float2 data, size_t offset, __local half *p);\n" |
40198 | "void __ovld vstore_half3_rtp(float3 data, size_t offset, __local half *p);\n" |
40199 | "void __ovld vstore_half4_rtp(float4 data, size_t offset, __local half *p);\n" |
40200 | "void __ovld vstore_half8_rtp(float8 data, size_t offset, __local half *p);\n" |
40201 | "void __ovld vstore_half16_rtp(float16 data, size_t offset, __local half *p);\n" |
40202 | "void __ovld vstore_half2_rtn(float2 data, size_t offset, __local half *p);\n" |
40203 | "void __ovld vstore_half3_rtn(float3 data, size_t offset, __local half *p);\n" |
40204 | "void __ovld vstore_half4_rtn(float4 data, size_t offset, __local half *p);\n" |
40205 | "void __ovld vstore_half8_rtn(float8 data, size_t offset, __local half *p);\n" |
40206 | "void __ovld vstore_half16_rtn(float16 data, size_t offset, __local half *p);\n" |
40207 | "void __ovld vstore_half2(float2 data, size_t offset, __private half *p);\n" |
40208 | "void __ovld vstore_half3(float3 data, size_t offset, __private half *p);\n" |
40209 | "void __ovld vstore_half4(float4 data, size_t offset, __private half *p);\n" |
40210 | "void __ovld vstore_half8(float8 data, size_t offset, __private half *p);\n" |
40211 | "void __ovld vstore_half16(float16 data, size_t offset, __private half *p);\n" |
40212 | "void __ovld vstore_half2_rte(float2 data, size_t offset, __private half *p);\n" |
40213 | "void __ovld vstore_half3_rte(float3 data, size_t offset, __private half *p);\n" |
40214 | "void __ovld vstore_half4_rte(float4 data, size_t offset, __private half *p);\n" |
40215 | "void __ovld vstore_half8_rte(float8 data, size_t offset, __private half *p);\n" |
40216 | "void __ovld vstore_half16_rte(float16 data, size_t offset, __private half *p);\n" |
40217 | "void __ovld vstore_half2_rtz(float2 data, size_t offset, __private half *p);\n" |
40218 | "void __ovld vstore_half3_rtz(float3 data, size_t offset, __private half *p);\n" |
40219 | "void __ovld vstore_half4_rtz(float4 data, size_t offset, __private half *p);\n" |
40220 | "void __ovld vstore_half8_rtz(float8 data, size_t offset, __private half *p);\n" |
40221 | "void __ovld vstore_half16_rtz(float16 data, size_t offset, __private half *p);\n" |
40222 | "void __ovld vstore_half2_rtp(float2 data, size_t offset, __private half *p);\n" |
40223 | "void __ovld vstore_half3_rtp(float3 data, size_t offset, __private half *p);\n" |
40224 | "void __ovld vstore_half4_rtp(float4 data, size_t offset, __private half *p);\n" |
40225 | "void __ovld vstore_half8_rtp(float8 data, size_t offset, __private half *p);\n" |
40226 | "void __ovld vstore_half16_rtp(float16 data, size_t offset, __private half *p);\n" |
40227 | "void __ovld vstore_half2_rtn(float2 data, size_t offset, __private half *p);\n" |
40228 | "void __ovld vstore_half3_rtn(float3 data, size_t offset, __private half *p);\n" |
40229 | "void __ovld vstore_half4_rtn(float4 data, size_t offset, __private half *p);\n" |
40230 | "void __ovld vstore_half8_rtn(float8 data, size_t offset, __private half *p);\n" |
40231 | "void __ovld vstore_half16_rtn(float16 data, size_t offset, __private half *p);\n" |
40232 | "#ifdef cl_khr_fp64\n" |
40233 | "void __ovld vstore_half2(double2 data, size_t offset, __global half *p);\n" |
40234 | "void __ovld vstore_half3(double3 data, size_t offset, __global half *p);\n" |
40235 | "void __ovld vstore_half4(double4 data, size_t offset, __global half *p);\n" |
40236 | "void __ovld vstore_half8(double8 data, size_t offset, __global half *p);\n" |
40237 | "void __ovld vstore_half16(double16 data, size_t offset, __global half *p);\n" |
40238 | "void __ovld vstore_half2_rte(double2 data, size_t offset, __global half *p);\n" |
40239 | "void __ovld vstore_half3_rte(double3 data, size_t offset, __global half *p);\n" |
40240 | "void __ovld vstore_half4_rte(double4 data, size_t offset, __global half *p);\n" |
40241 | "void __ovld vstore_half8_rte(double8 data, size_t offset, __global half *p);\n" |
40242 | "void __ovld vstore_half16_rte(double16 data, size_t offset, __global half *p);\n" |
40243 | "void __ovld vstore_half2_rtz(double2 data, size_t offset, __global half *p);\n" |
40244 | "void __ovld vstore_half3_rtz(double3 data, size_t offset, __global half *p);\n" |
40245 | "void __ovld vstore_half4_rtz(double4 data, size_t offset, __global half *p);\n" |
40246 | "void __ovld vstore_half8_rtz(double8 data, size_t offset, __global half *p);\n" |
40247 | "void __ovld vstore_half16_rtz(double16 data, size_t offset, __global half *p);\n" |
40248 | "void __ovld vstore_half2_rtp(double2 data, size_t offset, __global half *p);\n" |
40249 | "void __ovld vstore_half3_rtp(double3 data, size_t offset, __global half *p);\n" |
40250 | "void __ovld vstore_half4_rtp(double4 data, size_t offset, __global half *p);\n" |
40251 | "void __ovld vstore_half8_rtp(double8 data, size_t offset, __global half *p);\n" |
40252 | "void __ovld vstore_half16_rtp(double16 data, size_t offset, __global half *p);\n" |
40253 | "void __ovld vstore_half2_rtn(double2 data, size_t offset, __global half *p);\n" |
40254 | "void __ovld vstore_half3_rtn(double3 data, size_t offset, __global half *p);\n" |
40255 | "void __ovld vstore_half4_rtn(double4 data, size_t offset, __global half *p);\n" |
40256 | "void __ovld vstore_half8_rtn(double8 data, size_t offset, __global half *p);\n" |
40257 | "void __ovld vstore_half16_rtn(double16 data, size_t offset, __global half *p);\n" |
40258 | "void __ovld vstore_half2(double2 data, size_t offset, __local half *p);\n" |
40259 | "void __ovld vstore_half3(double3 data, size_t offset, __local half *p);\n" |
40260 | "void __ovld vstore_half4(double4 data, size_t offset, __local half *p);\n" |
40261 | "void __ovld vstore_half8(double8 data, size_t offset, __local half *p);\n" |
40262 | "void __ovld vstore_half16(double16 data, size_t offset, __local half *p);\n" |
40263 | "void __ovld vstore_half2_rte(double2 data, size_t offset, __local half *p);\n" |
40264 | "void __ovld vstore_half3_rte(double3 data, size_t offset, __local half *p);\n" |
40265 | "void __ovld vstore_half4_rte(double4 data, size_t offset, __local half *p);\n" |
40266 | "void __ovld vstore_half8_rte(double8 data, size_t offset, __local half *p);\n" |
40267 | "void __ovld vstore_half16_rte(double16 data, size_t offset, __local half *p);\n" |
40268 | "void __ovld vstore_half2_rtz(double2 data, size_t offset, __local half *p);\n" |
40269 | "void __ovld vstore_half3_rtz(double3 data, size_t offset, __local half *p);\n" |
40270 | "void __ovld vstore_half4_rtz(double4 data, size_t offset, __local half *p);\n" |
40271 | "void __ovld vstore_half8_rtz(double8 data, size_t offset, __local half *p);\n" |
40272 | "void __ovld vstore_half16_rtz(double16 data, size_t offset, __local half *p);\n" |
40273 | "void __ovld vstore_half2_rtp(double2 data, size_t offset, __local half *p);\n" |
40274 | "void __ovld vstore_half3_rtp(double3 data, size_t offset, __local half *p);\n" |
40275 | "void __ovld vstore_half4_rtp(double4 data, size_t offset, __local half *p);\n" |
40276 | "void __ovld vstore_half8_rtp(double8 data, size_t offset, __local half *p);\n" |
40277 | "void __ovld vstore_half16_rtp(double16 data, size_t offset, __local half *p);\n" |
40278 | "void __ovld vstore_half2_rtn(double2 data, size_t offset, __local half *p);\n" |
40279 | "void __ovld vstore_half3_rtn(double3 data, size_t offset, __local half *p);\n" |
40280 | "void __ovld vstore_half4_rtn(double4 data, size_t offset, __local half *p);\n" |
40281 | "void __ovld vstore_half8_rtn(double8 data, size_t offset, __local half *p);\n" |
40282 | "void __ovld vstore_half16_rtn(double16 data, size_t offset, __local half *p);\n" |
40283 | "void __ovld vstore_half2(double2 data, size_t offset, __private half *p);\n" |
40284 | "void __ovld vstore_half3(double3 data, size_t offset, __private half *p);\n" |
40285 | "void __ovld vstore_half4(double4 data, size_t offset, __private half *p);\n" |
40286 | "void __ovld vstore_half8(double8 data, size_t offset, __private half *p);\n" |
40287 | "void __ovld vstore_half16(double16 data, size_t offset, __private half *p);\n" |
40288 | "void __ovld vstore_half2_rte(double2 data, size_t offset, __private half *p);\n" |
40289 | "void __ovld vstore_half3_rte(double3 data, size_t offset, __private half *p);\n" |
40290 | "void __ovld vstore_half4_rte(double4 data, size_t offset, __private half *p);\n" |
40291 | "void __ovld vstore_half8_rte(double8 data, size_t offset, __private half *p);\n" |
40292 | "void __ovld vstore_half16_rte(double16 data, size_t offset, __private half *p);\n" |
40293 | "void __ovld vstore_half2_rtz(double2 data, size_t offset, __private half *p);\n" |
40294 | "void __ovld vstore_half3_rtz(double3 data, size_t offset, __private half *p);\n" |
40295 | "void __ovld vstore_half4_rtz(double4 data, size_t offset, __private half *p);\n" |
40296 | "void __ovld vstore_half8_rtz(double8 data, size_t offset, __private half *p);\n" |
40297 | "void __ovld vstore_half16_rtz(double16 data, size_t offset, __private half *p);\n" |
40298 | "void __ovld vstore_half2_rtp(double2 data, size_t offset, __private half *p);\n" |
40299 | "void __ovld vstore_half3_rtp(double3 data, size_t offset, __private half *p);\n" |
40300 | "void __ovld vstore_half4_rtp(double4 data, size_t offset, __private half *p);\n" |
40301 | "void __ovld vstore_half8_rtp(double8 data, size_t offset, __private half *p);\n" |
40302 | "void __ovld vstore_half16_rtp(double16 data, size_t offset, __private half *p);\n" |
40303 | "void __ovld vstore_half2_rtn(double2 data, size_t offset, __private half *p);\n" |
40304 | "void __ovld vstore_half3_rtn(double3 data, size_t offset, __private half *p);\n" |
40305 | "void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p);\n" |
40306 | "void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p);\n" |
40307 | "void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p);\n" |
40308 | "#endif //cl_khr_fp64\n" |
40309 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40310 | "\n" |
40311 | "/**\n" |
40312 | " * For n = 1, 2, 4, 8 and 16 read sizeof (halfn)\n" |
40313 | " * bytes of data from address (p + (offset * n)).\n" |
40314 | " * The data read is interpreted as a halfn value.\n" |
40315 | " * The halfn value read is converted to a floatn\n" |
40316 | " * value and the floatn value is returned.\n" |
40317 | " * The address computed as (p + (offset * n))\n" |
40318 | " * must be aligned to sizeof (halfn) bytes.\n" |
40319 | " * For n = 3, vloada_half3 reads a half3 from\n" |
40320 | " * address (p + (offset * 4)) and returns a float3.\n" |
40321 | " * The address computed as (p + (offset * 4))\n" |
40322 | " * must be aligned to sizeof (half) * 4 bytes.\n" |
40323 | " */\n" |
40324 | "float __ovld vloada_half(size_t offset, const __constant half *p);\n" |
40325 | "float2 __ovld vloada_half2(size_t offset, const __constant half *p);\n" |
40326 | "float3 __ovld vloada_half3(size_t offset, const __constant half *p);\n" |
40327 | "float4 __ovld vloada_half4(size_t offset, const __constant half *p);\n" |
40328 | "float8 __ovld vloada_half8(size_t offset, const __constant half *p);\n" |
40329 | "float16 __ovld vloada_half16(size_t offset, const __constant half *p);\n" |
40330 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40331 | "float __ovld vloada_half(size_t offset, const half *p);\n" |
40332 | "float2 __ovld vloada_half2(size_t offset, const half *p);\n" |
40333 | "float3 __ovld vloada_half3(size_t offset, const half *p);\n" |
40334 | "float4 __ovld vloada_half4(size_t offset, const half *p);\n" |
40335 | "float8 __ovld vloada_half8(size_t offset, const half *p);\n" |
40336 | "float16 __ovld vloada_half16(size_t offset, const half *p);\n" |
40337 | "#else\n" |
40338 | "float __ovld vloada_half(size_t offset, const __global half *p);\n" |
40339 | "float2 __ovld vloada_half2(size_t offset, const __global half *p);\n" |
40340 | "float3 __ovld vloada_half3(size_t offset, const __global half *p);\n" |
40341 | "float4 __ovld vloada_half4(size_t offset, const __global half *p);\n" |
40342 | "float8 __ovld vloada_half8(size_t offset, const __global half *p);\n" |
40343 | "float16 __ovld vloada_half16(size_t offset, const __global half *p);\n" |
40344 | "float __ovld vloada_half(size_t offset, const __local half *p);\n" |
40345 | "float2 __ovld vloada_half2(size_t offset, const __local half *p);\n" |
40346 | "float3 __ovld vloada_half3(size_t offset, const __local half *p);\n" |
40347 | "float4 __ovld vloada_half4(size_t offset, const __local half *p);\n" |
40348 | "float8 __ovld vloada_half8(size_t offset, const __local half *p);\n" |
40349 | "float16 __ovld vloada_half16(size_t offset, const __local half *p);\n" |
40350 | "float __ovld vloada_half(size_t offset, const __private half *p);\n" |
40351 | "float2 __ovld vloada_half2(size_t offset, const __private half *p);\n" |
40352 | "float3 __ovld vloada_half3(size_t offset, const __private half *p);\n" |
40353 | "float4 __ovld vloada_half4(size_t offset, const __private half *p);\n" |
40354 | "float8 __ovld vloada_half8(size_t offset, const __private half *p);\n" |
40355 | "float16 __ovld vloada_half16(size_t offset, const __private half *p);\n" |
40356 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40357 | "\n" |
40358 | "/**\n" |
40359 | " * The floatn value given by data is converted to\n" |
40360 | " * a halfn value using the appropriate rounding\n" |
40361 | " * mode.\n" |
40362 | " * For n = 1, 2, 4, 8 and 16, the halfn value is\n" |
40363 | " * written to the address computed as (p + (offset\n" |
40364 | " * * n)). The address computed as (p + (offset *\n" |
40365 | " * n)) must be aligned to sizeof (halfn) bytes.\n" |
40366 | " * For n = 3, the half3 value is written to the\n" |
40367 | " * address computed as (p + (offset * 4)). The\n" |
40368 | " * address computed as (p + (offset * 4)) must be\n" |
40369 | " * aligned to sizeof (half) * 4 bytes.\n" |
40370 | " * vstorea_halfn uses the current rounding\n" |
40371 | " * mode. The default current rounding mode is\n" |
40372 | " * round to nearest even.\n" |
40373 | " */\n" |
40374 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40375 | "void __ovld vstorea_half(float data, size_t offset, half *p);\n" |
40376 | "void __ovld vstorea_half2(float2 data, size_t offset, half *p);\n" |
40377 | "void __ovld vstorea_half3(float3 data, size_t offset, half *p);\n" |
40378 | "void __ovld vstorea_half4(float4 data, size_t offset, half *p);\n" |
40379 | "void __ovld vstorea_half8(float8 data, size_t offset, half *p);\n" |
40380 | "void __ovld vstorea_half16(float16 data, size_t offset, half *p);\n" |
40381 | "\n" |
40382 | "void __ovld vstorea_half_rte(float data, size_t offset, half *p);\n" |
40383 | "void __ovld vstorea_half2_rte(float2 data, size_t offset, half *p);\n" |
40384 | "void __ovld vstorea_half3_rte(float3 data, size_t offset, half *p);\n" |
40385 | "void __ovld vstorea_half4_rte(float4 data, size_t offset, half *p);\n" |
40386 | "void __ovld vstorea_half8_rte(float8 data, size_t offset, half *p);\n" |
40387 | "void __ovld vstorea_half16_rte(float16 data, size_t offset, half *p);\n" |
40388 | "\n" |
40389 | "void __ovld vstorea_half_rtz(float data, size_t offset, half *p);\n" |
40390 | "void __ovld vstorea_half2_rtz(float2 data, size_t offset, half *p);\n" |
40391 | "void __ovld vstorea_half3_rtz(float3 data, size_t offset, half *p);\n" |
40392 | "void __ovld vstorea_half4_rtz(float4 data, size_t offset, half *p);\n" |
40393 | "void __ovld vstorea_half8_rtz(float8 data, size_t offset, half *p);\n" |
40394 | "void __ovld vstorea_half16_rtz(float16 data, size_t offset, half *p);\n" |
40395 | "\n" |
40396 | "void __ovld vstorea_half_rtp(float data, size_t offset, half *p);\n" |
40397 | "void __ovld vstorea_half2_rtp(float2 data, size_t offset, half *p);\n" |
40398 | "void __ovld vstorea_half3_rtp(float3 data, size_t offset, half *p);\n" |
40399 | "void __ovld vstorea_half4_rtp(float4 data, size_t offset, half *p);\n" |
40400 | "void __ovld vstorea_half8_rtp(float8 data, size_t offset, half *p);\n" |
40401 | "void __ovld vstorea_half16_rtp(float16 data, size_t offset, half *p);\n" |
40402 | "\n" |
40403 | "void __ovld vstorea_half_rtn(float data, size_t offset, half *p);\n" |
40404 | "void __ovld vstorea_half2_rtn(float2 data, size_t offset, half *p);\n" |
40405 | "void __ovld vstorea_half3_rtn(float3 data, size_t offset, half *p);\n" |
40406 | "void __ovld vstorea_half4_rtn(float4 data, size_t offset, half *p);\n" |
40407 | "void __ovld vstorea_half8_rtn(float8 data, size_t offset, half *p);\n" |
40408 | "void __ovld vstorea_half16_rtn(float16 data, size_t offset, half *p);\n" |
40409 | "\n" |
40410 | "#ifdef cl_khr_fp64\n" |
40411 | "void __ovld vstorea_half(double data, size_t offset, half *p);\n" |
40412 | "void __ovld vstorea_half2(double2 data, size_t offset, half *p);\n" |
40413 | "void __ovld vstorea_half3(double3 data, size_t offset, half *p);\n" |
40414 | "void __ovld vstorea_half4(double4 data, size_t offset, half *p);\n" |
40415 | "void __ovld vstorea_half8(double8 data, size_t offset, half *p);\n" |
40416 | "void __ovld vstorea_half16(double16 data, size_t offset, half *p);\n" |
40417 | "\n" |
40418 | "void __ovld vstorea_half_rte(double data, size_t offset, half *p);\n" |
40419 | "void __ovld vstorea_half2_rte(double2 data, size_t offset, half *p);\n" |
40420 | "void __ovld vstorea_half3_rte(double3 data, size_t offset, half *p);\n" |
40421 | "void __ovld vstorea_half4_rte(double4 data, size_t offset, half *p);\n" |
40422 | "void __ovld vstorea_half8_rte(double8 data, size_t offset, half *p);\n" |
40423 | "void __ovld vstorea_half16_rte(double16 data, size_t offset, half *p);\n" |
40424 | "\n" |
40425 | "void __ovld vstorea_half_rtz(double data, size_t offset, half *p);\n" |
40426 | "void __ovld vstorea_half2_rtz(double2 data, size_t offset, half *p);\n" |
40427 | "void __ovld vstorea_half3_rtz(double3 data, size_t offset, half *p);\n" |
40428 | "void __ovld vstorea_half4_rtz(double4 data, size_t offset, half *p);\n" |
40429 | "void __ovld vstorea_half8_rtz(double8 data, size_t offset, half *p);\n" |
40430 | "void __ovld vstorea_half16_rtz(double16 data, size_t offset, half *p);\n" |
40431 | "\n" |
40432 | "void __ovld vstorea_half_rtp(double data, size_t offset, half *p);\n" |
40433 | "void __ovld vstorea_half2_rtp(double2 data, size_t offset, half *p);\n" |
40434 | "void __ovld vstorea_half3_rtp(double3 data, size_t offset, half *p);\n" |
40435 | "void __ovld vstorea_half4_rtp(double4 data, size_t offset, half *p);\n" |
40436 | "void __ovld vstorea_half8_rtp(double8 data, size_t offset, half *p);\n" |
40437 | "void __ovld vstorea_half16_rtp(double16 data, size_t offset, half *p);\n" |
40438 | "\n" |
40439 | "void __ovld vstorea_half_rtn(double data, size_t offset, half *p);\n" |
40440 | "void __ovld vstorea_half2_rtn(double2 data, size_t offset, half *p);\n" |
40441 | "void __ovld vstorea_half3_rtn(double3 data, size_t offset, half *p);\n" |
40442 | "void __ovld vstorea_half4_rtn(double4 data, size_t offset, half *p);\n" |
40443 | "void __ovld vstorea_half8_rtn(double8 data, size_t offset, half *p);\n" |
40444 | "void __ovld vstorea_half16_rtn(double16 data, size_t offset, half *p);\n" |
40445 | "#endif //cl_khr_fp64\n" |
40446 | "\n" |
40447 | "#else\n" |
40448 | "void __ovld vstorea_half(float data, size_t offset, __global half *p);\n" |
40449 | "void __ovld vstorea_half2(float2 data, size_t offset, __global half *p);\n" |
40450 | "void __ovld vstorea_half3(float3 data, size_t offset, __global half *p);\n" |
40451 | "void __ovld vstorea_half4(float4 data, size_t offset, __global half *p);\n" |
40452 | "void __ovld vstorea_half8(float8 data, size_t offset, __global half *p);\n" |
40453 | "void __ovld vstorea_half16(float16 data, size_t offset, __global half *p);\n" |
40454 | "\n" |
40455 | "void __ovld vstorea_half_rte(float data, size_t offset, __global half *p);\n" |
40456 | "void __ovld vstorea_half2_rte(float2 data, size_t offset, __global half *p);\n" |
40457 | "void __ovld vstorea_half3_rte(float3 data, size_t offset, __global half *p);\n" |
40458 | "void __ovld vstorea_half4_rte(float4 data, size_t offset, __global half *p);\n" |
40459 | "void __ovld vstorea_half8_rte(float8 data, size_t offset, __global half *p);\n" |
40460 | "void __ovld vstorea_half16_rte(float16 data, size_t offset, __global half *p);\n" |
40461 | "\n" |
40462 | "void __ovld vstorea_half_rtz(float data, size_t offset, __global half *p);\n" |
40463 | "void __ovld vstorea_half2_rtz(float2 data, size_t offset, __global half *p);\n" |
40464 | "void __ovld vstorea_half3_rtz(float3 data, size_t offset, __global half *p);\n" |
40465 | "void __ovld vstorea_half4_rtz(float4 data, size_t offset, __global half *p);\n" |
40466 | "void __ovld vstorea_half8_rtz(float8 data, size_t offset, __global half *p);\n" |
40467 | "void __ovld vstorea_half16_rtz(float16 data, size_t offset, __global half *p);\n" |
40468 | "\n" |
40469 | "void __ovld vstorea_half_rtp(float data, size_t offset, __global half *p);\n" |
40470 | "void __ovld vstorea_half2_rtp(float2 data, size_t offset, __global half *p);\n" |
40471 | "void __ovld vstorea_half3_rtp(float3 data, size_t offset, __global half *p);\n" |
40472 | "void __ovld vstorea_half4_rtp(float4 data, size_t offset, __global half *p);\n" |
40473 | "void __ovld vstorea_half8_rtp(float8 data, size_t offset, __global half *p);\n" |
40474 | "void __ovld vstorea_half16_rtp(float16 data, size_t offset, __global half *p);\n" |
40475 | "\n" |
40476 | "void __ovld vstorea_half_rtn(float data, size_t offset, __global half *p);\n" |
40477 | "void __ovld vstorea_half2_rtn(float2 data, size_t offset, __global half *p);\n" |
40478 | "void __ovld vstorea_half3_rtn(float3 data, size_t offset, __global half *p);\n" |
40479 | "void __ovld vstorea_half4_rtn(float4 data, size_t offset, __global half *p);\n" |
40480 | "void __ovld vstorea_half8_rtn(float8 data, size_t offset, __global half *p);\n" |
40481 | "void __ovld vstorea_half16_rtn(float16 data, size_t offset, __global half *p);\n" |
40482 | "\n" |
40483 | "void __ovld vstorea_half(float data, size_t offset, __local half *p);\n" |
40484 | "void __ovld vstorea_half2(float2 data, size_t offset, __local half *p);\n" |
40485 | "void __ovld vstorea_half3(float3 data, size_t offset, __local half *p);\n" |
40486 | "void __ovld vstorea_half4(float4 data, size_t offset, __local half *p);\n" |
40487 | "void __ovld vstorea_half8(float8 data, size_t offset, __local half *p);\n" |
40488 | "void __ovld vstorea_half16(float16 data, size_t offset, __local half *p);\n" |
40489 | "\n" |
40490 | "void __ovld vstorea_half_rte(float data, size_t offset, __local half *p);\n" |
40491 | "void __ovld vstorea_half2_rte(float2 data, size_t offset, __local half *p);\n" |
40492 | "void __ovld vstorea_half3_rte(float3 data, size_t offset, __local half *p);\n" |
40493 | "void __ovld vstorea_half4_rte(float4 data, size_t offset, __local half *p);\n" |
40494 | "void __ovld vstorea_half8_rte(float8 data, size_t offset, __local half *p);\n" |
40495 | "void __ovld vstorea_half16_rte(float16 data, size_t offset, __local half *p);\n" |
40496 | "\n" |
40497 | "void __ovld vstorea_half_rtz(float data, size_t offset, __local half *p);\n" |
40498 | "void __ovld vstorea_half2_rtz(float2 data, size_t offset, __local half *p);\n" |
40499 | "void __ovld vstorea_half3_rtz(float3 data, size_t offset, __local half *p);\n" |
40500 | "void __ovld vstorea_half4_rtz(float4 data, size_t offset, __local half *p);\n" |
40501 | "void __ovld vstorea_half8_rtz(float8 data, size_t offset, __local half *p);\n" |
40502 | "void __ovld vstorea_half16_rtz(float16 data, size_t offset, __local half *p);\n" |
40503 | "\n" |
40504 | "void __ovld vstorea_half_rtp(float data, size_t offset, __local half *p);\n" |
40505 | "void __ovld vstorea_half2_rtp(float2 data, size_t offset, __local half *p);\n" |
40506 | "void __ovld vstorea_half3_rtp(float3 data, size_t offset, __local half *p);\n" |
40507 | "void __ovld vstorea_half4_rtp(float4 data, size_t offset, __local half *p);\n" |
40508 | "void __ovld vstorea_half8_rtp(float8 data, size_t offset, __local half *p);\n" |
40509 | "void __ovld vstorea_half16_rtp(float16 data, size_t offset, __local half *p);\n" |
40510 | "\n" |
40511 | "void __ovld vstorea_half_rtn(float data, size_t offset, __local half *p);\n" |
40512 | "void __ovld vstorea_half2_rtn(float2 data, size_t offset, __local half *p);\n" |
40513 | "void __ovld vstorea_half3_rtn(float3 data, size_t offset, __local half *p);\n" |
40514 | "void __ovld vstorea_half4_rtn(float4 data, size_t offset, __local half *p);\n" |
40515 | "void __ovld vstorea_half8_rtn(float8 data, size_t offset, __local half *p);\n" |
40516 | "void __ovld vstorea_half16_rtn(float16 data, size_t offset, __local half *p);\n" |
40517 | "\n" |
40518 | "void __ovld vstorea_half(float data, size_t offset, __private half *p);\n" |
40519 | "void __ovld vstorea_half2(float2 data, size_t offset, __private half *p);\n" |
40520 | "void __ovld vstorea_half3(float3 data, size_t offset, __private half *p);\n" |
40521 | "void __ovld vstorea_half4(float4 data, size_t offset, __private half *p);\n" |
40522 | "void __ovld vstorea_half8(float8 data, size_t offset, __private half *p);\n" |
40523 | "void __ovld vstorea_half16(float16 data, size_t offset, __private half *p);\n" |
40524 | "\n" |
40525 | "void __ovld vstorea_half_rte(float data, size_t offset, __private half *p);\n" |
40526 | "void __ovld vstorea_half2_rte(float2 data, size_t offset, __private half *p);\n" |
40527 | "void __ovld vstorea_half3_rte(float3 data, size_t offset, __private half *p);\n" |
40528 | "void __ovld vstorea_half4_rte(float4 data, size_t offset, __private half *p);\n" |
40529 | "void __ovld vstorea_half8_rte(float8 data, size_t offset, __private half *p);\n" |
40530 | "void __ovld vstorea_half16_rte(float16 data, size_t offset, __private half *p);\n" |
40531 | "\n" |
40532 | "void __ovld vstorea_half_rtz(float data, size_t offset, __private half *p);\n" |
40533 | "void __ovld vstorea_half2_rtz(float2 data, size_t offset, __private half *p);\n" |
40534 | "void __ovld vstorea_half3_rtz(float3 data, size_t offset, __private half *p);\n" |
40535 | "void __ovld vstorea_half4_rtz(float4 data, size_t offset, __private half *p);\n" |
40536 | "void __ovld vstorea_half8_rtz(float8 data, size_t offset, __private half *p);\n" |
40537 | "void __ovld vstorea_half16_rtz(float16 data, size_t offset, __private half *p);\n" |
40538 | "\n" |
40539 | "void __ovld vstorea_half_rtp(float data, size_t offset, __private half *p);\n" |
40540 | "void __ovld vstorea_half2_rtp(float2 data, size_t offset, __private half *p);\n" |
40541 | "void __ovld vstorea_half3_rtp(float3 data, size_t offset, __private half *p);\n" |
40542 | "void __ovld vstorea_half4_rtp(float4 data, size_t offset, __private half *p);\n" |
40543 | "void __ovld vstorea_half8_rtp(float8 data, size_t offset, __private half *p);\n" |
40544 | "void __ovld vstorea_half16_rtp(float16 data, size_t offset, __private half *p);\n" |
40545 | "\n" |
40546 | "void __ovld vstorea_half_rtn(float data, size_t offset, __private half *p);\n" |
40547 | "void __ovld vstorea_half2_rtn(float2 data, size_t offset, __private half *p);\n" |
40548 | "void __ovld vstorea_half3_rtn(float3 data, size_t offset, __private half *p);\n" |
40549 | "void __ovld vstorea_half4_rtn(float4 data, size_t offset, __private half *p);\n" |
40550 | "void __ovld vstorea_half8_rtn(float8 data, size_t offset, __private half *p);\n" |
40551 | "void __ovld vstorea_half16_rtn(float16 data, size_t offset, __private half *p);\n" |
40552 | "\n" |
40553 | "#ifdef cl_khr_fp64\n" |
40554 | "void __ovld vstorea_half(double data, size_t offset, __global half *p);\n" |
40555 | "void __ovld vstorea_half2(double2 data, size_t offset, __global half *p);\n" |
40556 | "void __ovld vstorea_half3(double3 data, size_t offset, __global half *p);\n" |
40557 | "void __ovld vstorea_half4(double4 data, size_t offset, __global half *p);\n" |
40558 | "void __ovld vstorea_half8(double8 data, size_t offset, __global half *p);\n" |
40559 | "void __ovld vstorea_half16(double16 data, size_t offset, __global half *p);\n" |
40560 | "\n" |
40561 | "void __ovld vstorea_half_rte(double data, size_t offset, __global half *p);\n" |
40562 | "void __ovld vstorea_half2_rte(double2 data, size_t offset, __global half *p);\n" |
40563 | "void __ovld vstorea_half3_rte(double3 data, size_t offset, __global half *p);\n" |
40564 | "void __ovld vstorea_half4_rte(double4 data, size_t offset, __global half *p);\n" |
40565 | "void __ovld vstorea_half8_rte(double8 data, size_t offset, __global half *p);\n" |
40566 | "void __ovld vstorea_half16_rte(double16 data, size_t offset, __global half *p);\n" |
40567 | "\n" |
40568 | "void __ovld vstorea_half_rtz(double data, size_t offset, __global half *p);\n" |
40569 | "void __ovld vstorea_half2_rtz(double2 data, size_t offset, __global half *p);\n" |
40570 | "void __ovld vstorea_half3_rtz(double3 data, size_t offset, __global half *p);\n" |
40571 | "void __ovld vstorea_half4_rtz(double4 data, size_t offset, __global half *p);\n" |
40572 | "void __ovld vstorea_half8_rtz(double8 data, size_t offset, __global half *p);\n" |
40573 | "void __ovld vstorea_half16_rtz(double16 data, size_t offset, __global half *p);\n" |
40574 | "\n" |
40575 | "void __ovld vstorea_half_rtp(double data, size_t offset, __global half *p);\n" |
40576 | "void __ovld vstorea_half2_rtp(double2 data, size_t offset, __global half *p);\n" |
40577 | "void __ovld vstorea_half3_rtp(double3 data, size_t offset, __global half *p);\n" |
40578 | "void __ovld vstorea_half4_rtp(double4 data, size_t offset, __global half *p);\n" |
40579 | "void __ovld vstorea_half8_rtp(double8 data, size_t offset, __global half *p);\n" |
40580 | "void __ovld vstorea_half16_rtp(double16 data, size_t offset, __global half *p);\n" |
40581 | "\n" |
40582 | "void __ovld vstorea_half_rtn(double data, size_t offset, __global half *p);\n" |
40583 | "void __ovld vstorea_half2_rtn(double2 data, size_t offset, __global half *p);\n" |
40584 | "void __ovld vstorea_half3_rtn(double3 data, size_t offset, __global half *p);\n" |
40585 | "void __ovld vstorea_half4_rtn(double4 data, size_t offset, __global half *p);\n" |
40586 | "void __ovld vstorea_half8_rtn(double8 data, size_t offset, __global half *p);\n" |
40587 | "void __ovld vstorea_half16_rtn(double16 data, size_t offset, __global half *p);\n" |
40588 | "\n" |
40589 | "void __ovld vstorea_half(double data, size_t offset, __local half *p);\n" |
40590 | "void __ovld vstorea_half2(double2 data, size_t offset, __local half *p);\n" |
40591 | "void __ovld vstorea_half3(double3 data, size_t offset, __local half *p);\n" |
40592 | "void __ovld vstorea_half4(double4 data, size_t offset, __local half *p);\n" |
40593 | "void __ovld vstorea_half8(double8 data, size_t offset, __local half *p);\n" |
40594 | "void __ovld vstorea_half16(double16 data, size_t offset, __local half *p);\n" |
40595 | "\n" |
40596 | "void __ovld vstorea_half_rte(double data, size_t offset, __local half *p);\n" |
40597 | "void __ovld vstorea_half2_rte(double2 data, size_t offset, __local half *p);\n" |
40598 | "void __ovld vstorea_half3_rte(double3 data, size_t offset, __local half *p);\n" |
40599 | "void __ovld vstorea_half4_rte(double4 data, size_t offset, __local half *p);\n" |
40600 | "void __ovld vstorea_half8_rte(double8 data, size_t offset, __local half *p);\n" |
40601 | "void __ovld vstorea_half16_rte(double16 data, size_t offset, __local half *p);\n" |
40602 | "\n" |
40603 | "void __ovld vstorea_half_rtz(double data, size_t offset, __local half *p);\n" |
40604 | "void __ovld vstorea_half2_rtz(double2 data, size_t offset, __local half *p);\n" |
40605 | "void __ovld vstorea_half3_rtz(double3 data, size_t offset, __local half *p);\n" |
40606 | "void __ovld vstorea_half4_rtz(double4 data, size_t offset, __local half *p);\n" |
40607 | "void __ovld vstorea_half8_rtz(double8 data, size_t offset, __local half *p);\n" |
40608 | "void __ovld vstorea_half16_rtz(double16 data, size_t offset, __local half *p);\n" |
40609 | "\n" |
40610 | "void __ovld vstorea_half_rtp(double data, size_t offset, __local half *p);\n" |
40611 | "void __ovld vstorea_half2_rtp(double2 data, size_t offset, __local half *p);\n" |
40612 | "void __ovld vstorea_half3_rtp(double3 data, size_t offset, __local half *p);\n" |
40613 | "void __ovld vstorea_half4_rtp(double4 data, size_t offset, __local half *p);\n" |
40614 | "void __ovld vstorea_half8_rtp(double8 data, size_t offset, __local half *p);\n" |
40615 | "void __ovld vstorea_half16_rtp(double16 data, size_t offset, __local half *p);\n" |
40616 | "\n" |
40617 | "void __ovld vstorea_half_rtn(double data, size_t offset, __local half *p);\n" |
40618 | "void __ovld vstorea_half2_rtn(double2 data, size_t offset, __local half *p);\n" |
40619 | "void __ovld vstorea_half3_rtn(double3 data, size_t offset, __local half *p);\n" |
40620 | "void __ovld vstorea_half4_rtn(double4 data, size_t offset, __local half *p);\n" |
40621 | "void __ovld vstorea_half8_rtn(double8 data, size_t offset, __local half *p);\n" |
40622 | "void __ovld vstorea_half16_rtn(double16 data, size_t offset, __local half *p);\n" |
40623 | "\n" |
40624 | "void __ovld vstorea_half(double data, size_t offset, __private half *p);\n" |
40625 | "void __ovld vstorea_half2(double2 data, size_t offset, __private half *p);\n" |
40626 | "void __ovld vstorea_half3(double3 data, size_t offset, __private half *p);\n" |
40627 | "void __ovld vstorea_half4(double4 data, size_t offset, __private half *p);\n" |
40628 | "void __ovld vstorea_half8(double8 data, size_t offset, __private half *p);\n" |
40629 | "void __ovld vstorea_half16(double16 data, size_t offset, __private half *p);\n" |
40630 | "\n" |
40631 | "void __ovld vstorea_half_rte(double data, size_t offset, __private half *p);\n" |
40632 | "void __ovld vstorea_half2_rte(double2 data, size_t offset, __private half *p);\n" |
40633 | "void __ovld vstorea_half3_rte(double3 data, size_t offset, __private half *p);\n" |
40634 | "void __ovld vstorea_half4_rte(double4 data, size_t offset, __private half *p);\n" |
40635 | "void __ovld vstorea_half8_rte(double8 data, size_t offset, __private half *p);\n" |
40636 | "void __ovld vstorea_half16_rte(double16 data, size_t offset, __private half *p);\n" |
40637 | "\n" |
40638 | "void __ovld vstorea_half_rtz(double data, size_t offset, __private half *p);\n" |
40639 | "void __ovld vstorea_half2_rtz(double2 data, size_t offset, __private half *p);\n" |
40640 | "void __ovld vstorea_half3_rtz(double3 data, size_t offset, __private half *p);\n" |
40641 | "void __ovld vstorea_half4_rtz(double4 data, size_t offset, __private half *p);\n" |
40642 | "void __ovld vstorea_half8_rtz(double8 data, size_t offset, __private half *p);\n" |
40643 | "void __ovld vstorea_half16_rtz(double16 data, size_t offset, __private half *p);\n" |
40644 | "\n" |
40645 | "void __ovld vstorea_half_rtp(double data, size_t offset, __private half *p);\n" |
40646 | "void __ovld vstorea_half2_rtp(double2 data, size_t offset, __private half *p);\n" |
40647 | "void __ovld vstorea_half3_rtp(double3 data, size_t offset, __private half *p);\n" |
40648 | "void __ovld vstorea_half4_rtp(double4 data, size_t offset, __private half *p);\n" |
40649 | "void __ovld vstorea_half8_rtp(double8 data, size_t offset, __private half *p);\n" |
40650 | "void __ovld vstorea_half16_rtp(double16 data, size_t offset, __private half *p);\n" |
40651 | "\n" |
40652 | "void __ovld vstorea_half_rtn(double data, size_t offset, __private half *p);\n" |
40653 | "void __ovld vstorea_half2_rtn(double2 data,size_t offset, __private half *p);\n" |
40654 | "void __ovld vstorea_half3_rtn(double3 data,size_t offset, __private half *p);\n" |
40655 | "void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p);\n" |
40656 | "void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p);\n" |
40657 | "void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p);\n" |
40658 | "#endif //cl_khr_fp64\n" |
40659 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40660 | "\n" |
40661 | "// OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions\n" |
40662 | "\n" |
40663 | "// Flag type and values for barrier, mem_fence, read_mem_fence, write_mem_fence\n" |
40664 | "typedef uint cl_mem_fence_flags;\n" |
40665 | "\n" |
40666 | "/**\n" |
40667 | " * Queue a memory fence to ensure correct\n" |
40668 | " * ordering of memory operations to local memory\n" |
40669 | " */\n" |
40670 | "#define CLK_LOCAL_MEM_FENCE 0x01\n" |
40671 | "\n" |
40672 | "/**\n" |
40673 | " * Queue a memory fence to ensure correct\n" |
40674 | " * ordering of memory operations to global memory\n" |
40675 | " */\n" |
40676 | "#define CLK_GLOBAL_MEM_FENCE 0x02\n" |
40677 | "\n" |
40678 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40679 | "/**\n" |
40680 | " * Queue a memory fence to ensure correct ordering of memory\n" |
40681 | " * operations between work-items of a work-group to\n" |
40682 | " * image memory.\n" |
40683 | " */\n" |
40684 | "#define CLK_IMAGE_MEM_FENCE 0x04\n" |
40685 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40686 | "\n" |
40687 | "/**\n" |
40688 | " * All work-items in a work-group executing the kernel\n" |
40689 | " * on a processor must execute this function before any\n" |
40690 | " * are allowed to continue execution beyond the barrier.\n" |
40691 | " * This function must be encountered by all work-items in\n" |
40692 | " * a work-group executing the kernel.\n" |
40693 | " * If barrier is inside a conditional statement, then all\n" |
40694 | " * work-items must enter the conditional if any work-item\n" |
40695 | " * enters the conditional statement and executes the\n" |
40696 | " * barrier.\n" |
40697 | " * If barrer is inside a loop, all work-items must execute\n" |
40698 | " * the barrier for each iteration of the loop before any are\n" |
40699 | " * allowed to continue execution beyond the barrier.\n" |
40700 | " * The barrier function also queues a memory fence\n" |
40701 | " * (reads and writes) to ensure correct ordering of\n" |
40702 | " * memory operations to local or global memory.\n" |
40703 | " * The flags argument specifies the memory address space\n" |
40704 | " * and can be set to a combination of the following literal\n" |
40705 | " * values.\n" |
40706 | " * CLK_LOCAL_MEM_FENCE - The barrier function\n" |
40707 | " * will either flush any variables stored in local memory\n" |
40708 | " * or queue a memory fence to ensure correct ordering of\n" |
40709 | " * memory operations to local memory.\n" |
40710 | " * CLK_GLOBAL_MEM_FENCE - The barrier function\n" |
40711 | " * will queue a memory fence to ensure correct ordering\n" |
40712 | " * of memory operations to global memory. This can be\n" |
40713 | " * useful when work-items, for example, write to buffer or\n" |
40714 | " * image objects and then want to read the updated data.\n" |
40715 | " */\n" |
40716 | "\n" |
40717 | "void __ovld __conv barrier(cl_mem_fence_flags flags);\n" |
40718 | "\n" |
40719 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40720 | "\n" |
40721 | "typedef enum memory_scope {\n" |
40722 | " memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,\n" |
40723 | " memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,\n" |
40724 | " memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,\n" |
40725 | " memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,\n" |
40726 | "#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)\n" |
40727 | " memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP\n" |
40728 | "#endif\n" |
40729 | "} memory_scope;\n" |
40730 | "\n" |
40731 | "void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);\n" |
40732 | "void __ovld __conv work_group_barrier(cl_mem_fence_flags flags);\n" |
40733 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40734 | "\n" |
40735 | "// OpenCL v1.1 s6.11.9, v1.2 s6.12.9 - Explicit Memory Fence Functions\n" |
40736 | "\n" |
40737 | "/**\n" |
40738 | " * Orders loads and stores of a work-item\n" |
40739 | " * executing a kernel. This means that loads\n" |
40740 | " * and stores preceding the mem_fence will\n" |
40741 | " * be committed to memory before any loads\n" |
40742 | " * and stores following the mem_fence.\n" |
40743 | " * The flags argument specifies the memory\n" |
40744 | " * address space and can be set to a\n" |
40745 | " * combination of the following literal\n" |
40746 | " * values:\n" |
40747 | " * CLK_LOCAL_MEM_FENCE\n" |
40748 | " * CLK_GLOBAL_MEM_FENCE.\n" |
40749 | " */\n" |
40750 | "void __ovld mem_fence(cl_mem_fence_flags flags);\n" |
40751 | "\n" |
40752 | "/**\n" |
40753 | " * Read memory barrier that orders only\n" |
40754 | " * loads.\n" |
40755 | " * The flags argument specifies the memory\n" |
40756 | " * address space and can be set to a\n" |
40757 | " * combination of the following literal\n" |
40758 | " * values:\n" |
40759 | " * CLK_LOCAL_MEM_FENCE\n" |
40760 | " * CLK_GLOBAL_MEM_FENCE.\n" |
40761 | " */\n" |
40762 | "void __ovld read_mem_fence(cl_mem_fence_flags flags);\n" |
40763 | "\n" |
40764 | "/**\n" |
40765 | " * Write memory barrier that orders only\n" |
40766 | " * stores.\n" |
40767 | " * The flags argument specifies the memory\n" |
40768 | " * address space and can be set to a\n" |
40769 | " * combination of the following literal\n" |
40770 | " * values:\n" |
40771 | " * CLK_LOCAL_MEM_FENCE\n" |
40772 | " * CLK_GLOBAL_MEM_FENCE.\n" |
40773 | " */\n" |
40774 | "void __ovld write_mem_fence(cl_mem_fence_flags flags);\n" |
40775 | "\n" |
40776 | "// OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions\n" |
40777 | "\n" |
40778 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40779 | "cl_mem_fence_flags __ovld get_fence(const void *ptr);\n" |
40780 | "cl_mem_fence_flags __ovld get_fence(void *ptr);\n" |
40781 | "\n" |
40782 | "/**\n" |
40783 | " * Builtin functions to_global, to_local, and to_private need to be declared as Clang builtin functions\n" |
40784 | " * and checked in Sema since they should be declared as\n" |
40785 | " * addr gentype* to_addr (gentype*);\n" |
40786 | " * where gentype is builtin type or user defined type.\n" |
40787 | " */\n" |
40788 | "\n" |
40789 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
40790 | "\n" |
40791 | "// OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch\n" |
40792 | "\n" |
40793 | "/**\n" |
40794 | " * event_t async_work_group_copy (\n" |
40795 | " * __global gentype *dst,\n" |
40796 | " * const __local gentype *src,\n" |
40797 | " * size_t num_elements,\n" |
40798 | " * event_t event)\n" |
40799 | " * Perform an async copy of num_elements\n" |
40800 | " * gentype elements from src to dst. The async\n" |
40801 | " * copy is performed by all work-items in a workgroup\n" |
40802 | " * and this built-in function must therefore\n" |
40803 | " * be encountered by all work-items in a workgroup\n" |
40804 | " * executing the kernel with the same\n" |
40805 | " * argument values; otherwise the results are\n" |
40806 | " * undefined.\n" |
40807 | " * Returns an event object that can be used by\n" |
40808 | " * wait_group_events to wait for the async copy\n" |
40809 | " * to finish. The event argument can also be used\n" |
40810 | " * to associate the async_work_group_copy with\n" |
40811 | " * a previous async copy allowing an event to be\n" |
40812 | " * shared by multiple async copies; otherwise event\n" |
40813 | " * should be zero.\n" |
40814 | " * If event argument is non-zero, the event object\n" |
40815 | " * supplied in event argument will be returned.\n" |
40816 | " * This function does not perform any implicit\n" |
40817 | " * synchronization of source data such as using a\n" |
40818 | " * barrier before performing the copy.\n" |
40819 | " */\n" |
40820 | "event_t __ovld async_work_group_copy(__local char *dst, const __global char *src, size_t num_elements, event_t event);\n" |
40821 | "event_t __ovld async_work_group_copy(__local uchar *dst, const __global uchar *src, size_t num_elements, event_t event);\n" |
40822 | "event_t __ovld async_work_group_copy(__local short *dst, const __global short *src, size_t num_elements, event_t event);\n" |
40823 | "event_t __ovld async_work_group_copy(__local ushort *dst, const __global ushort *src, size_t num_elements, event_t event);\n" |
40824 | "event_t __ovld async_work_group_copy(__local int *dst, const __global int *src, size_t num_elements, event_t event);\n" |
40825 | "event_t __ovld async_work_group_copy(__local uint *dst, const __global uint *src, size_t num_elements, event_t event);\n" |
40826 | "event_t __ovld async_work_group_copy(__local long *dst, const __global long *src, size_t num_elements, event_t event);\n" |
40827 | "event_t __ovld async_work_group_copy(__local ulong *dst, const __global ulong *src, size_t num_elements, event_t event);\n" |
40828 | "event_t __ovld async_work_group_copy(__local float *dst, const __global float *src, size_t num_elements, event_t event);\n" |
40829 | "event_t __ovld async_work_group_copy(__local char2 *dst, const __global char2 *src, size_t num_elements, event_t event);\n" |
40830 | "event_t __ovld async_work_group_copy(__local uchar2 *dst, const __global uchar2 *src, size_t num_elements, event_t event);\n" |
40831 | "event_t __ovld async_work_group_copy(__local short2 *dst, const __global short2 *src, size_t num_elements, event_t event);\n" |
40832 | "event_t __ovld async_work_group_copy(__local ushort2 *dst, const __global ushort2 *src, size_t num_elements, event_t event);\n" |
40833 | "event_t __ovld async_work_group_copy(__local int2 *dst, const __global int2 *src, size_t num_elements, event_t event);\n" |
40834 | "event_t __ovld async_work_group_copy(__local uint2 *dst, const __global uint2 *src, size_t num_elements, event_t event);\n" |
40835 | "event_t __ovld async_work_group_copy(__local long2 *dst, const __global long2 *src, size_t num_elements, event_t event);\n" |
40836 | "event_t __ovld async_work_group_copy(__local ulong2 *dst, const __global ulong2 *src, size_t num_elements, event_t event);\n" |
40837 | "event_t __ovld async_work_group_copy(__local float2 *dst, const __global float2 *src, size_t num_elements, event_t event);\n" |
40838 | "event_t __ovld async_work_group_copy(__local char3 *dst, const __global char3 *src, size_t num_elements, event_t event);\n" |
40839 | "event_t __ovld async_work_group_copy(__local uchar3 *dst, const __global uchar3 *src, size_t num_elements, event_t event);\n" |
40840 | "event_t __ovld async_work_group_copy(__local short3 *dst, const __global short3 *src, size_t num_elements, event_t event);\n" |
40841 | "event_t __ovld async_work_group_copy(__local ushort3 *dst, const __global ushort3 *src, size_t num_elements, event_t event);\n" |
40842 | "event_t __ovld async_work_group_copy(__local int3 *dst, const __global int3 *src, size_t num_elements, event_t event);\n" |
40843 | "event_t __ovld async_work_group_copy(__local uint3 *dst, const __global uint3 *src, size_t num_elements, event_t event);\n" |
40844 | "event_t __ovld async_work_group_copy(__local long3 *dst, const __global long3 *src, size_t num_elements, event_t event);\n" |
40845 | "event_t __ovld async_work_group_copy(__local ulong3 *dst, const __global ulong3 *src, size_t num_elements, event_t event);\n" |
40846 | "event_t __ovld async_work_group_copy(__local float3 *dst, const __global float3 *src, size_t num_elements, event_t event);\n" |
40847 | "event_t __ovld async_work_group_copy(__local char4 *dst, const __global char4 *src, size_t num_elements, event_t event);\n" |
40848 | "event_t __ovld async_work_group_copy(__local uchar4 *dst, const __global uchar4 *src, size_t num_elements, event_t event);\n" |
40849 | "event_t __ovld async_work_group_copy(__local short4 *dst, const __global short4 *src, size_t num_elements, event_t event);\n" |
40850 | "event_t __ovld async_work_group_copy(__local ushort4 *dst, const __global ushort4 *src, size_t num_elements, event_t event);\n" |
40851 | "event_t __ovld async_work_group_copy(__local int4 *dst, const __global int4 *src, size_t num_elements, event_t event);\n" |
40852 | "event_t __ovld async_work_group_copy(__local uint4 *dst, const __global uint4 *src, size_t num_elements, event_t event);\n" |
40853 | "event_t __ovld async_work_group_copy(__local long4 *dst, const __global long4 *src, size_t num_elements, event_t event);\n" |
40854 | "event_t __ovld async_work_group_copy(__local ulong4 *dst, const __global ulong4 *src, size_t num_elements, event_t event);\n" |
40855 | "event_t __ovld async_work_group_copy(__local float4 *dst, const __global float4 *src, size_t num_elements, event_t event);\n" |
40856 | "event_t __ovld async_work_group_copy(__local char8 *dst, const __global char8 *src, size_t num_elements, event_t event);\n" |
40857 | "event_t __ovld async_work_group_copy(__local uchar8 *dst, const __global uchar8 *src, size_t num_elements, event_t event);\n" |
40858 | "event_t __ovld async_work_group_copy(__local short8 *dst, const __global short8 *src, size_t num_elements, event_t event);\n" |
40859 | "event_t __ovld async_work_group_copy(__local ushort8 *dst, const __global ushort8 *src, size_t num_elements, event_t event);\n" |
40860 | "event_t __ovld async_work_group_copy(__local int8 *dst, const __global int8 *src, size_t num_elements, event_t event);\n" |
40861 | "event_t __ovld async_work_group_copy(__local uint8 *dst, const __global uint8 *src, size_t num_elements, event_t event);\n" |
40862 | "event_t __ovld async_work_group_copy(__local long8 *dst, const __global long8 *src, size_t num_elements, event_t event);\n" |
40863 | "event_t __ovld async_work_group_copy(__local ulong8 *dst, const __global ulong8 *src, size_t num_elements, event_t event);\n" |
40864 | "event_t __ovld async_work_group_copy(__local float8 *dst, const __global float8 *src, size_t num_elements, event_t event);\n" |
40865 | "event_t __ovld async_work_group_copy(__local char16 *dst, const __global char16 *src, size_t num_elements, event_t event);\n" |
40866 | "event_t __ovld async_work_group_copy(__local uchar16 *dst, const __global uchar16 *src, size_t num_elements, event_t event);\n" |
40867 | "event_t __ovld async_work_group_copy(__local short16 *dst, const __global short16 *src, size_t num_elements, event_t event);\n" |
40868 | "event_t __ovld async_work_group_copy(__local ushort16 *dst, const __global ushort16 *src, size_t num_elements, event_t event);\n" |
40869 | "event_t __ovld async_work_group_copy(__local int16 *dst, const __global int16 *src, size_t num_elements, event_t event);\n" |
40870 | "event_t __ovld async_work_group_copy(__local uint16 *dst, const __global uint16 *src, size_t num_elements, event_t event);\n" |
40871 | "event_t __ovld async_work_group_copy(__local long16 *dst, const __global long16 *src, size_t num_elements, event_t event);\n" |
40872 | "event_t __ovld async_work_group_copy(__local ulong16 *dst, const __global ulong16 *src, size_t num_elements, event_t event);\n" |
40873 | "event_t __ovld async_work_group_copy(__local float16 *dst, const __global float16 *src, size_t num_elements, event_t event);\n" |
40874 | "event_t __ovld async_work_group_copy(__global char *dst, const __local char *src, size_t num_elements, event_t event);\n" |
40875 | "event_t __ovld async_work_group_copy(__global uchar *dst, const __local uchar *src, size_t num_elements, event_t event);\n" |
40876 | "event_t __ovld async_work_group_copy(__global short *dst, const __local short *src, size_t num_elements, event_t event);\n" |
40877 | "event_t __ovld async_work_group_copy(__global ushort *dst, const __local ushort *src, size_t num_elements, event_t event);\n" |
40878 | "event_t __ovld async_work_group_copy(__global int *dst, const __local int *src, size_t num_elements, event_t event);\n" |
40879 | "event_t __ovld async_work_group_copy(__global uint *dst, const __local uint *src, size_t num_elements, event_t event);\n" |
40880 | "event_t __ovld async_work_group_copy(__global long *dst, const __local long *src, size_t num_elements, event_t event);\n" |
40881 | "event_t __ovld async_work_group_copy(__global ulong *dst, const __local ulong *src, size_t num_elements, event_t event);\n" |
40882 | "event_t __ovld async_work_group_copy(__global float *dst, const __local float *src, size_t num_elements, event_t event);\n" |
40883 | "event_t __ovld async_work_group_copy(__global char2 *dst, const __local char2 *src, size_t num_elements, event_t event);\n" |
40884 | "event_t __ovld async_work_group_copy(__global uchar2 *dst, const __local uchar2 *src, size_t num_elements, event_t event);\n" |
40885 | "event_t __ovld async_work_group_copy(__global short2 *dst, const __local short2 *src, size_t num_elements, event_t event);\n" |
40886 | "event_t __ovld async_work_group_copy(__global ushort2 *dst, const __local ushort2 *src, size_t num_elements, event_t event);\n" |
40887 | "event_t __ovld async_work_group_copy(__global int2 *dst, const __local int2 *src, size_t num_elements, event_t event);\n" |
40888 | "event_t __ovld async_work_group_copy(__global uint2 *dst, const __local uint2 *src, size_t num_elements, event_t event);\n" |
40889 | "event_t __ovld async_work_group_copy(__global long2 *dst, const __local long2 *src, size_t num_elements, event_t event);\n" |
40890 | "event_t __ovld async_work_group_copy(__global ulong2 *dst, const __local ulong2 *src, size_t num_elements, event_t event);\n" |
40891 | "event_t __ovld async_work_group_copy(__global float2 *dst, const __local float2 *src, size_t num_elements, event_t event);\n" |
40892 | "event_t __ovld async_work_group_copy(__global char3 *dst, const __local char3 *src, size_t num_elements, event_t event);\n" |
40893 | "event_t __ovld async_work_group_copy(__global uchar3 *dst, const __local uchar3 *src, size_t num_elements, event_t event);\n" |
40894 | "event_t __ovld async_work_group_copy(__global short3 *dst, const __local short3 *src, size_t num_elements, event_t event);\n" |
40895 | "event_t __ovld async_work_group_copy(__global ushort3 *dst, const __local ushort3 *src, size_t num_elements, event_t event);\n" |
40896 | "event_t __ovld async_work_group_copy(__global int3 *dst, const __local int3 *src, size_t num_elements, event_t event);\n" |
40897 | "event_t __ovld async_work_group_copy(__global uint3 *dst, const __local uint3 *src, size_t num_elements, event_t event);\n" |
40898 | "event_t __ovld async_work_group_copy(__global long3 *dst, const __local long3 *src, size_t num_elements, event_t event);\n" |
40899 | "event_t __ovld async_work_group_copy(__global ulong3 *dst, const __local ulong3 *src, size_t num_elements, event_t event);\n" |
40900 | "event_t __ovld async_work_group_copy(__global float3 *dst, const __local float3 *src, size_t num_elements, event_t event);\n" |
40901 | "event_t __ovld async_work_group_copy(__global char4 *dst, const __local char4 *src, size_t num_elements, event_t event);\n" |
40902 | "event_t __ovld async_work_group_copy(__global uchar4 *dst, const __local uchar4 *src, size_t num_elements, event_t event);\n" |
40903 | "event_t __ovld async_work_group_copy(__global short4 *dst, const __local short4 *src, size_t num_elements, event_t event);\n" |
40904 | "event_t __ovld async_work_group_copy(__global ushort4 *dst, const __local ushort4 *src, size_t num_elements, event_t event);\n" |
40905 | "event_t __ovld async_work_group_copy(__global int4 *dst, const __local int4 *src, size_t num_elements, event_t event);\n" |
40906 | "event_t __ovld async_work_group_copy(__global uint4 *dst, const __local uint4 *src, size_t num_elements, event_t event);\n" |
40907 | "event_t __ovld async_work_group_copy(__global long4 *dst, const __local long4 *src, size_t num_elements, event_t event);\n" |
40908 | "event_t __ovld async_work_group_copy(__global ulong4 *dst, const __local ulong4 *src, size_t num_elements, event_t event);\n" |
40909 | "event_t __ovld async_work_group_copy(__global float4 *dst, const __local float4 *src, size_t num_elements, event_t event);\n" |
40910 | "event_t __ovld async_work_group_copy(__global char8 *dst, const __local char8 *src, size_t num_elements, event_t event);\n" |
40911 | "event_t __ovld async_work_group_copy(__global uchar8 *dst, const __local uchar8 *src, size_t num_elements, event_t event);\n" |
40912 | "event_t __ovld async_work_group_copy(__global short8 *dst, const __local short8 *src, size_t num_elements, event_t event);\n" |
40913 | "event_t __ovld async_work_group_copy(__global ushort8 *dst, const __local ushort8 *src, size_t num_elements, event_t event);\n" |
40914 | "event_t __ovld async_work_group_copy(__global int8 *dst, const __local int8 *src, size_t num_elements, event_t event);\n" |
40915 | "event_t __ovld async_work_group_copy(__global uint8 *dst, const __local uint8 *src, size_t num_elements, event_t event);\n" |
40916 | "event_t __ovld async_work_group_copy(__global long8 *dst, const __local long8 *src, size_t num_elements, event_t event);\n" |
40917 | "event_t __ovld async_work_group_copy(__global ulong8 *dst, const __local ulong8 *src, size_t num_elements, event_t event);\n" |
40918 | "event_t __ovld async_work_group_copy(__global float8 *dst, const __local float8 *src, size_t num_elements, event_t event);\n" |
40919 | "event_t __ovld async_work_group_copy(__global char16 *dst, const __local char16 *src, size_t num_elements, event_t event);\n" |
40920 | "event_t __ovld async_work_group_copy(__global uchar16 *dst, const __local uchar16 *src, size_t num_elements, event_t event);\n" |
40921 | "event_t __ovld async_work_group_copy(__global short16 *dst, const __local short16 *src, size_t num_elements, event_t event);\n" |
40922 | "event_t __ovld async_work_group_copy(__global ushort16 *dst, const __local ushort16 *src, size_t num_elements, event_t event);\n" |
40923 | "event_t __ovld async_work_group_copy(__global int16 *dst, const __local int16 *src, size_t num_elements, event_t event);\n" |
40924 | "event_t __ovld async_work_group_copy(__global uint16 *dst, const __local uint16 *src, size_t num_elements, event_t event);\n" |
40925 | "event_t __ovld async_work_group_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, event_t event);\n" |
40926 | "event_t __ovld async_work_group_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, event_t event);\n" |
40927 | "event_t __ovld async_work_group_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, event_t event);\n" |
40928 | "#ifdef cl_khr_fp64\n" |
40929 | "event_t __ovld async_work_group_copy(__local double *dst, const __global double *src, size_t num_elements, event_t event);\n" |
40930 | "event_t __ovld async_work_group_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, event_t event);\n" |
40931 | "event_t __ovld async_work_group_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, event_t event);\n" |
40932 | "event_t __ovld async_work_group_copy(__local double4 *dst, const __global double4 *src, size_t num_elements, event_t event);\n" |
40933 | "event_t __ovld async_work_group_copy(__local double8 *dst, const __global double8 *src, size_t num_elements, event_t event);\n" |
40934 | "event_t __ovld async_work_group_copy(__local double16 *dst, const __global double16 *src, size_t num_elements, event_t event);\n" |
40935 | "event_t __ovld async_work_group_copy(__global double *dst, const __local double *src, size_t num_elements, event_t event);\n" |
40936 | "event_t __ovld async_work_group_copy(__global double2 *dst, const __local double2 *src, size_t num_elements, event_t event);\n" |
40937 | "event_t __ovld async_work_group_copy(__global double3 *dst, const __local double3 *src, size_t num_elements, event_t event);\n" |
40938 | "event_t __ovld async_work_group_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, event_t event);\n" |
40939 | "event_t __ovld async_work_group_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, event_t event);\n" |
40940 | "event_t __ovld async_work_group_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, event_t event);\n" |
40941 | "#endif //cl_khr_fp64\n" |
40942 | "#ifdef cl_khr_fp16\n" |
40943 | "event_t __ovld async_work_group_copy(__local half *dst, const __global half *src, size_t num_elements, event_t event);\n" |
40944 | "event_t __ovld async_work_group_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, event_t event);\n" |
40945 | "event_t __ovld async_work_group_copy(__local half3 *dst, const __global half3 *src, size_t num_elements, event_t event);\n" |
40946 | "event_t __ovld async_work_group_copy(__local half4 *dst, const __global half4 *src, size_t num_elements, event_t event);\n" |
40947 | "event_t __ovld async_work_group_copy(__local half8 *dst, const __global half8 *src, size_t num_elements, event_t event);\n" |
40948 | "event_t __ovld async_work_group_copy(__local half16 *dst, const __global half16 *src, size_t num_elements, event_t event);\n" |
40949 | "event_t __ovld async_work_group_copy(__global half *dst, const __local half *src, size_t num_elements, event_t event);\n" |
40950 | "event_t __ovld async_work_group_copy(__global half2 *dst, const __local half2 *src, size_t num_elements, event_t event);\n" |
40951 | "event_t __ovld async_work_group_copy(__global half3 *dst, const __local half3 *src, size_t num_elements, event_t event);\n" |
40952 | "event_t __ovld async_work_group_copy(__global half4 *dst, const __local half4 *src, size_t num_elements, event_t event);\n" |
40953 | "event_t __ovld async_work_group_copy(__global half8 *dst, const __local half8 *src, size_t num_elements, event_t event);\n" |
40954 | "event_t __ovld async_work_group_copy(__global half16 *dst, const __local half16 *src, size_t num_elements, event_t event);\n" |
40955 | "#endif //cl_khr_fp16\n" |
40956 | "\n" |
40957 | "/**\n" |
40958 | " * Perform an async gather of num_elements\n" |
40959 | " * gentype elements from src to dst. The\n" |
40960 | " * src_stride is the stride in elements for each\n" |
40961 | " * gentype element read from src. The dst_stride\n" |
40962 | " * is the stride in elements for each gentype\n" |
40963 | " * element written to dst. The async gather is\n" |
40964 | " * performed by all work-items in a work-group.\n" |
40965 | " * This built-in function must therefore be\n" |
40966 | " * encountered by all work-items in a work-group\n" |
40967 | " * executing the kernel with the same argument\n" |
40968 | " * values; otherwise the results are undefined.\n" |
40969 | " * Returns an event object that can be used by\n" |
40970 | " * wait_group_events to wait for the async copy\n" |
40971 | " * to finish. The event argument can also be used\n" |
40972 | " * to associate the\n" |
40973 | " * async_work_group_strided_copy with a\n" |
40974 | " * previous async copy allowing an event to be\n" |
40975 | " * shared by multiple async copies; otherwise event\n" |
40976 | " * should be zero.\n" |
40977 | " * If event argument is non-zero, the event object\n" |
40978 | " * supplied in event argument will be returned.\n" |
40979 | " * This function does not perform any implicit\n" |
40980 | " * synchronization of source data such as using a\n" |
40981 | " * barrier before performing the copy.\n" |
40982 | " */\n" |
40983 | "event_t __ovld async_work_group_strided_copy(__local char *dst, const __global char *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40984 | "event_t __ovld async_work_group_strided_copy(__local uchar *dst, const __global uchar *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40985 | "event_t __ovld async_work_group_strided_copy(__local short *dst, const __global short *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40986 | "event_t __ovld async_work_group_strided_copy(__local ushort *dst, const __global ushort *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40987 | "event_t __ovld async_work_group_strided_copy(__local int *dst, const __global int *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40988 | "event_t __ovld async_work_group_strided_copy(__local uint *dst, const __global uint *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40989 | "event_t __ovld async_work_group_strided_copy(__local long *dst, const __global long *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40990 | "event_t __ovld async_work_group_strided_copy(__local ulong *dst, const __global ulong *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40991 | "event_t __ovld async_work_group_strided_copy(__local float *dst, const __global float *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40992 | "event_t __ovld async_work_group_strided_copy(__local char2 *dst, const __global char2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40993 | "event_t __ovld async_work_group_strided_copy(__local uchar2 *dst, const __global uchar2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40994 | "event_t __ovld async_work_group_strided_copy(__local short2 *dst, const __global short2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40995 | "event_t __ovld async_work_group_strided_copy(__local ushort2 *dst, const __global ushort2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40996 | "event_t __ovld async_work_group_strided_copy(__local int2 *dst, const __global int2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40997 | "event_t __ovld async_work_group_strided_copy(__local uint2 *dst, const __global uint2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40998 | "event_t __ovld async_work_group_strided_copy(__local long2 *dst, const __global long2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
40999 | "event_t __ovld async_work_group_strided_copy(__local ulong2 *dst, const __global ulong2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41000 | "event_t __ovld async_work_group_strided_copy(__local float2 *dst, const __global float2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41001 | "event_t __ovld async_work_group_strided_copy(__local char3 *dst, const __global char3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41002 | "event_t __ovld async_work_group_strided_copy(__local uchar3 *dst, const __global uchar3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41003 | "event_t __ovld async_work_group_strided_copy(__local short3 *dst, const __global short3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41004 | "event_t __ovld async_work_group_strided_copy(__local ushort3 *dst, const __global ushort3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41005 | "event_t __ovld async_work_group_strided_copy(__local int3 *dst, const __global int3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41006 | "event_t __ovld async_work_group_strided_copy(__local uint3 *dst, const __global uint3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41007 | "event_t __ovld async_work_group_strided_copy(__local long3 *dst, const __global long3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41008 | "event_t __ovld async_work_group_strided_copy(__local ulong3 *dst, const __global ulong3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41009 | "event_t __ovld async_work_group_strided_copy(__local float3 *dst, const __global float3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41010 | "event_t __ovld async_work_group_strided_copy(__local char4 *dst, const __global char4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41011 | "event_t __ovld async_work_group_strided_copy(__local uchar4 *dst, const __global uchar4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41012 | "event_t __ovld async_work_group_strided_copy(__local short4 *dst, const __global short4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41013 | "event_t __ovld async_work_group_strided_copy(__local ushort4 *dst, const __global ushort4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41014 | "event_t __ovld async_work_group_strided_copy(__local int4 *dst, const __global int4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41015 | "event_t __ovld async_work_group_strided_copy(__local uint4 *dst, const __global uint4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41016 | "event_t __ovld async_work_group_strided_copy(__local long4 *dst, const __global long4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41017 | "event_t __ovld async_work_group_strided_copy(__local ulong4 *dst, const __global ulong4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41018 | "event_t __ovld async_work_group_strided_copy(__local float4 *dst, const __global float4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41019 | "event_t __ovld async_work_group_strided_copy(__local char8 *dst, const __global char8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41020 | "event_t __ovld async_work_group_strided_copy(__local uchar8 *dst, const __global uchar8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41021 | "event_t __ovld async_work_group_strided_copy(__local short8 *dst, const __global short8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41022 | "event_t __ovld async_work_group_strided_copy(__local ushort8 *dst, const __global ushort8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41023 | "event_t __ovld async_work_group_strided_copy(__local int8 *dst, const __global int8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41024 | "event_t __ovld async_work_group_strided_copy(__local uint8 *dst, const __global uint8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41025 | "event_t __ovld async_work_group_strided_copy(__local long8 *dst, const __global long8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41026 | "event_t __ovld async_work_group_strided_copy(__local ulong8 *dst, const __global ulong8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41027 | "event_t __ovld async_work_group_strided_copy(__local float8 *dst, const __global float8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41028 | "event_t __ovld async_work_group_strided_copy(__local char16 *dst, const __global char16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41029 | "event_t __ovld async_work_group_strided_copy(__local uchar16 *dst, const __global uchar16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41030 | "event_t __ovld async_work_group_strided_copy(__local short16 *dst, const __global short16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41031 | "event_t __ovld async_work_group_strided_copy(__local ushort16 *dst, const __global ushort16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41032 | "event_t __ovld async_work_group_strided_copy(__local int16 *dst, const __global int16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41033 | "event_t __ovld async_work_group_strided_copy(__local uint16 *dst, const __global uint16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41034 | "event_t __ovld async_work_group_strided_copy(__local long16 *dst, const __global long16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41035 | "event_t __ovld async_work_group_strided_copy(__local ulong16 *dst, const __global ulong16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41036 | "event_t __ovld async_work_group_strided_copy(__local float16 *dst, const __global float16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41037 | "event_t __ovld async_work_group_strided_copy(__global char *dst, const __local char *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41038 | "event_t __ovld async_work_group_strided_copy(__global uchar *dst, const __local uchar *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41039 | "event_t __ovld async_work_group_strided_copy(__global short *dst, const __local short *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41040 | "event_t __ovld async_work_group_strided_copy(__global ushort *dst, const __local ushort *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41041 | "event_t __ovld async_work_group_strided_copy(__global int *dst, const __local int *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41042 | "event_t __ovld async_work_group_strided_copy(__global uint *dst, const __local uint *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41043 | "event_t __ovld async_work_group_strided_copy(__global long *dst, const __local long *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41044 | "event_t __ovld async_work_group_strided_copy(__global ulong *dst, const __local ulong *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41045 | "event_t __ovld async_work_group_strided_copy(__global float *dst, const __local float *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41046 | "event_t __ovld async_work_group_strided_copy(__global char2 *dst, const __local char2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41047 | "event_t __ovld async_work_group_strided_copy(__global uchar2 *dst, const __local uchar2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41048 | "event_t __ovld async_work_group_strided_copy(__global short2 *dst, const __local short2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41049 | "event_t __ovld async_work_group_strided_copy(__global ushort2 *dst, const __local ushort2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41050 | "event_t __ovld async_work_group_strided_copy(__global int2 *dst, const __local int2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41051 | "event_t __ovld async_work_group_strided_copy(__global uint2 *dst, const __local uint2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41052 | "event_t __ovld async_work_group_strided_copy(__global long2 *dst, const __local long2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41053 | "event_t __ovld async_work_group_strided_copy(__global ulong2 *dst, const __local ulong2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41054 | "event_t __ovld async_work_group_strided_copy(__global float2 *dst, const __local float2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41055 | "event_t __ovld async_work_group_strided_copy(__global char3 *dst, const __local char3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41056 | "event_t __ovld async_work_group_strided_copy(__global uchar3 *dst, const __local uchar3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41057 | "event_t __ovld async_work_group_strided_copy(__global short3 *dst, const __local short3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41058 | "event_t __ovld async_work_group_strided_copy(__global ushort3 *dst, const __local ushort3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41059 | "event_t __ovld async_work_group_strided_copy(__global int3 *dst, const __local int3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41060 | "event_t __ovld async_work_group_strided_copy(__global uint3 *dst, const __local uint3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41061 | "event_t __ovld async_work_group_strided_copy(__global long3 *dst, const __local long3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41062 | "event_t __ovld async_work_group_strided_copy(__global ulong3 *dst, const __local ulong3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41063 | "event_t __ovld async_work_group_strided_copy(__global float3 *dst, const __local float3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41064 | "event_t __ovld async_work_group_strided_copy(__global char4 *dst, const __local char4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41065 | "event_t __ovld async_work_group_strided_copy(__global uchar4 *dst, const __local uchar4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41066 | "event_t __ovld async_work_group_strided_copy(__global short4 *dst, const __local short4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41067 | "event_t __ovld async_work_group_strided_copy(__global ushort4 *dst, const __local ushort4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41068 | "event_t __ovld async_work_group_strided_copy(__global int4 *dst, const __local int4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41069 | "event_t __ovld async_work_group_strided_copy(__global uint4 *dst, const __local uint4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41070 | "event_t __ovld async_work_group_strided_copy(__global long4 *dst, const __local long4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41071 | "event_t __ovld async_work_group_strided_copy(__global ulong4 *dst, const __local ulong4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41072 | "event_t __ovld async_work_group_strided_copy(__global float4 *dst, const __local float4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41073 | "event_t __ovld async_work_group_strided_copy(__global char8 *dst, const __local char8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41074 | "event_t __ovld async_work_group_strided_copy(__global uchar8 *dst, const __local uchar8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41075 | "event_t __ovld async_work_group_strided_copy(__global short8 *dst, const __local short8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41076 | "event_t __ovld async_work_group_strided_copy(__global ushort8 *dst, const __local ushort8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41077 | "event_t __ovld async_work_group_strided_copy(__global int8 *dst, const __local int8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41078 | "event_t __ovld async_work_group_strided_copy(__global uint8 *dst, const __local uint8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41079 | "event_t __ovld async_work_group_strided_copy(__global long8 *dst, const __local long8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41080 | "event_t __ovld async_work_group_strided_copy(__global ulong8 *dst, const __local ulong8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41081 | "event_t __ovld async_work_group_strided_copy(__global float8 *dst, const __local float8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41082 | "event_t __ovld async_work_group_strided_copy(__global char16 *dst, const __local char16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41083 | "event_t __ovld async_work_group_strided_copy(__global uchar16 *dst, const __local uchar16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41084 | "event_t __ovld async_work_group_strided_copy(__global short16 *dst, const __local short16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41085 | "event_t __ovld async_work_group_strided_copy(__global ushort16 *dst, const __local ushort16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41086 | "event_t __ovld async_work_group_strided_copy(__global int16 *dst, const __local int16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41087 | "event_t __ovld async_work_group_strided_copy(__global uint16 *dst, const __local uint16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41088 | "event_t __ovld async_work_group_strided_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41089 | "event_t __ovld async_work_group_strided_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41090 | "event_t __ovld async_work_group_strided_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41091 | "#ifdef cl_khr_fp64\n" |
41092 | "event_t __ovld async_work_group_strided_copy(__local double *dst, const __global double *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41093 | "event_t __ovld async_work_group_strided_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41094 | "event_t __ovld async_work_group_strided_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41095 | "event_t __ovld async_work_group_strided_copy(__local double4 *dst, const __global double4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41096 | "event_t __ovld async_work_group_strided_copy(__local double8 *dst, const __global double8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41097 | "event_t __ovld async_work_group_strided_copy(__local double16 *dst, const __global double16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41098 | "event_t __ovld async_work_group_strided_copy(__global double *dst, const __local double *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41099 | "event_t __ovld async_work_group_strided_copy(__global double2 *dst, const __local double2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41100 | "event_t __ovld async_work_group_strided_copy(__global double3 *dst, const __local double3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41101 | "event_t __ovld async_work_group_strided_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41102 | "event_t __ovld async_work_group_strided_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41103 | "event_t __ovld async_work_group_strided_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41104 | "#endif //cl_khr_fp64\n" |
41105 | "#ifdef cl_khr_fp16\n" |
41106 | "event_t __ovld async_work_group_strided_copy(__local half *dst, const __global half *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41107 | "event_t __ovld async_work_group_strided_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41108 | "event_t __ovld async_work_group_strided_copy(__local half3 *dst, const __global half3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41109 | "event_t __ovld async_work_group_strided_copy(__local half4 *dst, const __global half4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41110 | "event_t __ovld async_work_group_strided_copy(__local half8 *dst, const __global half8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41111 | "event_t __ovld async_work_group_strided_copy(__local half16 *dst, const __global half16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
41112 | "event_t __ovld async_work_group_strided_copy(__global half *dst, const __local half *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41113 | "event_t __ovld async_work_group_strided_copy(__global half2 *dst, const __local half2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41114 | "event_t __ovld async_work_group_strided_copy(__global half3 *dst, const __local half3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41115 | "event_t __ovld async_work_group_strided_copy(__global half4 *dst, const __local half4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41116 | "event_t __ovld async_work_group_strided_copy(__global half8 *dst, const __local half8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41117 | "event_t __ovld async_work_group_strided_copy(__global half16 *dst, const __local half16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
41118 | "#endif //cl_khr_fp16\n" |
41119 | "\n" |
41120 | "/**\n" |
41121 | " * Wait for events that identify the\n" |
41122 | " * async_work_group_copy operations to\n" |
41123 | " * complete. The event objects specified in\n" |
41124 | " * event_list will be released after the wait is\n" |
41125 | " * performed.\n" |
41126 | " * This function must be encountered by all workitems\n" |
41127 | " * in a work-group executing the kernel with\n" |
41128 | " * the same num_events and event objects specified\n" |
41129 | " * in event_list; otherwise the results are undefined.\n" |
41130 | " */\n" |
41131 | "void __ovld wait_group_events(int num_events, event_t *event_list);\n" |
41132 | "\n" |
41133 | "/**\n" |
41134 | " * Prefetch num_elements * sizeof(gentype)\n" |
41135 | " * bytes into the global cache. The prefetch\n" |
41136 | " * instruction is applied to a work-item in a workgroup\n" |
41137 | " * and does not affect the functional\n" |
41138 | " * behavior of the kernel.\n" |
41139 | " */\n" |
41140 | "void __ovld prefetch(const __global char *p, size_t num_elements);\n" |
41141 | "void __ovld prefetch(const __global uchar *p, size_t num_elements);\n" |
41142 | "void __ovld prefetch(const __global short *p, size_t num_elements);\n" |
41143 | "void __ovld prefetch(const __global ushort *p, size_t num_elements);\n" |
41144 | "void __ovld prefetch(const __global int *p, size_t num_elements);\n" |
41145 | "void __ovld prefetch(const __global uint *p, size_t num_elements);\n" |
41146 | "void __ovld prefetch(const __global long *p, size_t num_elements);\n" |
41147 | "void __ovld prefetch(const __global ulong *p, size_t num_elements);\n" |
41148 | "void __ovld prefetch(const __global float *p, size_t num_elements);\n" |
41149 | "void __ovld prefetch(const __global char2 *p, size_t num_elements);\n" |
41150 | "void __ovld prefetch(const __global uchar2 *p, size_t num_elements);\n" |
41151 | "void __ovld prefetch(const __global short2 *p, size_t num_elements);\n" |
41152 | "void __ovld prefetch(const __global ushort2 *p, size_t num_elements);\n" |
41153 | "void __ovld prefetch(const __global int2 *p, size_t num_elements);\n" |
41154 | "void __ovld prefetch(const __global uint2 *p, size_t num_elements);\n" |
41155 | "void __ovld prefetch(const __global long2 *p, size_t num_elements);\n" |
41156 | "void __ovld prefetch(const __global ulong2 *p, size_t num_elements);\n" |
41157 | "void __ovld prefetch(const __global float2 *p, size_t num_elements);\n" |
41158 | "void __ovld prefetch(const __global char3 *p, size_t num_elements);\n" |
41159 | "void __ovld prefetch(const __global uchar3 *p, size_t num_elements);\n" |
41160 | "void __ovld prefetch(const __global short3 *p, size_t num_elements);\n" |
41161 | "void __ovld prefetch(const __global ushort3 *p, size_t num_elements);\n" |
41162 | "void __ovld prefetch(const __global int3 *p, size_t num_elements);\n" |
41163 | "void __ovld prefetch(const __global uint3 *p, size_t num_elements);\n" |
41164 | "void __ovld prefetch(const __global long3 *p, size_t num_elements);\n" |
41165 | "void __ovld prefetch(const __global ulong3 *p, size_t num_elements);\n" |
41166 | "void __ovld prefetch(const __global float3 *p, size_t num_elements);\n" |
41167 | "void __ovld prefetch(const __global char4 *p, size_t num_elements);\n" |
41168 | "void __ovld prefetch(const __global uchar4 *p, size_t num_elements);\n" |
41169 | "void __ovld prefetch(const __global short4 *p, size_t num_elements);\n" |
41170 | "void __ovld prefetch(const __global ushort4 *p, size_t num_elements);\n" |
41171 | "void __ovld prefetch(const __global int4 *p, size_t num_elements);\n" |
41172 | "void __ovld prefetch(const __global uint4 *p, size_t num_elements);\n" |
41173 | "void __ovld prefetch(const __global long4 *p, size_t num_elements);\n" |
41174 | "void __ovld prefetch(const __global ulong4 *p, size_t num_elements);\n" |
41175 | "void __ovld prefetch(const __global float4 *p, size_t num_elements);\n" |
41176 | "void __ovld prefetch(const __global char8 *p, size_t num_elements);\n" |
41177 | "void __ovld prefetch(const __global uchar8 *p, size_t num_elements);\n" |
41178 | "void __ovld prefetch(const __global short8 *p, size_t num_elements);\n" |
41179 | "void __ovld prefetch(const __global ushort8 *p, size_t num_elements);\n" |
41180 | "void __ovld prefetch(const __global int8 *p, size_t num_elements);\n" |
41181 | "void __ovld prefetch(const __global uint8 *p, size_t num_elements);\n" |
41182 | "void __ovld prefetch(const __global long8 *p, size_t num_elements);\n" |
41183 | "void __ovld prefetch(const __global ulong8 *p, size_t num_elements);\n" |
41184 | "void __ovld prefetch(const __global float8 *p, size_t num_elements);\n" |
41185 | "void __ovld prefetch(const __global char16 *p, size_t num_elements);\n" |
41186 | "void __ovld prefetch(const __global uchar16 *p, size_t num_elements);\n" |
41187 | "void __ovld prefetch(const __global short16 *p, size_t num_elements);\n" |
41188 | "void __ovld prefetch(const __global ushort16 *p, size_t num_elements);\n" |
41189 | "void __ovld prefetch(const __global int16 *p, size_t num_elements);\n" |
41190 | "void __ovld prefetch(const __global uint16 *p, size_t num_elements);\n" |
41191 | "void __ovld prefetch(const __global long16 *p, size_t num_elements);\n" |
41192 | "void __ovld prefetch(const __global ulong16 *p, size_t num_elements);\n" |
41193 | "void __ovld prefetch(const __global float16 *p, size_t num_elements);\n" |
41194 | "#ifdef cl_khr_fp64\n" |
41195 | "void __ovld prefetch(const __global double *p, size_t num_elements);\n" |
41196 | "void __ovld prefetch(const __global double2 *p, size_t num_elements);\n" |
41197 | "void __ovld prefetch(const __global double3 *p, size_t num_elements);\n" |
41198 | "void __ovld prefetch(const __global double4 *p, size_t num_elements);\n" |
41199 | "void __ovld prefetch(const __global double8 *p, size_t num_elements);\n" |
41200 | "void __ovld prefetch(const __global double16 *p, size_t num_elements);\n" |
41201 | "#endif //cl_khr_fp64\n" |
41202 | "#ifdef cl_khr_fp16\n" |
41203 | "void __ovld prefetch(const __global half *p, size_t num_elements);\n" |
41204 | "void __ovld prefetch(const __global half2 *p, size_t num_elements);\n" |
41205 | "void __ovld prefetch(const __global half3 *p, size_t num_elements);\n" |
41206 | "void __ovld prefetch(const __global half4 *p, size_t num_elements);\n" |
41207 | "void __ovld prefetch(const __global half8 *p, size_t num_elements);\n" |
41208 | "void __ovld prefetch(const __global half16 *p, size_t num_elements);\n" |
41209 | "#endif // cl_khr_fp16\n" |
41210 | "\n" |
41211 | "// OpenCL v1.1 s6.11.1, v1.2 s6.12.11 - Atomic Functions\n" |
41212 | "\n" |
41213 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
41214 | "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n" |
41215 | "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n" |
41216 | "#endif\n" |
41217 | "/**\n" |
41218 | " * Read the 32-bit value (referred to as old)\n" |
41219 | " * stored at location pointed by p. Compute\n" |
41220 | " * (old + val) and store result at location\n" |
41221 | " * pointed by p. The function returns old.\n" |
41222 | " */\n" |
41223 | "int __ovld atomic_add(volatile __global int *p, int val);\n" |
41224 | "unsigned int __ovld atomic_add(volatile __global unsigned int *p, unsigned int val);\n" |
41225 | "int __ovld atomic_add(volatile __local int *p, int val);\n" |
41226 | "unsigned int __ovld atomic_add(volatile __local unsigned int *p, unsigned int val);\n" |
41227 | "\n" |
41228 | "#if defined(cl_khr_global_int32_base_atomics)\n" |
41229 | "int __ovld atom_add(volatile __global int *p, int val);\n" |
41230 | "unsigned int __ovld atom_add(volatile __global unsigned int *p, unsigned int val);\n" |
41231 | "#endif\n" |
41232 | "#if defined(cl_khr_local_int32_base_atomics)\n" |
41233 | "int __ovld atom_add(volatile __local int *p, int val);\n" |
41234 | "unsigned int __ovld atom_add(volatile __local unsigned int *p, unsigned int val);\n" |
41235 | "#endif\n" |
41236 | "\n" |
41237 | "#if defined(cl_khr_int64_base_atomics)\n" |
41238 | "long __ovld atom_add(volatile __global long *p, long val);\n" |
41239 | "unsigned long __ovld atom_add(volatile __global unsigned long *p, unsigned long val);\n" |
41240 | "long __ovld atom_add(volatile __local long *p, long val);\n" |
41241 | "unsigned long __ovld atom_add(volatile __local unsigned long *p, unsigned long val);\n" |
41242 | "#endif\n" |
41243 | "\n" |
41244 | "/**\n" |
41245 | " * Read the 32-bit value (referred to as old) stored at location pointed by p.\n" |
41246 | " * Compute (old - val) and store result at location pointed by p. The function\n" |
41247 | " * returns old.\n" |
41248 | " */\n" |
41249 | "int __ovld atomic_sub(volatile __global int *p, int val);\n" |
41250 | "unsigned int __ovld atomic_sub(volatile __global unsigned int *p, unsigned int val);\n" |
41251 | "int __ovld atomic_sub(volatile __local int *p, int val);\n" |
41252 | "unsigned int __ovld atomic_sub(volatile __local unsigned int *p, unsigned int val);\n" |
41253 | "\n" |
41254 | "#if defined(cl_khr_global_int32_base_atomics)\n" |
41255 | "int __ovld atom_sub(volatile __global int *p, int val);\n" |
41256 | "unsigned int __ovld atom_sub(volatile __global unsigned int *p, unsigned int val);\n" |
41257 | "#endif\n" |
41258 | "#if defined(cl_khr_local_int32_base_atomics)\n" |
41259 | "int __ovld atom_sub(volatile __local int *p, int val);\n" |
41260 | "unsigned int __ovld atom_sub(volatile __local unsigned int *p, unsigned int val);\n" |
41261 | "#endif\n" |
41262 | "\n" |
41263 | "#if defined(cl_khr_int64_base_atomics)\n" |
41264 | "long __ovld atom_sub(volatile __global long *p, long val);\n" |
41265 | "unsigned long __ovld atom_sub(volatile __global unsigned long *p, unsigned long val);\n" |
41266 | "long __ovld atom_sub(volatile __local long *p, long val);\n" |
41267 | "unsigned long __ovld atom_sub(volatile __local unsigned long *p, unsigned long val);\n" |
41268 | "#endif\n" |
41269 | "\n" |
41270 | "/**\n" |
41271 | " * Swaps the old value stored at location p\n" |
41272 | " * with new value given by val. Returns old\n" |
41273 | " * value.\n" |
41274 | " */\n" |
41275 | "int __ovld atomic_xchg(volatile __global int *p, int val);\n" |
41276 | "unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, unsigned int val);\n" |
41277 | "int __ovld atomic_xchg(volatile __local int *p, int val);\n" |
41278 | "unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, unsigned int val);\n" |
41279 | "float __ovld atomic_xchg(volatile __global float *p, float val);\n" |
41280 | "float __ovld atomic_xchg(volatile __local float *p, float val);\n" |
41281 | "\n" |
41282 | "#if defined(cl_khr_global_int32_base_atomics)\n" |
41283 | "int __ovld atom_xchg(volatile __global int *p, int val);\n" |
41284 | "unsigned int __ovld atom_xchg(volatile __global unsigned int *p, unsigned int val);\n" |
41285 | "#endif\n" |
41286 | "#if defined(cl_khr_local_int32_base_atomics)\n" |
41287 | "int __ovld atom_xchg(volatile __local int *p, int val);\n" |
41288 | "unsigned int __ovld atom_xchg(volatile __local unsigned int *p, unsigned int val);\n" |
41289 | "#endif\n" |
41290 | "\n" |
41291 | "#if defined(cl_khr_int64_base_atomics)\n" |
41292 | "long __ovld atom_xchg(volatile __global long *p, long val);\n" |
41293 | "long __ovld atom_xchg(volatile __local long *p, long val);\n" |
41294 | "unsigned long __ovld atom_xchg(volatile __global unsigned long *p, unsigned long val);\n" |
41295 | "unsigned long __ovld atom_xchg(volatile __local unsigned long *p, unsigned long val);\n" |
41296 | "#endif\n" |
41297 | "\n" |
41298 | "/**\n" |
41299 | " * Read the 32-bit value (referred to as old)\n" |
41300 | " * stored at location pointed by p. Compute\n" |
41301 | " * (old + 1) and store result at location\n" |
41302 | " * pointed by p. The function returns old.\n" |
41303 | " */\n" |
41304 | "int __ovld atomic_inc(volatile __global int *p);\n" |
41305 | "unsigned int __ovld atomic_inc(volatile __global unsigned int *p);\n" |
41306 | "int __ovld atomic_inc(volatile __local int *p);\n" |
41307 | "unsigned int __ovld atomic_inc(volatile __local unsigned int *p);\n" |
41308 | "\n" |
41309 | "#if defined(cl_khr_global_int32_base_atomics)\n" |
41310 | "int __ovld atom_inc(volatile __global int *p);\n" |
41311 | "unsigned int __ovld atom_inc(volatile __global unsigned int *p);\n" |
41312 | "#endif\n" |
41313 | "#if defined(cl_khr_local_int32_base_atomics)\n" |
41314 | "int __ovld atom_inc(volatile __local int *p);\n" |
41315 | "unsigned int __ovld atom_inc(volatile __local unsigned int *p);\n" |
41316 | "#endif\n" |
41317 | "\n" |
41318 | "#if defined(cl_khr_int64_base_atomics)\n" |
41319 | "long __ovld atom_inc(volatile __global long *p);\n" |
41320 | "unsigned long __ovld atom_inc(volatile __global unsigned long *p);\n" |
41321 | "long __ovld atom_inc(volatile __local long *p);\n" |
41322 | "unsigned long __ovld atom_inc(volatile __local unsigned long *p);\n" |
41323 | "#endif\n" |
41324 | "\n" |
41325 | "/**\n" |
41326 | " * Read the 32-bit value (referred to as old)\n" |
41327 | " * stored at location pointed by p. Compute\n" |
41328 | " * (old - 1) and store result at location\n" |
41329 | " * pointed by p. The function returns old.\n" |
41330 | " */\n" |
41331 | "int __ovld atomic_dec(volatile __global int *p);\n" |
41332 | "unsigned int __ovld atomic_dec(volatile __global unsigned int *p);\n" |
41333 | "int __ovld atomic_dec(volatile __local int *p);\n" |
41334 | "unsigned int __ovld atomic_dec(volatile __local unsigned int *p);\n" |
41335 | "\n" |
41336 | "#if defined(cl_khr_global_int32_base_atomics)\n" |
41337 | "int __ovld atom_dec(volatile __global int *p);\n" |
41338 | "unsigned int __ovld atom_dec(volatile __global unsigned int *p);\n" |
41339 | "#endif\n" |
41340 | "#if defined(cl_khr_local_int32_base_atomics)\n" |
41341 | "int __ovld atom_dec(volatile __local int *p);\n" |
41342 | "unsigned int __ovld atom_dec(volatile __local unsigned int *p);\n" |
41343 | "#endif\n" |
41344 | "\n" |
41345 | "#if defined(cl_khr_int64_base_atomics)\n" |
41346 | "long __ovld atom_dec(volatile __global long *p);\n" |
41347 | "unsigned long __ovld atom_dec(volatile __global unsigned long *p);\n" |
41348 | "long __ovld atom_dec(volatile __local long *p);\n" |
41349 | "unsigned long __ovld atom_dec(volatile __local unsigned long *p);\n" |
41350 | "#endif\n" |
41351 | "\n" |
41352 | "/**\n" |
41353 | " * Read the 32-bit value (referred to as old)\n" |
41354 | " * stored at location pointed by p. Compute\n" |
41355 | " * (old == cmp) ? val : old and store result at\n" |
41356 | " * location pointed by p. The function\n" |
41357 | " * returns old.\n" |
41358 | " */\n" |
41359 | "int __ovld atomic_cmpxchg(volatile __global int *p, int cmp, int val);\n" |
41360 | "unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val);\n" |
41361 | "int __ovld atomic_cmpxchg(volatile __local int *p, int cmp, int val);\n" |
41362 | "unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val);\n" |
41363 | "\n" |
41364 | "#if defined(cl_khr_global_int32_base_atomics)\n" |
41365 | "int __ovld atom_cmpxchg(volatile __global int *p, int cmp, int val);\n" |
41366 | "unsigned int __ovld atom_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val);\n" |
41367 | "#endif\n" |
41368 | "#if defined(cl_khr_local_int32_base_atomics)\n" |
41369 | "int __ovld atom_cmpxchg(volatile __local int *p, int cmp, int val);\n" |
41370 | "unsigned int __ovld atom_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val);\n" |
41371 | "#endif\n" |
41372 | "\n" |
41373 | "#if defined(cl_khr_int64_base_atomics)\n" |
41374 | "long __ovld atom_cmpxchg(volatile __global long *p, long cmp, long val);\n" |
41375 | "unsigned long __ovld atom_cmpxchg(volatile __global unsigned long *p, unsigned long cmp, unsigned long val);\n" |
41376 | "long __ovld atom_cmpxchg(volatile __local long *p, long cmp, long val);\n" |
41377 | "unsigned long __ovld atom_cmpxchg(volatile __local unsigned long *p, unsigned long cmp, unsigned long val);\n" |
41378 | "#endif\n" |
41379 | "\n" |
41380 | "/**\n" |
41381 | " * Read the 32-bit value (referred to as old)\n" |
41382 | " * stored at location pointed by p. Compute\n" |
41383 | " * min(old, val) and store minimum value at\n" |
41384 | " * location pointed by p. The function\n" |
41385 | " * returns old.\n" |
41386 | " */\n" |
41387 | "int __ovld atomic_min(volatile __global int *p, int val);\n" |
41388 | "unsigned int __ovld atomic_min(volatile __global unsigned int *p, unsigned int val);\n" |
41389 | "int __ovld atomic_min(volatile __local int *p, int val);\n" |
41390 | "unsigned int __ovld atomic_min(volatile __local unsigned int *p, unsigned int val);\n" |
41391 | "\n" |
41392 | "#if defined(cl_khr_global_int32_extended_atomics)\n" |
41393 | "int __ovld atom_min(volatile __global int *p, int val);\n" |
41394 | "unsigned int __ovld atom_min(volatile __global unsigned int *p, unsigned int val);\n" |
41395 | "#endif\n" |
41396 | "#if defined(cl_khr_local_int32_extended_atomics)\n" |
41397 | "int __ovld atom_min(volatile __local int *p, int val);\n" |
41398 | "unsigned int __ovld atom_min(volatile __local unsigned int *p, unsigned int val);\n" |
41399 | "#endif\n" |
41400 | "\n" |
41401 | "#if defined(cl_khr_int64_extended_atomics)\n" |
41402 | "long __ovld atom_min(volatile __global long *p, long val);\n" |
41403 | "unsigned long __ovld atom_min(volatile __global unsigned long *p, unsigned long val);\n" |
41404 | "long __ovld atom_min(volatile __local long *p, long val);\n" |
41405 | "unsigned long __ovld atom_min(volatile __local unsigned long *p, unsigned long val);\n" |
41406 | "#endif\n" |
41407 | "\n" |
41408 | "/**\n" |
41409 | " * Read the 32-bit value (referred to as old)\n" |
41410 | " * stored at location pointed by p. Compute\n" |
41411 | " * max(old, val) and store maximum value at\n" |
41412 | " * location pointed by p. The function\n" |
41413 | " * returns old.\n" |
41414 | " */\n" |
41415 | "int __ovld atomic_max(volatile __global int *p, int val);\n" |
41416 | "unsigned int __ovld atomic_max(volatile __global unsigned int *p, unsigned int val);\n" |
41417 | "int __ovld atomic_max(volatile __local int *p, int val);\n" |
41418 | "unsigned int __ovld atomic_max(volatile __local unsigned int *p, unsigned int val);\n" |
41419 | "\n" |
41420 | "#if defined(cl_khr_global_int32_extended_atomics)\n" |
41421 | "int __ovld atom_max(volatile __global int *p, int val);\n" |
41422 | "unsigned int __ovld atom_max(volatile __global unsigned int *p, unsigned int val);\n" |
41423 | "#endif\n" |
41424 | "#if defined(cl_khr_local_int32_extended_atomics)\n" |
41425 | "int __ovld atom_max(volatile __local int *p, int val);\n" |
41426 | "unsigned int __ovld atom_max(volatile __local unsigned int *p, unsigned int val);\n" |
41427 | "#endif\n" |
41428 | "\n" |
41429 | "#if defined(cl_khr_int64_extended_atomics)\n" |
41430 | "long __ovld atom_max(volatile __global long *p, long val);\n" |
41431 | "unsigned long __ovld atom_max(volatile __global unsigned long *p, unsigned long val);\n" |
41432 | "long __ovld atom_max(volatile __local long *p, long val);\n" |
41433 | "unsigned long __ovld atom_max(volatile __local unsigned long *p, unsigned long val);\n" |
41434 | "#endif\n" |
41435 | "\n" |
41436 | "/**\n" |
41437 | " * Read the 32-bit value (referred to as old)\n" |
41438 | " * stored at location pointed by p. Compute\n" |
41439 | " * (old & val) and store result at location\n" |
41440 | " * pointed by p. The function returns old.\n" |
41441 | " */\n" |
41442 | "int __ovld atomic_and(volatile __global int *p, int val);\n" |
41443 | "unsigned int __ovld atomic_and(volatile __global unsigned int *p, unsigned int val);\n" |
41444 | "int __ovld atomic_and(volatile __local int *p, int val);\n" |
41445 | "unsigned int __ovld atomic_and(volatile __local unsigned int *p, unsigned int val);\n" |
41446 | "\n" |
41447 | "#if defined(cl_khr_global_int32_extended_atomics)\n" |
41448 | "int __ovld atom_and(volatile __global int *p, int val);\n" |
41449 | "unsigned int __ovld atom_and(volatile __global unsigned int *p, unsigned int val);\n" |
41450 | "#endif\n" |
41451 | "#if defined(cl_khr_local_int32_extended_atomics)\n" |
41452 | "int __ovld atom_and(volatile __local int *p, int val);\n" |
41453 | "unsigned int __ovld atom_and(volatile __local unsigned int *p, unsigned int val);\n" |
41454 | "#endif\n" |
41455 | "\n" |
41456 | "#if defined(cl_khr_int64_extended_atomics)\n" |
41457 | "long __ovld atom_and(volatile __global long *p, long val);\n" |
41458 | "unsigned long __ovld atom_and(volatile __global unsigned long *p, unsigned long val);\n" |
41459 | "long __ovld atom_and(volatile __local long *p, long val);\n" |
41460 | "unsigned long __ovld atom_and(volatile __local unsigned long *p, unsigned long val);\n" |
41461 | "#endif\n" |
41462 | "\n" |
41463 | "/**\n" |
41464 | " * Read the 32-bit value (referred to as old)\n" |
41465 | " * stored at location pointed by p. Compute\n" |
41466 | " * (old | val) and store result at location\n" |
41467 | " * pointed by p. The function returns old.\n" |
41468 | " */\n" |
41469 | "int __ovld atomic_or(volatile __global int *p, int val);\n" |
41470 | "unsigned int __ovld atomic_or(volatile __global unsigned int *p, unsigned int val);\n" |
41471 | "int __ovld atomic_or(volatile __local int *p, int val);\n" |
41472 | "unsigned int __ovld atomic_or(volatile __local unsigned int *p, unsigned int val);\n" |
41473 | "\n" |
41474 | "#if defined(cl_khr_global_int32_extended_atomics)\n" |
41475 | "int __ovld atom_or(volatile __global int *p, int val);\n" |
41476 | "unsigned int __ovld atom_or(volatile __global unsigned int *p, unsigned int val);\n" |
41477 | "#endif\n" |
41478 | "#if defined(cl_khr_local_int32_extended_atomics)\n" |
41479 | "int __ovld atom_or(volatile __local int *p, int val);\n" |
41480 | "unsigned int __ovld atom_or(volatile __local unsigned int *p, unsigned int val);\n" |
41481 | "#endif\n" |
41482 | "\n" |
41483 | "#if defined(cl_khr_int64_extended_atomics)\n" |
41484 | "long __ovld atom_or(volatile __global long *p, long val);\n" |
41485 | "unsigned long __ovld atom_or(volatile __global unsigned long *p, unsigned long val);\n" |
41486 | "long __ovld atom_or(volatile __local long *p, long val);\n" |
41487 | "unsigned long __ovld atom_or(volatile __local unsigned long *p, unsigned long val);\n" |
41488 | "#endif\n" |
41489 | "\n" |
41490 | "/**\n" |
41491 | " * Read the 32-bit value (referred to as old)\n" |
41492 | " * stored at location pointed by p. Compute\n" |
41493 | " * (old ^ val) and store result at location\n" |
41494 | " * pointed by p. The function returns old.\n" |
41495 | " */\n" |
41496 | "int __ovld atomic_xor(volatile __global int *p, int val);\n" |
41497 | "unsigned int __ovld atomic_xor(volatile __global unsigned int *p, unsigned int val);\n" |
41498 | "int __ovld atomic_xor(volatile __local int *p, int val);\n" |
41499 | "unsigned int __ovld atomic_xor(volatile __local unsigned int *p, unsigned int val);\n" |
41500 | "\n" |
41501 | "#if defined(cl_khr_global_int32_extended_atomics)\n" |
41502 | "int __ovld atom_xor(volatile __global int *p, int val);\n" |
41503 | "unsigned int __ovld atom_xor(volatile __global unsigned int *p, unsigned int val);\n" |
41504 | "#endif\n" |
41505 | "#if defined(cl_khr_local_int32_extended_atomics)\n" |
41506 | "int __ovld atom_xor(volatile __local int *p, int val);\n" |
41507 | "unsigned int __ovld atom_xor(volatile __local unsigned int *p, unsigned int val);\n" |
41508 | "#endif\n" |
41509 | "\n" |
41510 | "#if defined(cl_khr_int64_extended_atomics)\n" |
41511 | "long __ovld atom_xor(volatile __global long *p, long val);\n" |
41512 | "unsigned long __ovld atom_xor(volatile __global unsigned long *p, unsigned long val);\n" |
41513 | "long __ovld atom_xor(volatile __local long *p, long val);\n" |
41514 | "unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long val);\n" |
41515 | "#endif\n" |
41516 | "\n" |
41517 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
41518 | "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : disable\n" |
41519 | "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : disable\n" |
41520 | "#endif\n" |
41521 | "\n" |
41522 | "// OpenCL v2.0 s6.13.11 - Atomics Functions\n" |
41523 | "\n" |
41524 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
41525 | "#ifndef ATOMIC_VAR_INIT\n" |
41526 | "#define ATOMIC_VAR_INIT(x) (x)\n" |
41527 | "#endif //ATOMIC_VAR_INIT\n" |
41528 | "#define ATOMIC_FLAG_INIT 0\n" |
41529 | "\n" |
41530 | "// enum values aligned with what clang uses in EmitAtomicExpr()\n" |
41531 | "typedef enum memory_order\n" |
41532 | "{\n" |
41533 | " memory_order_relaxed = __ATOMIC_RELAXED,\n" |
41534 | " memory_order_acquire = __ATOMIC_ACQUIRE,\n" |
41535 | " memory_order_release = __ATOMIC_RELEASE,\n" |
41536 | " memory_order_acq_rel = __ATOMIC_ACQ_REL,\n" |
41537 | " memory_order_seq_cst = __ATOMIC_SEQ_CST\n" |
41538 | "} memory_order;\n" |
41539 | "\n" |
41540 | "// double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics\n" |
41541 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
41542 | "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n" |
41543 | "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n" |
41544 | "#endif\n" |
41545 | "\n" |
41546 | "// atomic_init()\n" |
41547 | "void __ovld atomic_init(volatile atomic_int *object, int value);\n" |
41548 | "void __ovld atomic_init(volatile atomic_uint *object, uint value);\n" |
41549 | "void __ovld atomic_init(volatile atomic_float *object, float value);\n" |
41550 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
41551 | "void __ovld atomic_init(volatile atomic_long *object, long value);\n" |
41552 | "void __ovld atomic_init(volatile atomic_ulong *object, ulong value);\n" |
41553 | "#ifdef cl_khr_fp64\n" |
41554 | "void __ovld atomic_init(volatile atomic_double *object, double value);\n" |
41555 | "#endif //cl_khr_fp64\n" |
41556 | "#endif\n" |
41557 | "\n" |
41558 | "// atomic_work_item_fence()\n" |
41559 | "void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, memory_scope scope);\n" |
41560 | "\n" |
41561 | "// atomic_fetch()\n" |
41562 | "\n" |
41563 | "int __ovld atomic_fetch_add(volatile atomic_int *object, int operand);\n" |
41564 | "int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order);\n" |
41565 | "int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n" |
41566 | "uint __ovld atomic_fetch_add(volatile atomic_uint *object, uint operand);\n" |
41567 | "uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n" |
41568 | "uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n" |
41569 | "int __ovld atomic_fetch_sub(volatile atomic_int *object, int operand);\n" |
41570 | "int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order);\n" |
41571 | "int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n" |
41572 | "uint __ovld atomic_fetch_sub(volatile atomic_uint *object, uint operand);\n" |
41573 | "uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n" |
41574 | "uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n" |
41575 | "int __ovld atomic_fetch_or(volatile atomic_int *object, int operand);\n" |
41576 | "int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order);\n" |
41577 | "int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n" |
41578 | "uint __ovld atomic_fetch_or(volatile atomic_uint *object, uint operand);\n" |
41579 | "uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n" |
41580 | "uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n" |
41581 | "int __ovld atomic_fetch_xor(volatile atomic_int *object, int operand);\n" |
41582 | "int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order);\n" |
41583 | "int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n" |
41584 | "uint __ovld atomic_fetch_xor(volatile atomic_uint *object, uint operand);\n" |
41585 | "uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n" |
41586 | "uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n" |
41587 | "int __ovld atomic_fetch_and(volatile atomic_int *object, int operand);\n" |
41588 | "int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order);\n" |
41589 | "int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n" |
41590 | "uint __ovld atomic_fetch_and(volatile atomic_uint *object, uint operand);\n" |
41591 | "uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n" |
41592 | "uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n" |
41593 | "int __ovld atomic_fetch_min(volatile atomic_int *object, int operand);\n" |
41594 | "int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order);\n" |
41595 | "int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n" |
41596 | "uint __ovld atomic_fetch_min(volatile atomic_uint *object, uint operand);\n" |
41597 | "uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n" |
41598 | "uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n" |
41599 | "uint __ovld atomic_fetch_min(volatile atomic_uint *object, int operand);\n" |
41600 | "uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order);\n" |
41601 | "uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope);\n" |
41602 | "int __ovld atomic_fetch_max(volatile atomic_int *object, int operand);\n" |
41603 | "int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order);\n" |
41604 | "int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n" |
41605 | "uint __ovld atomic_fetch_max(volatile atomic_uint *object, uint operand);\n" |
41606 | "uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n" |
41607 | "uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n" |
41608 | "uint __ovld atomic_fetch_max(volatile atomic_uint *object, int operand);\n" |
41609 | "uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order);\n" |
41610 | "uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope);\n" |
41611 | "\n" |
41612 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
41613 | "long __ovld atomic_fetch_add(volatile atomic_long *object, long operand);\n" |
41614 | "long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order);\n" |
41615 | "long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n" |
41616 | "ulong __ovld atomic_fetch_add(volatile atomic_ulong *object, ulong operand);\n" |
41617 | "ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n" |
41618 | "ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n" |
41619 | "long __ovld atomic_fetch_sub(volatile atomic_long *object, long operand);\n" |
41620 | "long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order);\n" |
41621 | "long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n" |
41622 | "ulong __ovld atomic_fetch_sub(volatile atomic_ulong *object, ulong operand);\n" |
41623 | "ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n" |
41624 | "ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n" |
41625 | "long __ovld atomic_fetch_or(volatile atomic_long *object, long operand);\n" |
41626 | "long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order);\n" |
41627 | "long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n" |
41628 | "ulong __ovld atomic_fetch_or(volatile atomic_ulong *object, ulong operand);\n" |
41629 | "ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n" |
41630 | "ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n" |
41631 | "long __ovld atomic_fetch_xor(volatile atomic_long *object, long operand);\n" |
41632 | "long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order);\n" |
41633 | "long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n" |
41634 | "ulong __ovld atomic_fetch_xor(volatile atomic_ulong *object, ulong operand);\n" |
41635 | "ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n" |
41636 | "ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n" |
41637 | "long __ovld atomic_fetch_and(volatile atomic_long *object, long operand);\n" |
41638 | "long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order);\n" |
41639 | "long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n" |
41640 | "ulong __ovld atomic_fetch_and(volatile atomic_ulong *object, ulong operand);\n" |
41641 | "ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n" |
41642 | "ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n" |
41643 | "long __ovld atomic_fetch_min(volatile atomic_long *object, long operand);\n" |
41644 | "long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order);\n" |
41645 | "long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n" |
41646 | "ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, ulong operand);\n" |
41647 | "ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n" |
41648 | "ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n" |
41649 | "ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, long operand);\n" |
41650 | "ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order);\n" |
41651 | "ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope);\n" |
41652 | "long __ovld atomic_fetch_max(volatile atomic_long *object, long operand);\n" |
41653 | "long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order);\n" |
41654 | "long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n" |
41655 | "ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, ulong operand);\n" |
41656 | "ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n" |
41657 | "ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n" |
41658 | "ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, long operand);\n" |
41659 | "ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order);\n" |
41660 | "ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope);\n" |
41661 | "#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
41662 | "\n" |
41663 | "// OpenCL v2.0 s6.13.11.7.5:\n" |
41664 | "// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t.\n" |
41665 | "// or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t.\n" |
41666 | "\n" |
41667 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
41668 | "uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand);\n" |
41669 | "uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);\n" |
41670 | "uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);\n" |
41671 | "uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ptrdiff_t operand);\n" |
41672 | "uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);\n" |
41673 | "uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);\n" |
41674 | "\n" |
41675 | "uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, intptr_t operand);\n" |
41676 | "uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);\n" |
41677 | "uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);\n" |
41678 | "uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, intptr_t operand);\n" |
41679 | "uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);\n" |
41680 | "uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);\n" |
41681 | "uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, intptr_t operand);\n" |
41682 | "uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);\n" |
41683 | "uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);\n" |
41684 | "uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, intptr_t opermax);\n" |
41685 | "uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder);\n" |
41686 | "uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope);\n" |
41687 | "uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, intptr_t opermax);\n" |
41688 | "uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder);\n" |
41689 | "uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope);\n" |
41690 | "\n" |
41691 | "intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, uintptr_t operand);\n" |
41692 | "intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);\n" |
41693 | "intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);\n" |
41694 | "intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, uintptr_t operand);\n" |
41695 | "intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);\n" |
41696 | "intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);\n" |
41697 | "intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, uintptr_t operand);\n" |
41698 | "intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);\n" |
41699 | "intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);\n" |
41700 | "intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, uintptr_t opermax);\n" |
41701 | "intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder);\n" |
41702 | "intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope);\n" |
41703 | "intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, uintptr_t opermax);\n" |
41704 | "intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder);\n" |
41705 | "intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope);\n" |
41706 | "#endif\n" |
41707 | "\n" |
41708 | "// atomic_store()\n" |
41709 | "\n" |
41710 | "void __ovld atomic_store(volatile atomic_int *object, int desired);\n" |
41711 | "void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order);\n" |
41712 | "void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);\n" |
41713 | "void __ovld atomic_store(volatile atomic_uint *object, uint desired);\n" |
41714 | "void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order);\n" |
41715 | "void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);\n" |
41716 | "void __ovld atomic_store(volatile atomic_float *object, float desired);\n" |
41717 | "void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order);\n" |
41718 | "void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);\n" |
41719 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
41720 | "#ifdef cl_khr_fp64\n" |
41721 | "void __ovld atomic_store(volatile atomic_double *object, double desired);\n" |
41722 | "void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order);\n" |
41723 | "void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope);\n" |
41724 | "#endif //cl_khr_fp64\n" |
41725 | "void __ovld atomic_store(volatile atomic_long *object, long desired);\n" |
41726 | "void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order);\n" |
41727 | "void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);\n" |
41728 | "void __ovld atomic_store(volatile atomic_ulong *object, ulong desired);\n" |
41729 | "void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);\n" |
41730 | "void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);\n" |
41731 | "#endif\n" |
41732 | "\n" |
41733 | "// atomic_load()\n" |
41734 | "\n" |
41735 | "int __ovld atomic_load(volatile atomic_int *object);\n" |
41736 | "int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order);\n" |
41737 | "int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, memory_scope scope);\n" |
41738 | "uint __ovld atomic_load(volatile atomic_uint *object);\n" |
41739 | "uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order);\n" |
41740 | "uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order, memory_scope scope);\n" |
41741 | "float __ovld atomic_load(volatile atomic_float *object);\n" |
41742 | "float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order);\n" |
41743 | "float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order, memory_scope scope);\n" |
41744 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
41745 | "#ifdef cl_khr_fp64\n" |
41746 | "double __ovld atomic_load(volatile atomic_double *object);\n" |
41747 | "double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order);\n" |
41748 | "double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order, memory_scope scope);\n" |
41749 | "#endif //cl_khr_fp64\n" |
41750 | "long __ovld atomic_load(volatile atomic_long *object);\n" |
41751 | "long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order);\n" |
41752 | "long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order, memory_scope scope);\n" |
41753 | "ulong __ovld atomic_load(volatile atomic_ulong *object);\n" |
41754 | "ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order);\n" |
41755 | "ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order, memory_scope scope);\n" |
41756 | "#endif\n" |
41757 | "\n" |
41758 | "// atomic_exchange()\n" |
41759 | "\n" |
41760 | "int __ovld atomic_exchange(volatile atomic_int *object, int desired);\n" |
41761 | "int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order);\n" |
41762 | "int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);\n" |
41763 | "uint __ovld atomic_exchange(volatile atomic_uint *object, uint desired);\n" |
41764 | "uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order);\n" |
41765 | "uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);\n" |
41766 | "float __ovld atomic_exchange(volatile atomic_float *object, float desired);\n" |
41767 | "float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order);\n" |
41768 | "float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);\n" |
41769 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
41770 | "#ifdef cl_khr_fp64\n" |
41771 | "double __ovld atomic_exchange(volatile atomic_double *object, double desired);\n" |
41772 | "double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order);\n" |
41773 | "double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope);\n" |
41774 | "#endif //cl_khr_fp64\n" |
41775 | "long __ovld atomic_exchange(volatile atomic_long *object, long desired);\n" |
41776 | "long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order);\n" |
41777 | "long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);\n" |
41778 | "ulong __ovld atomic_exchange(volatile atomic_ulong *object, ulong desired);\n" |
41779 | "ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);\n" |
41780 | "ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);\n" |
41781 | "#endif\n" |
41782 | "\n" |
41783 | "// atomic_compare_exchange_strong() and atomic_compare_exchange_weak()\n" |
41784 | "\n" |
41785 | "bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, int *expected, int desired);\n" |
41786 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,\n" |
41787 | " int desired, memory_order success, memory_order failure);\n" |
41788 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,\n" |
41789 | " int desired, memory_order success, memory_order failure, memory_scope scope);\n" |
41790 | "bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, uint *expected, uint desired);\n" |
41791 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,\n" |
41792 | " uint desired, memory_order success, memory_order failure);\n" |
41793 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,\n" |
41794 | " uint desired, memory_order success, memory_order failure, memory_scope scope);\n" |
41795 | "bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, int *expected, int desired);\n" |
41796 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected,\n" |
41797 | " int desired, memory_order success, memory_order failure);\n" |
41798 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected,\n" |
41799 | " int desired, memory_order success, memory_order failure, memory_scope scope);\n" |
41800 | "bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, uint *expected, uint desired);\n" |
41801 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected,\n" |
41802 | " uint desired, memory_order success, memory_order failure);\n" |
41803 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected,\n" |
41804 | " uint desired, memory_order success, memory_order failure, memory_scope scope);\n" |
41805 | "bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, float *expected, float desired);\n" |
41806 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected,\n" |
41807 | " float desired, memory_order success, memory_order failure);\n" |
41808 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected,\n" |
41809 | " float desired, memory_order success, memory_order failure, memory_scope scope);\n" |
41810 | "bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, float *expected, float desired);\n" |
41811 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected,\n" |
41812 | " float desired, memory_order success, memory_order failure);\n" |
41813 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected,\n" |
41814 | " float desired, memory_order success, memory_order failure, memory_scope scope);\n" |
41815 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
41816 | "#ifdef cl_khr_fp64\n" |
41817 | "bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, double *expected, double desired);\n" |
41818 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected,\n" |
41819 | " double desired, memory_order success, memory_order failure);\n" |
41820 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected,\n" |
41821 | " double desired, memory_order success, memory_order failure, memory_scope scope);\n" |
41822 | "bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, double *expected, double desired);\n" |
41823 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected,\n" |
41824 | " double desired, memory_order success, memory_order failure);\n" |
41825 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected,\n" |
41826 | " double desired, memory_order success, memory_order failure, memory_scope scope);\n" |
41827 | "#endif //cl_khr_fp64\n" |
41828 | "bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, long *expected, long desired);\n" |
41829 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected,\n" |
41830 | " long desired, memory_order success, memory_order failure);\n" |
41831 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected,\n" |
41832 | " long desired, memory_order success, memory_order failure, memory_scope scope);\n" |
41833 | "bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, long *expected, long desired);\n" |
41834 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected,\n" |
41835 | " long desired, memory_order success, memory_order failure);\n" |
41836 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected,\n" |
41837 | " long desired, memory_order success, memory_order failure, memory_scope scope);\n" |
41838 | "bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ulong *expected, ulong desired);\n" |
41839 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected,\n" |
41840 | " ulong desired, memory_order success, memory_order failure);\n" |
41841 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected,\n" |
41842 | " ulong desired, memory_order success, memory_order failure, memory_scope scope);\n" |
41843 | "bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ulong *expected, ulong desired);\n" |
41844 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,\n" |
41845 | " ulong desired, memory_order success, memory_order failure);\n" |
41846 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,\n" |
41847 | " ulong desired, memory_order success, memory_order failure, memory_scope scope);\n" |
41848 | "#endif\n" |
41849 | "\n" |
41850 | "// atomic_flag_test_and_set() and atomic_flag_clear()\n" |
41851 | "\n" |
41852 | "bool __ovld atomic_flag_test_and_set(volatile atomic_flag *object);\n" |
41853 | "bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order);\n" |
41854 | "bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);\n" |
41855 | "void __ovld atomic_flag_clear(volatile atomic_flag *object);\n" |
41856 | "void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order);\n" |
41857 | "void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);\n" |
41858 | "\n" |
41859 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
41860 | "\n" |
41861 | "// OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions\n" |
41862 | "\n" |
41863 | "/**\n" |
41864 | " * The shuffle and shuffle2 built-in functions construct\n" |
41865 | " * a permutation of elements from one or two input\n" |
41866 | " * vectors respectively that are of the same type,\n" |
41867 | " * returning a vector with the same element type as the\n" |
41868 | " * input and length that is the same as the shuffle mask.\n" |
41869 | " * The size of each element in the mask must match the\n" |
41870 | " * size of each element in the result. For shuffle, only\n" |
41871 | " * the ilogb(2m-1) least significant bits of each mask\n" |
41872 | " * element are considered. For shuffle2, only the\n" |
41873 | " * ilogb(2m-1)+1 least significant bits of each mask\n" |
41874 | " * element are considered. Other bits in the mask shall\n" |
41875 | " * be ignored.\n" |
41876 | " * The elements of the input vectors are numbered from\n" |
41877 | " * left to right across one or both of the vectors. For this\n" |
41878 | " * purpose, the number of elements in a vector is given\n" |
41879 | " * by vec_step(gentypem). The shuffle mask operand\n" |
41880 | " * specifies, for each element of the result vector, which\n" |
41881 | " * element of the one or two input vectors the result\n" |
41882 | " * element gets.\n" |
41883 | " * Examples:\n" |
41884 | " * uint4 mask = (uint4)(3, 2,\n" |
41885 | " * 1, 0);\n" |
41886 | " * float4 a;\n" |
41887 | " * float4 r = shuffle(a, mask);\n" |
41888 | " * // r.s0123 = a.wzyx\n" |
41889 | " * uint8 mask = (uint8)(0, 1, 2, 3,\n" |
41890 | " * 4, 5, 6, 7);\n" |
41891 | " * float4 a, b;\n" |
41892 | " * float8 r = shuffle2(a, b, mask);\n" |
41893 | " * // r.s0123 = a.xyzw\n" |
41894 | " * // r.s4567 = b.xyzw\n" |
41895 | " * uint4 mask;\n" |
41896 | " * float8 a;\n" |
41897 | " * float4 b;\n" |
41898 | " * b = shuffle(a, mask);\n" |
41899 | " * Examples that are not valid are:\n" |
41900 | " * uint8 mask;\n" |
41901 | " * short16 a;\n" |
41902 | " * short8 b;\n" |
41903 | " * b = shuffle(a, mask); <- not valid\n" |
41904 | " */\n" |
41905 | "char2 __ovld __cnfn shuffle(char2 x, uchar2 mask);\n" |
41906 | "char2 __ovld __cnfn shuffle(char4 x, uchar2 mask);\n" |
41907 | "char2 __ovld __cnfn shuffle(char8 x, uchar2 mask);\n" |
41908 | "char2 __ovld __cnfn shuffle(char16 x, uchar2 mask);\n" |
41909 | "\n" |
41910 | "uchar2 __ovld __cnfn shuffle(uchar2 x, uchar2 mask);\n" |
41911 | "uchar2 __ovld __cnfn shuffle(uchar4 x, uchar2 mask);\n" |
41912 | "uchar2 __ovld __cnfn shuffle(uchar8 x, uchar2 mask);\n" |
41913 | "uchar2 __ovld __cnfn shuffle(uchar16 x, uchar2 mask);\n" |
41914 | "\n" |
41915 | "short2 __ovld __cnfn shuffle(short2 x, ushort2 mask);\n" |
41916 | "short2 __ovld __cnfn shuffle(short4 x, ushort2 mask);\n" |
41917 | "short2 __ovld __cnfn shuffle(short8 x, ushort2 mask);\n" |
41918 | "short2 __ovld __cnfn shuffle(short16 x, ushort2 mask);\n" |
41919 | "\n" |
41920 | "ushort2 __ovld __cnfn shuffle(ushort2 x, ushort2 mask);\n" |
41921 | "ushort2 __ovld __cnfn shuffle(ushort4 x, ushort2 mask);\n" |
41922 | "ushort2 __ovld __cnfn shuffle(ushort8 x, ushort2 mask);\n" |
41923 | "ushort2 __ovld __cnfn shuffle(ushort16 x, ushort2 mask);\n" |
41924 | "\n" |
41925 | "int2 __ovld __cnfn shuffle(int2 x, uint2 mask);\n" |
41926 | "int2 __ovld __cnfn shuffle(int4 x, uint2 mask);\n" |
41927 | "int2 __ovld __cnfn shuffle(int8 x, uint2 mask);\n" |
41928 | "int2 __ovld __cnfn shuffle(int16 x, uint2 mask);\n" |
41929 | "\n" |
41930 | "uint2 __ovld __cnfn shuffle(uint2 x, uint2 mask);\n" |
41931 | "uint2 __ovld __cnfn shuffle(uint4 x, uint2 mask);\n" |
41932 | "uint2 __ovld __cnfn shuffle(uint8 x, uint2 mask);\n" |
41933 | "uint2 __ovld __cnfn shuffle(uint16 x, uint2 mask);\n" |
41934 | "\n" |
41935 | "long2 __ovld __cnfn shuffle(long2 x, ulong2 mask);\n" |
41936 | "long2 __ovld __cnfn shuffle(long4 x, ulong2 mask);\n" |
41937 | "long2 __ovld __cnfn shuffle(long8 x, ulong2 mask);\n" |
41938 | "long2 __ovld __cnfn shuffle(long16 x, ulong2 mask);\n" |
41939 | "\n" |
41940 | "ulong2 __ovld __cnfn shuffle(ulong2 x, ulong2 mask);\n" |
41941 | "ulong2 __ovld __cnfn shuffle(ulong4 x, ulong2 mask);\n" |
41942 | "ulong2 __ovld __cnfn shuffle(ulong8 x, ulong2 mask);\n" |
41943 | "ulong2 __ovld __cnfn shuffle(ulong16 x, ulong2 mask);\n" |
41944 | "\n" |
41945 | "float2 __ovld __cnfn shuffle(float2 x, uint2 mask);\n" |
41946 | "float2 __ovld __cnfn shuffle(float4 x, uint2 mask);\n" |
41947 | "float2 __ovld __cnfn shuffle(float8 x, uint2 mask);\n" |
41948 | "float2 __ovld __cnfn shuffle(float16 x, uint2 mask);\n" |
41949 | "\n" |
41950 | "char4 __ovld __cnfn shuffle(char2 x, uchar4 mask);\n" |
41951 | "char4 __ovld __cnfn shuffle(char4 x, uchar4 mask);\n" |
41952 | "char4 __ovld __cnfn shuffle(char8 x, uchar4 mask);\n" |
41953 | "char4 __ovld __cnfn shuffle(char16 x, uchar4 mask);\n" |
41954 | "\n" |
41955 | "uchar4 __ovld __cnfn shuffle(uchar2 x, uchar4 mask);\n" |
41956 | "uchar4 __ovld __cnfn shuffle(uchar4 x, uchar4 mask);\n" |
41957 | "uchar4 __ovld __cnfn shuffle(uchar8 x, uchar4 mask);\n" |
41958 | "uchar4 __ovld __cnfn shuffle(uchar16 x, uchar4 mask);\n" |
41959 | "\n" |
41960 | "short4 __ovld __cnfn shuffle(short2 x, ushort4 mask);\n" |
41961 | "short4 __ovld __cnfn shuffle(short4 x, ushort4 mask);\n" |
41962 | "short4 __ovld __cnfn shuffle(short8 x, ushort4 mask);\n" |
41963 | "short4 __ovld __cnfn shuffle(short16 x, ushort4 mask);\n" |
41964 | "\n" |
41965 | "ushort4 __ovld __cnfn shuffle(ushort2 x, ushort4 mask);\n" |
41966 | "ushort4 __ovld __cnfn shuffle(ushort4 x, ushort4 mask);\n" |
41967 | "ushort4 __ovld __cnfn shuffle(ushort8 x, ushort4 mask);\n" |
41968 | "ushort4 __ovld __cnfn shuffle(ushort16 x, ushort4 mask);\n" |
41969 | "\n" |
41970 | "int4 __ovld __cnfn shuffle(int2 x, uint4 mask);\n" |
41971 | "int4 __ovld __cnfn shuffle(int4 x, uint4 mask);\n" |
41972 | "int4 __ovld __cnfn shuffle(int8 x, uint4 mask);\n" |
41973 | "int4 __ovld __cnfn shuffle(int16 x, uint4 mask);\n" |
41974 | "\n" |
41975 | "uint4 __ovld __cnfn shuffle(uint2 x, uint4 mask);\n" |
41976 | "uint4 __ovld __cnfn shuffle(uint4 x, uint4 mask);\n" |
41977 | "uint4 __ovld __cnfn shuffle(uint8 x, uint4 mask);\n" |
41978 | "uint4 __ovld __cnfn shuffle(uint16 x, uint4 mask);\n" |
41979 | "\n" |
41980 | "long4 __ovld __cnfn shuffle(long2 x, ulong4 mask);\n" |
41981 | "long4 __ovld __cnfn shuffle(long4 x, ulong4 mask);\n" |
41982 | "long4 __ovld __cnfn shuffle(long8 x, ulong4 mask);\n" |
41983 | "long4 __ovld __cnfn shuffle(long16 x, ulong4 mask);\n" |
41984 | "\n" |
41985 | "ulong4 __ovld __cnfn shuffle(ulong2 x, ulong4 mask);\n" |
41986 | "ulong4 __ovld __cnfn shuffle(ulong4 x, ulong4 mask);\n" |
41987 | "ulong4 __ovld __cnfn shuffle(ulong8 x, ulong4 mask);\n" |
41988 | "ulong4 __ovld __cnfn shuffle(ulong16 x, ulong4 mask);\n" |
41989 | "\n" |
41990 | "float4 __ovld __cnfn shuffle(float2 x, uint4 mask);\n" |
41991 | "float4 __ovld __cnfn shuffle(float4 x, uint4 mask);\n" |
41992 | "float4 __ovld __cnfn shuffle(float8 x, uint4 mask);\n" |
41993 | "float4 __ovld __cnfn shuffle(float16 x, uint4 mask);\n" |
41994 | "\n" |
41995 | "char8 __ovld __cnfn shuffle(char2 x, uchar8 mask);\n" |
41996 | "char8 __ovld __cnfn shuffle(char4 x, uchar8 mask);\n" |
41997 | "char8 __ovld __cnfn shuffle(char8 x, uchar8 mask);\n" |
41998 | "char8 __ovld __cnfn shuffle(char16 x, uchar8 mask);\n" |
41999 | "\n" |
42000 | "uchar8 __ovld __cnfn shuffle(uchar2 x, uchar8 mask);\n" |
42001 | "uchar8 __ovld __cnfn shuffle(uchar4 x, uchar8 mask);\n" |
42002 | "uchar8 __ovld __cnfn shuffle(uchar8 x, uchar8 mask);\n" |
42003 | "uchar8 __ovld __cnfn shuffle(uchar16 x, uchar8 mask);\n" |
42004 | "\n" |
42005 | "short8 __ovld __cnfn shuffle(short2 x, ushort8 mask);\n" |
42006 | "short8 __ovld __cnfn shuffle(short4 x, ushort8 mask);\n" |
42007 | "short8 __ovld __cnfn shuffle(short8 x, ushort8 mask);\n" |
42008 | "short8 __ovld __cnfn shuffle(short16 x, ushort8 mask);\n" |
42009 | "\n" |
42010 | "ushort8 __ovld __cnfn shuffle(ushort2 x, ushort8 mask);\n" |
42011 | "ushort8 __ovld __cnfn shuffle(ushort4 x, ushort8 mask);\n" |
42012 | "ushort8 __ovld __cnfn shuffle(ushort8 x, ushort8 mask);\n" |
42013 | "ushort8 __ovld __cnfn shuffle(ushort16 x, ushort8 mask);\n" |
42014 | "\n" |
42015 | "int8 __ovld __cnfn shuffle(int2 x, uint8 mask);\n" |
42016 | "int8 __ovld __cnfn shuffle(int4 x, uint8 mask);\n" |
42017 | "int8 __ovld __cnfn shuffle(int8 x, uint8 mask);\n" |
42018 | "int8 __ovld __cnfn shuffle(int16 x, uint8 mask);\n" |
42019 | "\n" |
42020 | "uint8 __ovld __cnfn shuffle(uint2 x, uint8 mask);\n" |
42021 | "uint8 __ovld __cnfn shuffle(uint4 x, uint8 mask);\n" |
42022 | "uint8 __ovld __cnfn shuffle(uint8 x, uint8 mask);\n" |
42023 | "uint8 __ovld __cnfn shuffle(uint16 x, uint8 mask);\n" |
42024 | "\n" |
42025 | "long8 __ovld __cnfn shuffle(long2 x, ulong8 mask);\n" |
42026 | "long8 __ovld __cnfn shuffle(long4 x, ulong8 mask);\n" |
42027 | "long8 __ovld __cnfn shuffle(long8 x, ulong8 mask);\n" |
42028 | "long8 __ovld __cnfn shuffle(long16 x, ulong8 mask);\n" |
42029 | "\n" |
42030 | "ulong8 __ovld __cnfn shuffle(ulong2 x, ulong8 mask);\n" |
42031 | "ulong8 __ovld __cnfn shuffle(ulong4 x, ulong8 mask);\n" |
42032 | "ulong8 __ovld __cnfn shuffle(ulong8 x, ulong8 mask);\n" |
42033 | "ulong8 __ovld __cnfn shuffle(ulong16 x, ulong8 mask);\n" |
42034 | "\n" |
42035 | "float8 __ovld __cnfn shuffle(float2 x, uint8 mask);\n" |
42036 | "float8 __ovld __cnfn shuffle(float4 x, uint8 mask);\n" |
42037 | "float8 __ovld __cnfn shuffle(float8 x, uint8 mask);\n" |
42038 | "float8 __ovld __cnfn shuffle(float16 x, uint8 mask);\n" |
42039 | "\n" |
42040 | "char16 __ovld __cnfn shuffle(char2 x, uchar16 mask);\n" |
42041 | "char16 __ovld __cnfn shuffle(char4 x, uchar16 mask);\n" |
42042 | "char16 __ovld __cnfn shuffle(char8 x, uchar16 mask);\n" |
42043 | "char16 __ovld __cnfn shuffle(char16 x, uchar16 mask);\n" |
42044 | "\n" |
42045 | "uchar16 __ovld __cnfn shuffle(uchar2 x, uchar16 mask);\n" |
42046 | "uchar16 __ovld __cnfn shuffle(uchar4 x, uchar16 mask);\n" |
42047 | "uchar16 __ovld __cnfn shuffle(uchar8 x, uchar16 mask);\n" |
42048 | "uchar16 __ovld __cnfn shuffle(uchar16 x, uchar16 mask);\n" |
42049 | "\n" |
42050 | "short16 __ovld __cnfn shuffle(short2 x, ushort16 mask);\n" |
42051 | "short16 __ovld __cnfn shuffle(short4 x, ushort16 mask);\n" |
42052 | "short16 __ovld __cnfn shuffle(short8 x, ushort16 mask);\n" |
42053 | "short16 __ovld __cnfn shuffle(short16 x, ushort16 mask);\n" |
42054 | "\n" |
42055 | "ushort16 __ovld __cnfn shuffle(ushort2 x, ushort16 mask);\n" |
42056 | "ushort16 __ovld __cnfn shuffle(ushort4 x, ushort16 mask);\n" |
42057 | "ushort16 __ovld __cnfn shuffle(ushort8 x, ushort16 mask);\n" |
42058 | "ushort16 __ovld __cnfn shuffle(ushort16 x, ushort16 mask);\n" |
42059 | "\n" |
42060 | "int16 __ovld __cnfn shuffle(int2 x, uint16 mask);\n" |
42061 | "int16 __ovld __cnfn shuffle(int4 x, uint16 mask);\n" |
42062 | "int16 __ovld __cnfn shuffle(int8 x, uint16 mask);\n" |
42063 | "int16 __ovld __cnfn shuffle(int16 x, uint16 mask);\n" |
42064 | "\n" |
42065 | "uint16 __ovld __cnfn shuffle(uint2 x, uint16 mask);\n" |
42066 | "uint16 __ovld __cnfn shuffle(uint4 x, uint16 mask);\n" |
42067 | "uint16 __ovld __cnfn shuffle(uint8 x, uint16 mask);\n" |
42068 | "uint16 __ovld __cnfn shuffle(uint16 x, uint16 mask);\n" |
42069 | "\n" |
42070 | "long16 __ovld __cnfn shuffle(long2 x, ulong16 mask);\n" |
42071 | "long16 __ovld __cnfn shuffle(long4 x, ulong16 mask);\n" |
42072 | "long16 __ovld __cnfn shuffle(long8 x, ulong16 mask);\n" |
42073 | "long16 __ovld __cnfn shuffle(long16 x, ulong16 mask);\n" |
42074 | "\n" |
42075 | "ulong16 __ovld __cnfn shuffle(ulong2 x, ulong16 mask);\n" |
42076 | "ulong16 __ovld __cnfn shuffle(ulong4 x, ulong16 mask);\n" |
42077 | "ulong16 __ovld __cnfn shuffle(ulong8 x, ulong16 mask);\n" |
42078 | "ulong16 __ovld __cnfn shuffle(ulong16 x, ulong16 mask);\n" |
42079 | "\n" |
42080 | "float16 __ovld __cnfn shuffle(float2 x, uint16 mask);\n" |
42081 | "float16 __ovld __cnfn shuffle(float4 x, uint16 mask);\n" |
42082 | "float16 __ovld __cnfn shuffle(float8 x, uint16 mask);\n" |
42083 | "float16 __ovld __cnfn shuffle(float16 x, uint16 mask);\n" |
42084 | "\n" |
42085 | "#ifdef cl_khr_fp64\n" |
42086 | "double2 __ovld __cnfn shuffle(double2 x, ulong2 mask);\n" |
42087 | "double2 __ovld __cnfn shuffle(double4 x, ulong2 mask);\n" |
42088 | "double2 __ovld __cnfn shuffle(double8 x, ulong2 mask);\n" |
42089 | "double2 __ovld __cnfn shuffle(double16 x, ulong2 mask);\n" |
42090 | "\n" |
42091 | "double4 __ovld __cnfn shuffle(double2 x, ulong4 mask);\n" |
42092 | "double4 __ovld __cnfn shuffle(double4 x, ulong4 mask);\n" |
42093 | "double4 __ovld __cnfn shuffle(double8 x, ulong4 mask);\n" |
42094 | "double4 __ovld __cnfn shuffle(double16 x, ulong4 mask);\n" |
42095 | "\n" |
42096 | "double8 __ovld __cnfn shuffle(double2 x, ulong8 mask);\n" |
42097 | "double8 __ovld __cnfn shuffle(double4 x, ulong8 mask);\n" |
42098 | "double8 __ovld __cnfn shuffle(double8 x, ulong8 mask);\n" |
42099 | "double8 __ovld __cnfn shuffle(double16 x, ulong8 mask);\n" |
42100 | "\n" |
42101 | "double16 __ovld __cnfn shuffle(double2 x, ulong16 mask);\n" |
42102 | "double16 __ovld __cnfn shuffle(double4 x, ulong16 mask);\n" |
42103 | "double16 __ovld __cnfn shuffle(double8 x, ulong16 mask);\n" |
42104 | "double16 __ovld __cnfn shuffle(double16 x, ulong16 mask);\n" |
42105 | "#endif //cl_khr_fp64\n" |
42106 | "\n" |
42107 | "#ifdef cl_khr_fp16\n" |
42108 | "half2 __ovld __cnfn shuffle(half2 x, ushort2 mask);\n" |
42109 | "half2 __ovld __cnfn shuffle(half4 x, ushort2 mask);\n" |
42110 | "half2 __ovld __cnfn shuffle(half8 x, ushort2 mask);\n" |
42111 | "half2 __ovld __cnfn shuffle(half16 x, ushort2 mask);\n" |
42112 | "\n" |
42113 | "half4 __ovld __cnfn shuffle(half2 x, ushort4 mask);\n" |
42114 | "half4 __ovld __cnfn shuffle(half4 x, ushort4 mask);\n" |
42115 | "half4 __ovld __cnfn shuffle(half8 x, ushort4 mask);\n" |
42116 | "half4 __ovld __cnfn shuffle(half16 x, ushort4 mask);\n" |
42117 | "\n" |
42118 | "half8 __ovld __cnfn shuffle(half2 x, ushort8 mask);\n" |
42119 | "half8 __ovld __cnfn shuffle(half4 x, ushort8 mask);\n" |
42120 | "half8 __ovld __cnfn shuffle(half8 x, ushort8 mask);\n" |
42121 | "half8 __ovld __cnfn shuffle(half16 x, ushort8 mask);\n" |
42122 | "\n" |
42123 | "half16 __ovld __cnfn shuffle(half2 x, ushort16 mask);\n" |
42124 | "half16 __ovld __cnfn shuffle(half4 x, ushort16 mask);\n" |
42125 | "half16 __ovld __cnfn shuffle(half8 x, ushort16 mask);\n" |
42126 | "half16 __ovld __cnfn shuffle(half16 x, ushort16 mask);\n" |
42127 | "#endif //cl_khr_fp16\n" |
42128 | "\n" |
42129 | "char2 __ovld __cnfn shuffle2(char2 x, char2 y, uchar2 mask);\n" |
42130 | "char2 __ovld __cnfn shuffle2(char4 x, char4 y, uchar2 mask);\n" |
42131 | "char2 __ovld __cnfn shuffle2(char8 x, char8 y, uchar2 mask);\n" |
42132 | "char2 __ovld __cnfn shuffle2(char16 x, char16 y, uchar2 mask);\n" |
42133 | "\n" |
42134 | "uchar2 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar2 mask);\n" |
42135 | "uchar2 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar2 mask);\n" |
42136 | "uchar2 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar2 mask);\n" |
42137 | "uchar2 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar2 mask);\n" |
42138 | "\n" |
42139 | "short2 __ovld __cnfn shuffle2(short2 x, short2 y, ushort2 mask);\n" |
42140 | "short2 __ovld __cnfn shuffle2(short4 x, short4 y, ushort2 mask);\n" |
42141 | "short2 __ovld __cnfn shuffle2(short8 x, short8 y, ushort2 mask);\n" |
42142 | "short2 __ovld __cnfn shuffle2(short16 x, short16 y, ushort2 mask);\n" |
42143 | "\n" |
42144 | "ushort2 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort2 mask);\n" |
42145 | "ushort2 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort2 mask);\n" |
42146 | "ushort2 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort2 mask);\n" |
42147 | "ushort2 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort2 mask);\n" |
42148 | "\n" |
42149 | "int2 __ovld __cnfn shuffle2(int2 x, int2 y, uint2 mask);\n" |
42150 | "int2 __ovld __cnfn shuffle2(int4 x, int4 y, uint2 mask);\n" |
42151 | "int2 __ovld __cnfn shuffle2(int8 x, int8 y, uint2 mask);\n" |
42152 | "int2 __ovld __cnfn shuffle2(int16 x, int16 y, uint2 mask);\n" |
42153 | "\n" |
42154 | "uint2 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint2 mask);\n" |
42155 | "uint2 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint2 mask);\n" |
42156 | "uint2 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint2 mask);\n" |
42157 | "uint2 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint2 mask);\n" |
42158 | "\n" |
42159 | "long2 __ovld __cnfn shuffle2(long2 x, long2 y, ulong2 mask);\n" |
42160 | "long2 __ovld __cnfn shuffle2(long4 x, long4 y, ulong2 mask);\n" |
42161 | "long2 __ovld __cnfn shuffle2(long8 x, long8 y, ulong2 mask);\n" |
42162 | "long2 __ovld __cnfn shuffle2(long16 x, long16 y, ulong2 mask);\n" |
42163 | "\n" |
42164 | "ulong2 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong2 mask);\n" |
42165 | "ulong2 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong2 mask);\n" |
42166 | "ulong2 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong2 mask);\n" |
42167 | "ulong2 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong2 mask);\n" |
42168 | "\n" |
42169 | "float2 __ovld __cnfn shuffle2(float2 x, float2 y, uint2 mask);\n" |
42170 | "float2 __ovld __cnfn shuffle2(float4 x, float4 y, uint2 mask);\n" |
42171 | "float2 __ovld __cnfn shuffle2(float8 x, float8 y, uint2 mask);\n" |
42172 | "float2 __ovld __cnfn shuffle2(float16 x, float16 y, uint2 mask);\n" |
42173 | "\n" |
42174 | "char4 __ovld __cnfn shuffle2(char2 x, char2 y, uchar4 mask);\n" |
42175 | "char4 __ovld __cnfn shuffle2(char4 x, char4 y, uchar4 mask);\n" |
42176 | "char4 __ovld __cnfn shuffle2(char8 x, char8 y, uchar4 mask);\n" |
42177 | "char4 __ovld __cnfn shuffle2(char16 x, char16 y, uchar4 mask);\n" |
42178 | "\n" |
42179 | "uchar4 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar4 mask);\n" |
42180 | "uchar4 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar4 mask);\n" |
42181 | "uchar4 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar4 mask);\n" |
42182 | "uchar4 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar4 mask);\n" |
42183 | "\n" |
42184 | "short4 __ovld __cnfn shuffle2(short2 x, short2 y, ushort4 mask);\n" |
42185 | "short4 __ovld __cnfn shuffle2(short4 x, short4 y, ushort4 mask);\n" |
42186 | "short4 __ovld __cnfn shuffle2(short8 x, short8 y, ushort4 mask);\n" |
42187 | "short4 __ovld __cnfn shuffle2(short16 x, short16 y, ushort4 mask);\n" |
42188 | "\n" |
42189 | "ushort4 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort4 mask);\n" |
42190 | "ushort4 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort4 mask);\n" |
42191 | "ushort4 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort4 mask);\n" |
42192 | "ushort4 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort4 mask);\n" |
42193 | "\n" |
42194 | "int4 __ovld __cnfn shuffle2(int2 x, int2 y, uint4 mask);\n" |
42195 | "int4 __ovld __cnfn shuffle2(int4 x, int4 y, uint4 mask);\n" |
42196 | "int4 __ovld __cnfn shuffle2(int8 x, int8 y, uint4 mask);\n" |
42197 | "int4 __ovld __cnfn shuffle2(int16 x, int16 y, uint4 mask);\n" |
42198 | "\n" |
42199 | "uint4 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint4 mask);\n" |
42200 | "uint4 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint4 mask);\n" |
42201 | "uint4 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint4 mask);\n" |
42202 | "uint4 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint4 mask);\n" |
42203 | "\n" |
42204 | "long4 __ovld __cnfn shuffle2(long2 x, long2 y, ulong4 mask);\n" |
42205 | "long4 __ovld __cnfn shuffle2(long4 x, long4 y, ulong4 mask);\n" |
42206 | "long4 __ovld __cnfn shuffle2(long8 x, long8 y, ulong4 mask);\n" |
42207 | "long4 __ovld __cnfn shuffle2(long16 x, long16 y, ulong4 mask);\n" |
42208 | "\n" |
42209 | "ulong4 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong4 mask);\n" |
42210 | "ulong4 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong4 mask);\n" |
42211 | "ulong4 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong4 mask);\n" |
42212 | "ulong4 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong4 mask);\n" |
42213 | "\n" |
42214 | "float4 __ovld __cnfn shuffle2(float2 x, float2 y, uint4 mask);\n" |
42215 | "float4 __ovld __cnfn shuffle2(float4 x, float4 y, uint4 mask);\n" |
42216 | "float4 __ovld __cnfn shuffle2(float8 x, float8 y, uint4 mask);\n" |
42217 | "float4 __ovld __cnfn shuffle2(float16 x, float16 y, uint4 mask);\n" |
42218 | "\n" |
42219 | "char8 __ovld __cnfn shuffle2(char2 x, char2 y, uchar8 mask);\n" |
42220 | "char8 __ovld __cnfn shuffle2(char4 x, char4 y, uchar8 mask);\n" |
42221 | "char8 __ovld __cnfn shuffle2(char8 x, char8 y, uchar8 mask);\n" |
42222 | "char8 __ovld __cnfn shuffle2(char16 x, char16 y, uchar8 mask);\n" |
42223 | "\n" |
42224 | "uchar8 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar8 mask);\n" |
42225 | "uchar8 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar8 mask);\n" |
42226 | "uchar8 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar8 mask);\n" |
42227 | "uchar8 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar8 mask);\n" |
42228 | "\n" |
42229 | "short8 __ovld __cnfn shuffle2(short2 x, short2 y, ushort8 mask);\n" |
42230 | "short8 __ovld __cnfn shuffle2(short4 x, short4 y, ushort8 mask);\n" |
42231 | "short8 __ovld __cnfn shuffle2(short8 x, short8 y, ushort8 mask);\n" |
42232 | "short8 __ovld __cnfn shuffle2(short16 x, short16 y, ushort8 mask);\n" |
42233 | "\n" |
42234 | "ushort8 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort8 mask);\n" |
42235 | "ushort8 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort8 mask);\n" |
42236 | "ushort8 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort8 mask);\n" |
42237 | "ushort8 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort8 mask);\n" |
42238 | "\n" |
42239 | "int8 __ovld __cnfn shuffle2(int2 x, int2 y, uint8 mask);\n" |
42240 | "int8 __ovld __cnfn shuffle2(int4 x, int4 y, uint8 mask);\n" |
42241 | "int8 __ovld __cnfn shuffle2(int8 x, int8 y, uint8 mask);\n" |
42242 | "int8 __ovld __cnfn shuffle2(int16 x, int16 y, uint8 mask);\n" |
42243 | "\n" |
42244 | "uint8 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint8 mask);\n" |
42245 | "uint8 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint8 mask);\n" |
42246 | "uint8 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint8 mask);\n" |
42247 | "uint8 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint8 mask);\n" |
42248 | "\n" |
42249 | "long8 __ovld __cnfn shuffle2(long2 x, long2 y, ulong8 mask);\n" |
42250 | "long8 __ovld __cnfn shuffle2(long4 x, long4 y, ulong8 mask);\n" |
42251 | "long8 __ovld __cnfn shuffle2(long8 x, long8 y, ulong8 mask);\n" |
42252 | "long8 __ovld __cnfn shuffle2(long16 x, long16 y, ulong8 mask);\n" |
42253 | "\n" |
42254 | "ulong8 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong8 mask);\n" |
42255 | "ulong8 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong8 mask);\n" |
42256 | "ulong8 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong8 mask);\n" |
42257 | "ulong8 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong8 mask);\n" |
42258 | "\n" |
42259 | "float8 __ovld __cnfn shuffle2(float2 x, float2 y, uint8 mask);\n" |
42260 | "float8 __ovld __cnfn shuffle2(float4 x, float4 y, uint8 mask);\n" |
42261 | "float8 __ovld __cnfn shuffle2(float8 x, float8 y, uint8 mask);\n" |
42262 | "float8 __ovld __cnfn shuffle2(float16 x, float16 y, uint8 mask);\n" |
42263 | "\n" |
42264 | "char16 __ovld __cnfn shuffle2(char2 x, char2 y, uchar16 mask);\n" |
42265 | "char16 __ovld __cnfn shuffle2(char4 x, char4 y, uchar16 mask);\n" |
42266 | "char16 __ovld __cnfn shuffle2(char8 x, char8 y, uchar16 mask);\n" |
42267 | "char16 __ovld __cnfn shuffle2(char16 x, char16 y, uchar16 mask);\n" |
42268 | "\n" |
42269 | "uchar16 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar16 mask);\n" |
42270 | "uchar16 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar16 mask);\n" |
42271 | "uchar16 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar16 mask);\n" |
42272 | "uchar16 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar16 mask);\n" |
42273 | "\n" |
42274 | "short16 __ovld __cnfn shuffle2(short2 x, short2 y, ushort16 mask);\n" |
42275 | "short16 __ovld __cnfn shuffle2(short4 x, short4 y, ushort16 mask);\n" |
42276 | "short16 __ovld __cnfn shuffle2(short8 x, short8 y, ushort16 mask);\n" |
42277 | "short16 __ovld __cnfn shuffle2(short16 x, short16 y, ushort16 mask);\n" |
42278 | "\n" |
42279 | "ushort16 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort16 mask);\n" |
42280 | "ushort16 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort16 mask);\n" |
42281 | "ushort16 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort16 mask);\n" |
42282 | "ushort16 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort16 mask);\n" |
42283 | "\n" |
42284 | "int16 __ovld __cnfn shuffle2(int2 x, int2 y, uint16 mask);\n" |
42285 | "int16 __ovld __cnfn shuffle2(int4 x, int4 y, uint16 mask);\n" |
42286 | "int16 __ovld __cnfn shuffle2(int8 x, int8 y, uint16 mask);\n" |
42287 | "int16 __ovld __cnfn shuffle2(int16 x, int16 y, uint16 mask);\n" |
42288 | "\n" |
42289 | "uint16 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint16 mask);\n" |
42290 | "uint16 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint16 mask);\n" |
42291 | "uint16 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint16 mask);\n" |
42292 | "uint16 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint16 mask);\n" |
42293 | "\n" |
42294 | "long16 __ovld __cnfn shuffle2(long2 x, long2 y, ulong16 mask);\n" |
42295 | "long16 __ovld __cnfn shuffle2(long4 x, long4 y, ulong16 mask);\n" |
42296 | "long16 __ovld __cnfn shuffle2(long8 x, long8 y, ulong16 mask);\n" |
42297 | "long16 __ovld __cnfn shuffle2(long16 x, long16 y, ulong16 mask);\n" |
42298 | "\n" |
42299 | "ulong16 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong16 mask);\n" |
42300 | "ulong16 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong16 mask);\n" |
42301 | "ulong16 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong16 mask);\n" |
42302 | "ulong16 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong16 mask);\n" |
42303 | "\n" |
42304 | "float16 __ovld __cnfn shuffle2(float2 x, float2 y, uint16 mask);\n" |
42305 | "float16 __ovld __cnfn shuffle2(float4 x, float4 y, uint16 mask);\n" |
42306 | "float16 __ovld __cnfn shuffle2(float8 x, float8 y, uint16 mask);\n" |
42307 | "float16 __ovld __cnfn shuffle2(float16 x, float16 y, uint16 mask);\n" |
42308 | "\n" |
42309 | "#ifdef cl_khr_fp64\n" |
42310 | "double2 __ovld __cnfn shuffle2(double2 x, double2 y, ulong2 mask);\n" |
42311 | "double2 __ovld __cnfn shuffle2(double4 x, double4 y, ulong2 mask);\n" |
42312 | "double2 __ovld __cnfn shuffle2(double8 x, double8 y, ulong2 mask);\n" |
42313 | "double2 __ovld __cnfn shuffle2(double16 x, double16 y, ulong2 mask);\n" |
42314 | "\n" |
42315 | "double4 __ovld __cnfn shuffle2(double2 x, double2 y, ulong4 mask);\n" |
42316 | "double4 __ovld __cnfn shuffle2(double4 x, double4 y, ulong4 mask);\n" |
42317 | "double4 __ovld __cnfn shuffle2(double8 x, double8 y, ulong4 mask);\n" |
42318 | "double4 __ovld __cnfn shuffle2(double16 x, double16 y, ulong4 mask);\n" |
42319 | "\n" |
42320 | "double8 __ovld __cnfn shuffle2(double2 x, double2 y, ulong8 mask);\n" |
42321 | "double8 __ovld __cnfn shuffle2(double4 x, double4 y, ulong8 mask);\n" |
42322 | "double8 __ovld __cnfn shuffle2(double8 x, double8 y, ulong8 mask);\n" |
42323 | "double8 __ovld __cnfn shuffle2(double16 x, double16 y, ulong8 mask);\n" |
42324 | "\n" |
42325 | "double16 __ovld __cnfn shuffle2(double2 x, double2 y, ulong16 mask);\n" |
42326 | "double16 __ovld __cnfn shuffle2(double4 x, double4 y, ulong16 mask);\n" |
42327 | "double16 __ovld __cnfn shuffle2(double8 x, double8 y, ulong16 mask);\n" |
42328 | "double16 __ovld __cnfn shuffle2(double16 x, double16 y, ulong16 mask);\n" |
42329 | "#endif //cl_khr_fp64\n" |
42330 | "\n" |
42331 | "#ifdef cl_khr_fp16\n" |
42332 | "half2 __ovld __cnfn shuffle2(half2 x, half2 y, ushort2 mask);\n" |
42333 | "half2 __ovld __cnfn shuffle2(half4 x, half4 y, ushort2 mask);\n" |
42334 | "half2 __ovld __cnfn shuffle2(half8 x, half8 y, ushort2 mask);\n" |
42335 | "half2 __ovld __cnfn shuffle2(half16 x, half16 y, ushort2 mask);\n" |
42336 | "\n" |
42337 | "half4 __ovld __cnfn shuffle2(half2 x, half2 y, ushort4 mask);\n" |
42338 | "half4 __ovld __cnfn shuffle2(half4 x, half4 y, ushort4 mask);\n" |
42339 | "half4 __ovld __cnfn shuffle2(half8 x, half8 y, ushort4 mask);\n" |
42340 | "half4 __ovld __cnfn shuffle2(half16 x, half16 y, ushort4 mask);\n" |
42341 | "\n" |
42342 | "half8 __ovld __cnfn shuffle2(half2 x, half2 y, ushort8 mask);\n" |
42343 | "half8 __ovld __cnfn shuffle2(half4 x, half4 y, ushort8 mask);\n" |
42344 | "half8 __ovld __cnfn shuffle2(half8 x, half8 y, ushort8 mask);\n" |
42345 | "half8 __ovld __cnfn shuffle2(half16 x, half16 y, ushort8 mask);\n" |
42346 | "\n" |
42347 | "half16 __ovld __cnfn shuffle2(half2 x, half2 y, ushort16 mask);\n" |
42348 | "half16 __ovld __cnfn shuffle2(half4 x, half4 y, ushort16 mask);\n" |
42349 | "half16 __ovld __cnfn shuffle2(half8 x, half8 y, ushort16 mask);\n" |
42350 | "half16 __ovld __cnfn shuffle2(half16 x, half16 y, ushort16 mask);\n" |
42351 | "#endif //cl_khr_fp16\n" |
42352 | "\n" |
42353 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n" |
42354 | "// OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf\n" |
42355 | "\n" |
42356 | "int printf(__constant const char* st, ...);\n" |
42357 | "#endif\n" |
42358 | "\n" |
42359 | "// OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions\n" |
42360 | "\n" |
42361 | "// These values need to match the runtime equivalent\n" |
42362 | "//\n" |
42363 | "// Addressing Mode.\n" |
42364 | "//\n" |
42365 | "#define CLK_ADDRESS_NONE 0\n" |
42366 | "#define CLK_ADDRESS_CLAMP_TO_EDGE 2\n" |
42367 | "#define CLK_ADDRESS_CLAMP 4\n" |
42368 | "#define CLK_ADDRESS_REPEAT 6\n" |
42369 | "#define CLK_ADDRESS_MIRRORED_REPEAT 8\n" |
42370 | "\n" |
42371 | "//\n" |
42372 | "// Coordination Normalization\n" |
42373 | "//\n" |
42374 | "#define CLK_NORMALIZED_COORDS_FALSE 0\n" |
42375 | "#define CLK_NORMALIZED_COORDS_TRUE 1\n" |
42376 | "\n" |
42377 | "//\n" |
42378 | "// Filtering Mode.\n" |
42379 | "//\n" |
42380 | "#define CLK_FILTER_NEAREST 0x10\n" |
42381 | "#define CLK_FILTER_LINEAR 0x20\n" |
42382 | "\n" |
42383 | "#ifdef cl_khr_gl_msaa_sharing\n" |
42384 | "#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n" |
42385 | "#endif //cl_khr_gl_msaa_sharing\n" |
42386 | "\n" |
42387 | "/**\n" |
42388 | " * Use the coordinate (coord.xy) to do an element lookup in\n" |
42389 | " * the 2D image object specified by image.\n" |
42390 | " *\n" |
42391 | " * Use the coordinate (coord.x, coord.y, coord.z) to do\n" |
42392 | " * an element lookup in the 3D image object specified\n" |
42393 | " * by image. coord.w is ignored.\n" |
42394 | " *\n" |
42395 | " * Use the coordinate (coord.z) to index into the\n" |
42396 | " * 2D image array object specified by image_array\n" |
42397 | " * and (coord.x, coord.y) to do an element lookup in\n" |
42398 | " * the 2D image object specified by image.\n" |
42399 | " *\n" |
42400 | " * Use the coordinate (x) to do an element lookup in\n" |
42401 | " * the 1D image object specified by image.\n" |
42402 | " *\n" |
42403 | " * Use the coordinate (coord.y) to index into the\n" |
42404 | " * 1D image array object specified by image_array\n" |
42405 | " * and (coord.x) to do an element lookup in\n" |
42406 | " * the 1D image object specified by image.\n" |
42407 | " *\n" |
42408 | " * Use the coordinate (cood.xy) and sample to do an\n" |
42409 | " * element lookup in the 2D multi-sample image specified\n" |
42410 | " * by image.\n" |
42411 | " *\n" |
42412 | " * Use coord.xy and sample to do an element\n" |
42413 | " * lookup in the 2D multi-sample image layer\n" |
42414 | " * identified by index coord.z in the 2D multi-sample\n" |
42415 | " * image array specified by image.\n" |
42416 | " *\n" |
42417 | " * For mipmap images, use the mip-level specified by\n" |
42418 | " * the Level-of-Detail (lod) or use gradients for LOD\n" |
42419 | " * computation.\n" |
42420 | " *\n" |
42421 | " * read_imagef returns floating-point values in the\n" |
42422 | " * range [0.0 ... 1.0] for image objects created with\n" |
42423 | " * image_channel_data_type set to one of the predefined\n" |
42424 | " * packed formats or CL_UNORM_INT8, or\n" |
42425 | " * CL_UNORM_INT16.\n" |
42426 | " *\n" |
42427 | " * read_imagef returns floating-point values in the\n" |
42428 | " * range [-1.0 ... 1.0] for image objects created with\n" |
42429 | " * image_channel_data_type set to CL_SNORM_INT8,\n" |
42430 | " * or CL_SNORM_INT16.\n" |
42431 | " *\n" |
42432 | " * read_imagef returns floating-point values for image\n" |
42433 | " * objects created with image_channel_data_type set to\n" |
42434 | " * CL_HALF_FLOAT or CL_FLOAT.\n" |
42435 | " *\n" |
42436 | " * read_imagei and read_imageui return\n" |
42437 | " * unnormalized signed integer and unsigned integer\n" |
42438 | " * values respectively. Each channel will be stored in a\n" |
42439 | " * 32-bit integer.\n" |
42440 | " *\n" |
42441 | " * read_imagei can only be used with image objects\n" |
42442 | " * created with image_channel_data_type set to one of\n" |
42443 | " * the following values:\n" |
42444 | " * CL_SIGNED_INT8,\n" |
42445 | " * CL_SIGNED_INT16 and\n" |
42446 | " * CL_SIGNED_INT32.\n" |
42447 | " * If the image_channel_data_type is not one of the\n" |
42448 | " * above values, the values returned by read_imagei\n" |
42449 | " * are undefined.\n" |
42450 | " *\n" |
42451 | " * read_imageui can only be used with image objects\n" |
42452 | " * created with image_channel_data_type set to one of\n" |
42453 | " * the following values:\n" |
42454 | " * CL_UNSIGNED_INT8,\n" |
42455 | " * CL_UNSIGNED_INT16 and\n" |
42456 | " * CL_UNSIGNED_INT32.\n" |
42457 | " * If the image_channel_data_type is not one of the\n" |
42458 | " * above values, the values returned by read_imageui\n" |
42459 | " * are undefined.\n" |
42460 | " *\n" |
42461 | " * The read_image{i|ui} calls support a nearest filter\n" |
42462 | " * only. The filter_mode specified in sampler\n" |
42463 | " * must be set to CLK_FILTER_NEAREST; otherwise\n" |
42464 | " * the values returned are undefined.\n" |
42465 | "\n" |
42466 | " * The read_image{f|i|ui} calls that take\n" |
42467 | " * integer coordinates must use a sampler with\n" |
42468 | " * normalized coordinates set to\n" |
42469 | " * CLK_NORMALIZED_COORDS_FALSE and\n" |
42470 | " * addressing mode set to\n" |
42471 | " * CLK_ADDRESS_CLAMP_TO_EDGE,\n" |
42472 | " * CLK_ADDRESS_CLAMP or CLK_ADDRESS_NONE;\n" |
42473 | " * otherwise the values returned are undefined.\n" |
42474 | " *\n" |
42475 | " * Values returned by read_imagef for image objects\n" |
42476 | " * with image_channel_data_type values not specified\n" |
42477 | " * in the description above are undefined.\n" |
42478 | " */\n" |
42479 | "\n" |
42480 | "float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, int2 coord);\n" |
42481 | "float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord);\n" |
42482 | "\n" |
42483 | "int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, int2 coord);\n" |
42484 | "int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord);\n" |
42485 | "uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, int2 coord);\n" |
42486 | "uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord);\n" |
42487 | "\n" |
42488 | "float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, int4 coord);\n" |
42489 | "float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord);\n" |
42490 | "\n" |
42491 | "int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, int4 coord);\n" |
42492 | "int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord);\n" |
42493 | "uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, int4 coord);\n" |
42494 | "uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord);\n" |
42495 | "\n" |
42496 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n" |
42497 | "float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);\n" |
42498 | "float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);\n" |
42499 | "\n" |
42500 | "int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);\n" |
42501 | "int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);\n" |
42502 | "uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);\n" |
42503 | "uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);\n" |
42504 | "#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n" |
42505 | "\n" |
42506 | "float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, int coord);\n" |
42507 | "float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord);\n" |
42508 | "\n" |
42509 | "int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, int coord);\n" |
42510 | "int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord);\n" |
42511 | "uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, int coord);\n" |
42512 | "uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord);\n" |
42513 | "\n" |
42514 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n" |
42515 | "float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);\n" |
42516 | "float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);\n" |
42517 | "\n" |
42518 | "int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);\n" |
42519 | "int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);\n" |
42520 | "uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);\n" |
42521 | "uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);\n" |
42522 | "#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n" |
42523 | "\n" |
42524 | "#ifdef cl_khr_depth_images\n" |
42525 | "float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord);\n" |
42526 | "float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, int2 coord);\n" |
42527 | "\n" |
42528 | "float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord);\n" |
42529 | "float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, int4 coord);\n" |
42530 | "#endif //cl_khr_depth_images\n" |
42531 | "\n" |
42532 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
42533 | "float4 __purefn __ovld read_imagef(read_only image2d_msaa_t image, int2 coord, int sample);\n" |
42534 | "int4 __purefn __ovld read_imagei(read_only image2d_msaa_t image, int2 coord, int sample);\n" |
42535 | "uint4 __purefn __ovld read_imageui(read_only image2d_msaa_t image, int2 coord, int sample);\n" |
42536 | "\n" |
42537 | "float __purefn __ovld read_imagef(read_only image2d_msaa_depth_t image, int2 coord, int sample);\n" |
42538 | "\n" |
42539 | "float4 __purefn __ovld read_imagef(read_only image2d_array_msaa_t image, int4 coord, int sample);\n" |
42540 | "int4 __purefn __ovld read_imagei(read_only image2d_array_msaa_t image, int4 coord, int sample);\n" |
42541 | "uint4 __purefn __ovld read_imageui(read_only image2d_array_msaa_t image, int4 coord, int sample);\n" |
42542 | "\n" |
42543 | "float __purefn __ovld read_imagef(read_only image2d_array_msaa_depth_t image, int4 coord, int sample);\n" |
42544 | "#endif //cl_khr_gl_msaa_sharing\n" |
42545 | "\n" |
42546 | "// OpenCL Extension v2.0 s9.18 - Mipmaps\n" |
42547 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
42548 | "#ifdef cl_khr_mipmap_image\n" |
42549 | "\n" |
42550 | "float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n" |
42551 | "int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n" |
42552 | "uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n" |
42553 | "\n" |
42554 | "float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
42555 | "int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
42556 | "uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
42557 | "\n" |
42558 | "float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
42559 | "int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
42560 | "uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
42561 | "\n" |
42562 | "float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n" |
42563 | "\n" |
42564 | "float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
42565 | "int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
42566 | "uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
42567 | "\n" |
42568 | "float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n" |
42569 | "\n" |
42570 | "float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
42571 | "int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
42572 | "uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
42573 | "\n" |
42574 | "float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n" |
42575 | "int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n" |
42576 | "uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n" |
42577 | "\n" |
42578 | "float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n" |
42579 | "int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n" |
42580 | "uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n" |
42581 | "\n" |
42582 | "float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
42583 | "int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
42584 | "uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
42585 | "\n" |
42586 | "float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
42587 | "\n" |
42588 | "float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
42589 | "int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
42590 | "uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
42591 | "\n" |
42592 | "float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
42593 | "\n" |
42594 | "float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n" |
42595 | "int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n" |
42596 | "uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n" |
42597 | "\n" |
42598 | "float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n" |
42599 | "int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n" |
42600 | "uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n" |
42601 | "\n" |
42602 | "float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
42603 | "int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
42604 | "uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
42605 | "\n" |
42606 | "float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
42607 | "int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
42608 | "uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
42609 | "\n" |
42610 | "float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n" |
42611 | "\n" |
42612 | "float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
42613 | "int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
42614 | "uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
42615 | "\n" |
42616 | "float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n" |
42617 | "\n" |
42618 | "float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
42619 | "int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
42620 | "uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
42621 | "\n" |
42622 | "#endif //cl_khr_mipmap_image\n" |
42623 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
42624 | "\n" |
42625 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n" |
42626 | "\n" |
42627 | "/**\n" |
42628 | "* Sampler-less Image Access\n" |
42629 | "*/\n" |
42630 | "\n" |
42631 | "float4 __purefn __ovld read_imagef(read_only image1d_t image, int coord);\n" |
42632 | "int4 __purefn __ovld read_imagei(read_only image1d_t image, int coord);\n" |
42633 | "uint4 __purefn __ovld read_imageui(read_only image1d_t image, int coord);\n" |
42634 | "\n" |
42635 | "float4 __purefn __ovld read_imagef(read_only image1d_buffer_t image, int coord);\n" |
42636 | "int4 __purefn __ovld read_imagei(read_only image1d_buffer_t image, int coord);\n" |
42637 | "uint4 __purefn __ovld read_imageui(read_only image1d_buffer_t image, int coord);\n" |
42638 | "\n" |
42639 | "float4 __purefn __ovld read_imagef(read_only image1d_array_t image, int2 coord);\n" |
42640 | "int4 __purefn __ovld read_imagei(read_only image1d_array_t image, int2 coord);\n" |
42641 | "uint4 __purefn __ovld read_imageui(read_only image1d_array_t image, int2 coord);\n" |
42642 | "\n" |
42643 | "float4 __purefn __ovld read_imagef(read_only image2d_t image, int2 coord);\n" |
42644 | "int4 __purefn __ovld read_imagei(read_only image2d_t image, int2 coord);\n" |
42645 | "uint4 __purefn __ovld read_imageui(read_only image2d_t image, int2 coord);\n" |
42646 | "\n" |
42647 | "float4 __purefn __ovld read_imagef(read_only image2d_array_t image, int4 coord);\n" |
42648 | "int4 __purefn __ovld read_imagei(read_only image2d_array_t image, int4 coord);\n" |
42649 | "uint4 __purefn __ovld read_imageui(read_only image2d_array_t image, int4 coord);\n" |
42650 | "\n" |
42651 | "#ifdef cl_khr_depth_images\n" |
42652 | "float __purefn __ovld read_imagef(read_only image2d_depth_t image, int2 coord);\n" |
42653 | "float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, int4 coord);\n" |
42654 | "#endif //cl_khr_depth_images\n" |
42655 | "\n" |
42656 | "float4 __purefn __ovld read_imagef(read_only image3d_t image, int4 coord);\n" |
42657 | "int4 __purefn __ovld read_imagei(read_only image3d_t image, int4 coord);\n" |
42658 | "uint4 __purefn __ovld read_imageui(read_only image3d_t image, int4 coord);\n" |
42659 | "\n" |
42660 | "#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n" |
42661 | "\n" |
42662 | "// Image read functions returning half4 type\n" |
42663 | "#ifdef cl_khr_fp16\n" |
42664 | "half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, int coord);\n" |
42665 | "half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, float coord);\n" |
42666 | "half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, int2 coord);\n" |
42667 | "half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, float2 coord);\n" |
42668 | "half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, int4 coord);\n" |
42669 | "half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, float4 coord);\n" |
42670 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n" |
42671 | "half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, int2 coord);\n" |
42672 | "half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, float2 coord);\n" |
42673 | "half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, int4 coord);\n" |
42674 | "half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, float4 coord);\n" |
42675 | "/**\n" |
42676 | " * Sampler-less Image Access\n" |
42677 | " */\n" |
42678 | "half4 __purefn __ovld read_imageh(read_only image1d_t image, int coord);\n" |
42679 | "half4 __purefn __ovld read_imageh(read_only image2d_t image, int2 coord);\n" |
42680 | "half4 __purefn __ovld read_imageh(read_only image3d_t image, int4 coord);\n" |
42681 | "half4 __purefn __ovld read_imageh(read_only image1d_array_t image, int2 coord);\n" |
42682 | "half4 __purefn __ovld read_imageh(read_only image2d_array_t image, int4 coord);\n" |
42683 | "half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord);\n" |
42684 | "#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n" |
42685 | "#endif //cl_khr_fp16\n" |
42686 | "\n" |
42687 | "// Image read functions for read_write images\n" |
42688 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
42689 | "float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord);\n" |
42690 | "int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord);\n" |
42691 | "uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord);\n" |
42692 | "\n" |
42693 | "float4 __purefn __ovld read_imagef(read_write image1d_buffer_t image, int coord);\n" |
42694 | "int4 __purefn __ovld read_imagei(read_write image1d_buffer_t image, int coord);\n" |
42695 | "uint4 __purefn __ovld read_imageui(read_write image1d_buffer_t image, int coord);\n" |
42696 | "\n" |
42697 | "float4 __purefn __ovld read_imagef(read_write image1d_array_t image, int2 coord);\n" |
42698 | "int4 __purefn __ovld read_imagei(read_write image1d_array_t image, int2 coord);\n" |
42699 | "uint4 __purefn __ovld read_imageui(read_write image1d_array_t image, int2 coord);\n" |
42700 | "\n" |
42701 | "float4 __purefn __ovld read_imagef(read_write image2d_t image, int2 coord);\n" |
42702 | "int4 __purefn __ovld read_imagei(read_write image2d_t image, int2 coord);\n" |
42703 | "uint4 __purefn __ovld read_imageui(read_write image2d_t image, int2 coord);\n" |
42704 | "\n" |
42705 | "float4 __purefn __ovld read_imagef(read_write image2d_array_t image, int4 coord);\n" |
42706 | "int4 __purefn __ovld read_imagei(read_write image2d_array_t image, int4 coord);\n" |
42707 | "uint4 __purefn __ovld read_imageui(read_write image2d_array_t image, int4 coord);\n" |
42708 | "\n" |
42709 | "float4 __purefn __ovld read_imagef(read_write image3d_t image, int4 coord);\n" |
42710 | "int4 __purefn __ovld read_imagei(read_write image3d_t image, int4 coord);\n" |
42711 | "uint4 __purefn __ovld read_imageui(read_write image3d_t image, int4 coord);\n" |
42712 | "\n" |
42713 | "#ifdef cl_khr_depth_images\n" |
42714 | "float __purefn __ovld read_imagef(read_write image2d_depth_t image, int2 coord);\n" |
42715 | "float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, int4 coord);\n" |
42716 | "#endif //cl_khr_depth_images\n" |
42717 | "\n" |
42718 | "#if cl_khr_gl_msaa_sharing\n" |
42719 | "float4 __purefn __ovld read_imagef(read_write image2d_msaa_t image, int2 coord, int sample);\n" |
42720 | "int4 __purefn __ovld read_imagei(read_write image2d_msaa_t image, int2 coord, int sample);\n" |
42721 | "uint4 __purefn __ovld read_imageui(read_write image2d_msaa_t image, int2 coord, int sample);\n" |
42722 | "\n" |
42723 | "float4 __purefn __ovld read_imagef(read_write image2d_array_msaa_t image, int4 coord, int sample);\n" |
42724 | "int4 __purefn __ovld read_imagei(read_write image2d_array_msaa_t image, int4 coord, int sample);\n" |
42725 | "uint4 __purefn __ovld read_imageui(read_write image2d_array_msaa_t image, int4 coord, int sample);\n" |
42726 | "\n" |
42727 | "float __purefn __ovld read_imagef(read_write image2d_msaa_depth_t image, int2 coord, int sample);\n" |
42728 | "float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, int4 coord, int sample);\n" |
42729 | "#endif //cl_khr_gl_msaa_sharing\n" |
42730 | "\n" |
42731 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
42732 | "#ifdef cl_khr_mipmap_image\n" |
42733 | "float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n" |
42734 | "int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n" |
42735 | "uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n" |
42736 | "\n" |
42737 | "float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
42738 | "int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
42739 | "uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
42740 | "\n" |
42741 | "float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
42742 | "int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
42743 | "uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
42744 | "\n" |
42745 | "float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n" |
42746 | "\n" |
42747 | "float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
42748 | "int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
42749 | "uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
42750 | "\n" |
42751 | "float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n" |
42752 | "\n" |
42753 | "float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
42754 | "int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
42755 | "uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
42756 | "\n" |
42757 | "float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n" |
42758 | "int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n" |
42759 | "uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n" |
42760 | "\n" |
42761 | "float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n" |
42762 | "int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n" |
42763 | "uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n" |
42764 | "\n" |
42765 | "float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
42766 | "int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
42767 | "uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
42768 | "\n" |
42769 | "float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
42770 | "\n" |
42771 | "float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
42772 | "int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
42773 | "uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
42774 | "\n" |
42775 | "float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
42776 | "\n" |
42777 | "float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n" |
42778 | "int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n" |
42779 | "uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n" |
42780 | "\n" |
42781 | "float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n" |
42782 | "int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n" |
42783 | "uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n" |
42784 | "\n" |
42785 | "float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
42786 | "int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
42787 | "uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
42788 | "\n" |
42789 | "float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
42790 | "int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
42791 | "uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
42792 | "\n" |
42793 | "float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n" |
42794 | "\n" |
42795 | "float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
42796 | "int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
42797 | "uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
42798 | "\n" |
42799 | "float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n" |
42800 | "\n" |
42801 | "float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
42802 | "int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
42803 | "uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
42804 | "#endif //cl_khr_mipmap_image\n" |
42805 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
42806 | "\n" |
42807 | "// Image read functions returning half4 type\n" |
42808 | "#ifdef cl_khr_fp16\n" |
42809 | "half4 __purefn __ovld read_imageh(read_write image1d_t image, int coord);\n" |
42810 | "half4 __purefn __ovld read_imageh(read_write image2d_t image, int2 coord);\n" |
42811 | "half4 __purefn __ovld read_imageh(read_write image3d_t image, int4 coord);\n" |
42812 | "half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord);\n" |
42813 | "half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord);\n" |
42814 | "half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord);\n" |
42815 | "#endif //cl_khr_fp16\n" |
42816 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
42817 | "\n" |
42818 | "/**\n" |
42819 | " * Write color value to location specified by coordinate\n" |
42820 | " * (coord.x, coord.y) in the 2D image object specified by image.\n" |
42821 | " * (coord.x, coord.y) are considered to be unnormalized coordinates\n" |
42822 | " * and must be in the range 0 ... image width - 1, and 0\n" |
42823 | " * ... image height - 1.\n" |
42824 | "\n" |
42825 | " * Write color value to location specified by coordinate\n" |
42826 | " * (coord.x, coord.y) in the 2D image object specified by index\n" |
42827 | " * (coord.z) of the 2D image array object image_array.\n" |
42828 | " * (coord.x, coord.y) are considered to be unnormalized\n" |
42829 | " * coordinates and must be in the range 0 ... image width\n" |
42830 | " * - 1.\n" |
42831 | " *\n" |
42832 | " * Write color value to location specified by coordinate\n" |
42833 | " * (coord) in the 1D image (buffer) object specified by image.\n" |
42834 | " * coord is considered to be unnormalized coordinates\n" |
42835 | " * and must be in the range 0 ... image width - 1.\n" |
42836 | " *\n" |
42837 | " * Write color value to location specified by coordinate\n" |
42838 | " * (coord.x) in the 1D image object specified by index\n" |
42839 | " * (coord.y) of the 1D image array object image_array.\n" |
42840 | " * x is considered to be unnormalized coordinates\n" |
42841 | " * and must be in the range 0 ... image width - 1.\n" |
42842 | " *\n" |
42843 | " * Write color value to location specified by coordinate\n" |
42844 | " * (coord.x, coord.y, coord.z) in the 3D image object specified by image.\n" |
42845 | " * coord.x & coord.y are considered to be unnormalized coordinates\n" |
42846 | " * and must be in the range 0 ... image width - 1, and 0\n" |
42847 | " * ... image height - 1.\n" |
42848 | " *\n" |
42849 | " * For mipmap images, use mip-level specified by lod.\n" |
42850 | " *\n" |
42851 | " * Appropriate data format conversion to the specified\n" |
42852 | " * image format is done before writing the color value.\n" |
42853 | " *\n" |
42854 | " * write_imagef can only be used with image objects\n" |
42855 | " * created with image_channel_data_type set to one of\n" |
42856 | " * the pre-defined packed formats or set to\n" |
42857 | " * CL_SNORM_INT8, CL_UNORM_INT8,\n" |
42858 | " * CL_SNORM_INT16, CL_UNORM_INT16,\n" |
42859 | " * CL_HALF_FLOAT or CL_FLOAT. Appropriate data\n" |
42860 | " * format conversion will be done to convert channel\n" |
42861 | " * data from a floating-point value to actual data format\n" |
42862 | " * in which the channels are stored.\n" |
42863 | " *\n" |
42864 | " * write_imagei can only be used with image objects\n" |
42865 | " * created with image_channel_data_type set to one of\n" |
42866 | " * the following values:\n" |
42867 | " * CL_SIGNED_INT8,\n" |
42868 | " * CL_SIGNED_INT16 and\n" |
42869 | " * CL_SIGNED_INT32.\n" |
42870 | " *\n" |
42871 | " * write_imageui can only be used with image objects\n" |
42872 | " * created with image_channel_data_type set to one of\n" |
42873 | " * the following values:\n" |
42874 | " * CL_UNSIGNED_INT8,\n" |
42875 | " * CL_UNSIGNED_INT16 and\n" |
42876 | " * CL_UNSIGNED_INT32.\n" |
42877 | " *\n" |
42878 | " * The behavior of write_imagef, write_imagei and\n" |
42879 | " * write_imageui for image objects created with\n" |
42880 | " * image_channel_data_type values not specified in\n" |
42881 | " * the description above or with (x, y) coordinate\n" |
42882 | " * values that are not in the range (0 ... image width -1,\n" |
42883 | " * 0 ... image height - 1), respectively, is undefined.\n" |
42884 | " */\n" |
42885 | "void __ovld write_imagef(write_only image2d_t image, int2 coord, float4 color);\n" |
42886 | "void __ovld write_imagei(write_only image2d_t image, int2 coord, int4 color);\n" |
42887 | "void __ovld write_imageui(write_only image2d_t image, int2 coord, uint4 color);\n" |
42888 | "\n" |
42889 | "void __ovld write_imagef(write_only image2d_array_t image_array, int4 coord, float4 color);\n" |
42890 | "void __ovld write_imagei(write_only image2d_array_t image_array, int4 coord, int4 color);\n" |
42891 | "void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, uint4 color);\n" |
42892 | "\n" |
42893 | "void __ovld write_imagef(write_only image1d_t image, int coord, float4 color);\n" |
42894 | "void __ovld write_imagei(write_only image1d_t image, int coord, int4 color);\n" |
42895 | "void __ovld write_imageui(write_only image1d_t image, int coord, uint4 color);\n" |
42896 | "\n" |
42897 | "void __ovld write_imagef(write_only image1d_buffer_t image, int coord, float4 color);\n" |
42898 | "void __ovld write_imagei(write_only image1d_buffer_t image, int coord, int4 color);\n" |
42899 | "void __ovld write_imageui(write_only image1d_buffer_t image, int coord, uint4 color);\n" |
42900 | "\n" |
42901 | "void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, float4 color);\n" |
42902 | "void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int4 color);\n" |
42903 | "void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, uint4 color);\n" |
42904 | "\n" |
42905 | "#ifdef cl_khr_3d_image_writes\n" |
42906 | "void __ovld write_imagef(write_only image3d_t image, int4 coord, float4 color);\n" |
42907 | "void __ovld write_imagei(write_only image3d_t image, int4 coord, int4 color);\n" |
42908 | "void __ovld write_imageui(write_only image3d_t image, int4 coord, uint4 color);\n" |
42909 | "#endif\n" |
42910 | "\n" |
42911 | "#ifdef cl_khr_depth_images\n" |
42912 | "void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, float color);\n" |
42913 | "void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, float color);\n" |
42914 | "#endif //cl_khr_depth_images\n" |
42915 | "\n" |
42916 | "// OpenCL Extension v2.0 s9.18 - Mipmaps\n" |
42917 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
42918 | "#ifdef cl_khr_mipmap_image\n" |
42919 | "void __ovld write_imagef(write_only image1d_t image, int coord, int lod, float4 color);\n" |
42920 | "void __ovld write_imagei(write_only image1d_t image, int coord, int lod, int4 color);\n" |
42921 | "void __ovld write_imageui(write_only image1d_t image, int coord, int lod, uint4 color);\n" |
42922 | "\n" |
42923 | "void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, int lod, float4 color);\n" |
42924 | "void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int lod, int4 color);\n" |
42925 | "void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, int lod, uint4 color);\n" |
42926 | "\n" |
42927 | "void __ovld write_imagef(write_only image2d_t image, int2 coord, int lod, float4 color);\n" |
42928 | "void __ovld write_imagei(write_only image2d_t image, int2 coord, int lod, int4 color);\n" |
42929 | "void __ovld write_imageui(write_only image2d_t image, int2 coord, int lod, uint4 color);\n" |
42930 | "\n" |
42931 | "void __ovld write_imagef(write_only image2d_array_t image_array, int4 coord, int lod, float4 color);\n" |
42932 | "void __ovld write_imagei(write_only image2d_array_t image_array, int4 coord, int lod, int4 color);\n" |
42933 | "void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, int lod, uint4 color);\n" |
42934 | "\n" |
42935 | "void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, int lod, float color);\n" |
42936 | "void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, int lod, float color);\n" |
42937 | "\n" |
42938 | "#ifdef cl_khr_3d_image_writes\n" |
42939 | "void __ovld write_imagef(write_only image3d_t image, int4 coord, int lod, float4 color);\n" |
42940 | "void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 color);\n" |
42941 | "void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color);\n" |
42942 | "#endif\n" |
42943 | "#endif //cl_khr_mipmap_image\n" |
42944 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
42945 | "\n" |
42946 | "// Image write functions for half4 type\n" |
42947 | "#ifdef cl_khr_fp16\n" |
42948 | "void __ovld write_imageh(write_only image1d_t image, int coord, half4 color);\n" |
42949 | "void __ovld write_imageh(write_only image2d_t image, int2 coord, half4 color);\n" |
42950 | "#ifdef cl_khr_3d_image_writes\n" |
42951 | "void __ovld write_imageh(write_only image3d_t image, int4 coord, half4 color);\n" |
42952 | "#endif\n" |
42953 | "void __ovld write_imageh(write_only image1d_array_t image, int2 coord, half4 color);\n" |
42954 | "void __ovld write_imageh(write_only image2d_array_t image, int4 coord, half4 color);\n" |
42955 | "void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 color);\n" |
42956 | "#endif //cl_khr_fp16\n" |
42957 | "\n" |
42958 | "// Image write functions for read_write images\n" |
42959 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
42960 | "void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color);\n" |
42961 | "void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color);\n" |
42962 | "void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color);\n" |
42963 | "\n" |
42964 | "void __ovld write_imagef(read_write image2d_array_t image_array, int4 coord, float4 color);\n" |
42965 | "void __ovld write_imagei(read_write image2d_array_t image_array, int4 coord, int4 color);\n" |
42966 | "void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, uint4 color);\n" |
42967 | "\n" |
42968 | "void __ovld write_imagef(read_write image1d_t image, int coord, float4 color);\n" |
42969 | "void __ovld write_imagei(read_write image1d_t image, int coord, int4 color);\n" |
42970 | "void __ovld write_imageui(read_write image1d_t image, int coord, uint4 color);\n" |
42971 | "\n" |
42972 | "void __ovld write_imagef(read_write image1d_buffer_t image, int coord, float4 color);\n" |
42973 | "void __ovld write_imagei(read_write image1d_buffer_t image, int coord, int4 color);\n" |
42974 | "void __ovld write_imageui(read_write image1d_buffer_t image, int coord, uint4 color);\n" |
42975 | "\n" |
42976 | "void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, float4 color);\n" |
42977 | "void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int4 color);\n" |
42978 | "void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, uint4 color);\n" |
42979 | "\n" |
42980 | "#ifdef cl_khr_3d_image_writes\n" |
42981 | "void __ovld write_imagef(read_write image3d_t image, int4 coord, float4 color);\n" |
42982 | "void __ovld write_imagei(read_write image3d_t image, int4 coord, int4 color);\n" |
42983 | "void __ovld write_imageui(read_write image3d_t image, int4 coord, uint4 color);\n" |
42984 | "#endif\n" |
42985 | "\n" |
42986 | "#ifdef cl_khr_depth_images\n" |
42987 | "void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, float color);\n" |
42988 | "void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, float color);\n" |
42989 | "#endif //cl_khr_depth_images\n" |
42990 | "\n" |
42991 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
42992 | "#ifdef cl_khr_mipmap_image\n" |
42993 | "void __ovld write_imagef(read_write image1d_t image, int coord, int lod, float4 color);\n" |
42994 | "void __ovld write_imagei(read_write image1d_t image, int coord, int lod, int4 color);\n" |
42995 | "void __ovld write_imageui(read_write image1d_t image, int coord, int lod, uint4 color);\n" |
42996 | "\n" |
42997 | "void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, int lod, float4 color);\n" |
42998 | "void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int lod, int4 color);\n" |
42999 | "void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, int lod, uint4 color);\n" |
43000 | "\n" |
43001 | "void __ovld write_imagef(read_write image2d_t image, int2 coord, int lod, float4 color);\n" |
43002 | "void __ovld write_imagei(read_write image2d_t image, int2 coord, int lod, int4 color);\n" |
43003 | "void __ovld write_imageui(read_write image2d_t image, int2 coord, int lod, uint4 color);\n" |
43004 | "\n" |
43005 | "void __ovld write_imagef(read_write image2d_array_t image_array, int4 coord, int lod, float4 color);\n" |
43006 | "void __ovld write_imagei(read_write image2d_array_t image_array, int4 coord, int lod, int4 color);\n" |
43007 | "void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, int lod, uint4 color);\n" |
43008 | "\n" |
43009 | "void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, int lod, float color);\n" |
43010 | "void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, int lod, float color);\n" |
43011 | "\n" |
43012 | "#ifdef cl_khr_3d_image_writes\n" |
43013 | "void __ovld write_imagef(read_write image3d_t image, int4 coord, int lod, float4 color);\n" |
43014 | "void __ovld write_imagei(read_write image3d_t image, int4 coord, int lod, int4 color);\n" |
43015 | "void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color);\n" |
43016 | "#endif\n" |
43017 | "#endif //cl_khr_mipmap_image\n" |
43018 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43019 | "\n" |
43020 | "// Image write functions for half4 type\n" |
43021 | "#ifdef cl_khr_fp16\n" |
43022 | "void __ovld write_imageh(read_write image1d_t image, int coord, half4 color);\n" |
43023 | "void __ovld write_imageh(read_write image2d_t image, int2 coord, half4 color);\n" |
43024 | "#ifdef cl_khr_3d_image_writes\n" |
43025 | "void __ovld write_imageh(read_write image3d_t image, int4 coord, half4 color);\n" |
43026 | "#endif\n" |
43027 | "void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 color);\n" |
43028 | "void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color);\n" |
43029 | "void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color);\n" |
43030 | "#endif //cl_khr_fp16\n" |
43031 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43032 | "\n" |
43033 | "// Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have\n" |
43034 | "// access qualifier, which by default assume read_only access qualifier. Image query builtin\n" |
43035 | "// functions with write_only image argument should also be declared.\n" |
43036 | "\n" |
43037 | "/**\n" |
43038 | " * Return the image width in pixels.\n" |
43039 | " *\n" |
43040 | " */\n" |
43041 | "int __ovld __cnfn get_image_width(read_only image1d_t image);\n" |
43042 | "int __ovld __cnfn get_image_width(read_only image1d_buffer_t image);\n" |
43043 | "int __ovld __cnfn get_image_width(read_only image2d_t image);\n" |
43044 | "#ifdef cl_khr_3d_image_writes\n" |
43045 | "int __ovld __cnfn get_image_width(read_only image3d_t image);\n" |
43046 | "#endif\n" |
43047 | "int __ovld __cnfn get_image_width(read_only image1d_array_t image);\n" |
43048 | "int __ovld __cnfn get_image_width(read_only image2d_array_t image);\n" |
43049 | "#ifdef cl_khr_depth_images\n" |
43050 | "int __ovld __cnfn get_image_width(read_only image2d_depth_t image);\n" |
43051 | "int __ovld __cnfn get_image_width(read_only image2d_array_depth_t image);\n" |
43052 | "#endif //cl_khr_depth_images\n" |
43053 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43054 | "int __ovld __cnfn get_image_width(read_only image2d_msaa_t image);\n" |
43055 | "int __ovld __cnfn get_image_width(read_only image2d_msaa_depth_t image);\n" |
43056 | "int __ovld __cnfn get_image_width(read_only image2d_array_msaa_t image);\n" |
43057 | "int __ovld __cnfn get_image_width(read_only image2d_array_msaa_depth_t image);\n" |
43058 | "#endif //cl_khr_gl_msaa_sharing\n" |
43059 | "\n" |
43060 | "int __ovld __cnfn get_image_width(write_only image1d_t image);\n" |
43061 | "int __ovld __cnfn get_image_width(write_only image1d_buffer_t image);\n" |
43062 | "int __ovld __cnfn get_image_width(write_only image2d_t image);\n" |
43063 | "#ifdef cl_khr_3d_image_writes\n" |
43064 | "int __ovld __cnfn get_image_width(write_only image3d_t image);\n" |
43065 | "#endif\n" |
43066 | "int __ovld __cnfn get_image_width(write_only image1d_array_t image);\n" |
43067 | "int __ovld __cnfn get_image_width(write_only image2d_array_t image);\n" |
43068 | "#ifdef cl_khr_depth_images\n" |
43069 | "int __ovld __cnfn get_image_width(write_only image2d_depth_t image);\n" |
43070 | "int __ovld __cnfn get_image_width(write_only image2d_array_depth_t image);\n" |
43071 | "#endif //cl_khr_depth_images\n" |
43072 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43073 | "int __ovld __cnfn get_image_width(write_only image2d_msaa_t image);\n" |
43074 | "int __ovld __cnfn get_image_width(write_only image2d_msaa_depth_t image);\n" |
43075 | "int __ovld __cnfn get_image_width(write_only image2d_array_msaa_t image);\n" |
43076 | "int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image);\n" |
43077 | "#endif //cl_khr_gl_msaa_sharing\n" |
43078 | "\n" |
43079 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43080 | "int __ovld __cnfn get_image_width(read_write image1d_t image);\n" |
43081 | "int __ovld __cnfn get_image_width(read_write image1d_buffer_t image);\n" |
43082 | "int __ovld __cnfn get_image_width(read_write image2d_t image);\n" |
43083 | "int __ovld __cnfn get_image_width(read_write image3d_t image);\n" |
43084 | "int __ovld __cnfn get_image_width(read_write image1d_array_t image);\n" |
43085 | "int __ovld __cnfn get_image_width(read_write image2d_array_t image);\n" |
43086 | "#ifdef cl_khr_depth_images\n" |
43087 | "int __ovld __cnfn get_image_width(read_write image2d_depth_t image);\n" |
43088 | "int __ovld __cnfn get_image_width(read_write image2d_array_depth_t image);\n" |
43089 | "#endif //cl_khr_depth_images\n" |
43090 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43091 | "int __ovld __cnfn get_image_width(read_write image2d_msaa_t image);\n" |
43092 | "int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image);\n" |
43093 | "int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image);\n" |
43094 | "int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image);\n" |
43095 | "#endif //cl_khr_gl_msaa_sharing\n" |
43096 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43097 | "\n" |
43098 | "/**\n" |
43099 | " * Return the image height in pixels.\n" |
43100 | " */\n" |
43101 | "int __ovld __cnfn get_image_height(read_only image2d_t image);\n" |
43102 | "int __ovld __cnfn get_image_height(read_only image3d_t image);\n" |
43103 | "int __ovld __cnfn get_image_height(read_only image2d_array_t image);\n" |
43104 | "#ifdef cl_khr_depth_images\n" |
43105 | "int __ovld __cnfn get_image_height(read_only image2d_depth_t image);\n" |
43106 | "int __ovld __cnfn get_image_height(read_only image2d_array_depth_t image);\n" |
43107 | "#endif //cl_khr_depth_images\n" |
43108 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43109 | "int __ovld __cnfn get_image_height(read_only image2d_msaa_t image);\n" |
43110 | "int __ovld __cnfn get_image_height(read_only image2d_msaa_depth_t image);\n" |
43111 | "int __ovld __cnfn get_image_height(read_only image2d_array_msaa_t image);\n" |
43112 | "int __ovld __cnfn get_image_height(read_only image2d_array_msaa_depth_t image);\n" |
43113 | "#endif //cl_khr_gl_msaa_sharing\n" |
43114 | "\n" |
43115 | "int __ovld __cnfn get_image_height(write_only image2d_t image);\n" |
43116 | "#ifdef cl_khr_3d_image_writes\n" |
43117 | "int __ovld __cnfn get_image_height(write_only image3d_t image);\n" |
43118 | "#endif\n" |
43119 | "int __ovld __cnfn get_image_height(write_only image2d_array_t image);\n" |
43120 | "#ifdef cl_khr_depth_images\n" |
43121 | "int __ovld __cnfn get_image_height(write_only image2d_depth_t image);\n" |
43122 | "int __ovld __cnfn get_image_height(write_only image2d_array_depth_t image);\n" |
43123 | "#endif //cl_khr_depth_images\n" |
43124 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43125 | "int __ovld __cnfn get_image_height(write_only image2d_msaa_t image);\n" |
43126 | "int __ovld __cnfn get_image_height(write_only image2d_msaa_depth_t image);\n" |
43127 | "int __ovld __cnfn get_image_height(write_only image2d_array_msaa_t image);\n" |
43128 | "int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image);\n" |
43129 | "#endif //cl_khr_gl_msaa_sharing\n" |
43130 | "\n" |
43131 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43132 | "int __ovld __cnfn get_image_height(read_write image2d_t image);\n" |
43133 | "int __ovld __cnfn get_image_height(read_write image3d_t image);\n" |
43134 | "int __ovld __cnfn get_image_height(read_write image2d_array_t image);\n" |
43135 | "#ifdef cl_khr_depth_images\n" |
43136 | "int __ovld __cnfn get_image_height(read_write image2d_depth_t image);\n" |
43137 | "int __ovld __cnfn get_image_height(read_write image2d_array_depth_t image);\n" |
43138 | "#endif //cl_khr_depth_images\n" |
43139 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43140 | "int __ovld __cnfn get_image_height(read_write image2d_msaa_t image);\n" |
43141 | "int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image);\n" |
43142 | "int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image);\n" |
43143 | "int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image);\n" |
43144 | "#endif //cl_khr_gl_msaa_sharing\n" |
43145 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43146 | "\n" |
43147 | "/**\n" |
43148 | " * Return the image depth in pixels.\n" |
43149 | " */\n" |
43150 | "int __ovld __cnfn get_image_depth(read_only image3d_t image);\n" |
43151 | "\n" |
43152 | "#ifdef cl_khr_3d_image_writes\n" |
43153 | "int __ovld __cnfn get_image_depth(write_only image3d_t image);\n" |
43154 | "#endif\n" |
43155 | "\n" |
43156 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43157 | "int __ovld __cnfn get_image_depth(read_write image3d_t image);\n" |
43158 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43159 | "\n" |
43160 | "// OpenCL Extension v2.0 s9.18 - Mipmaps\n" |
43161 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43162 | "#ifdef cl_khr_mipmap_image\n" |
43163 | "/**\n" |
43164 | " * Return the image miplevels.\n" |
43165 | " */\n" |
43166 | "\n" |
43167 | "int __ovld get_image_num_mip_levels(read_only image1d_t image);\n" |
43168 | "int __ovld get_image_num_mip_levels(read_only image2d_t image);\n" |
43169 | "int __ovld get_image_num_mip_levels(read_only image3d_t image);\n" |
43170 | "\n" |
43171 | "int __ovld get_image_num_mip_levels(write_only image1d_t image);\n" |
43172 | "int __ovld get_image_num_mip_levels(write_only image2d_t image);\n" |
43173 | "#ifdef cl_khr_3d_image_writes\n" |
43174 | "int __ovld get_image_num_mip_levels(write_only image3d_t image);\n" |
43175 | "#endif\n" |
43176 | "\n" |
43177 | "int __ovld get_image_num_mip_levels(read_write image1d_t image);\n" |
43178 | "int __ovld get_image_num_mip_levels(read_write image2d_t image);\n" |
43179 | "int __ovld get_image_num_mip_levels(read_write image3d_t image);\n" |
43180 | "\n" |
43181 | "int __ovld get_image_num_mip_levels(read_only image1d_array_t image);\n" |
43182 | "int __ovld get_image_num_mip_levels(read_only image2d_array_t image);\n" |
43183 | "int __ovld get_image_num_mip_levels(read_only image2d_array_depth_t image);\n" |
43184 | "int __ovld get_image_num_mip_levels(read_only image2d_depth_t image);\n" |
43185 | "\n" |
43186 | "int __ovld get_image_num_mip_levels(write_only image1d_array_t image);\n" |
43187 | "int __ovld get_image_num_mip_levels(write_only image2d_array_t image);\n" |
43188 | "int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t image);\n" |
43189 | "int __ovld get_image_num_mip_levels(write_only image2d_depth_t image);\n" |
43190 | "\n" |
43191 | "int __ovld get_image_num_mip_levels(read_write image1d_array_t image);\n" |
43192 | "int __ovld get_image_num_mip_levels(read_write image2d_array_t image);\n" |
43193 | "int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image);\n" |
43194 | "int __ovld get_image_num_mip_levels(read_write image2d_depth_t image);\n" |
43195 | "\n" |
43196 | "#endif //cl_khr_mipmap_image\n" |
43197 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43198 | "\n" |
43199 | "/**\n" |
43200 | " * Return the channel data type. Valid values are:\n" |
43201 | " * CLK_SNORM_INT8\n" |
43202 | " * CLK_SNORM_INT16\n" |
43203 | " * CLK_UNORM_INT8\n" |
43204 | " * CLK_UNORM_INT16\n" |
43205 | " * CLK_UNORM_SHORT_565\n" |
43206 | " * CLK_UNORM_SHORT_555\n" |
43207 | " * CLK_UNORM_SHORT_101010\n" |
43208 | " * CLK_SIGNED_INT8\n" |
43209 | " * CLK_SIGNED_INT16\n" |
43210 | " * CLK_SIGNED_INT32\n" |
43211 | " * CLK_UNSIGNED_INT8\n" |
43212 | " * CLK_UNSIGNED_INT16\n" |
43213 | " * CLK_UNSIGNED_INT32\n" |
43214 | " * CLK_HALF_FLOAT\n" |
43215 | " * CLK_FLOAT\n" |
43216 | " */\n" |
43217 | "\n" |
43218 | "//\n" |
43219 | "// Channel Datatype.\n" |
43220 | "//\n" |
43221 | "#define CLK_SNORM_INT8 0x10D0\n" |
43222 | "#define CLK_SNORM_INT16 0x10D1\n" |
43223 | "#define CLK_UNORM_INT8 0x10D2\n" |
43224 | "#define CLK_UNORM_INT16 0x10D3\n" |
43225 | "#define CLK_UNORM_SHORT_565 0x10D4\n" |
43226 | "#define CLK_UNORM_SHORT_555 0x10D5\n" |
43227 | "#define CLK_UNORM_INT_101010 0x10D6\n" |
43228 | "#define CLK_SIGNED_INT8 0x10D7\n" |
43229 | "#define CLK_SIGNED_INT16 0x10D8\n" |
43230 | "#define CLK_SIGNED_INT32 0x10D9\n" |
43231 | "#define CLK_UNSIGNED_INT8 0x10DA\n" |
43232 | "#define CLK_UNSIGNED_INT16 0x10DB\n" |
43233 | "#define CLK_UNSIGNED_INT32 0x10DC\n" |
43234 | "#define CLK_HALF_FLOAT 0x10DD\n" |
43235 | "#define CLK_FLOAT 0x10DE\n" |
43236 | "#define CLK_UNORM_INT24 0x10DF\n" |
43237 | "\n" |
43238 | "int __ovld __cnfn get_image_channel_data_type(read_only image1d_t image);\n" |
43239 | "int __ovld __cnfn get_image_channel_data_type(read_only image1d_buffer_t image);\n" |
43240 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_t image);\n" |
43241 | "int __ovld __cnfn get_image_channel_data_type(read_only image3d_t image);\n" |
43242 | "int __ovld __cnfn get_image_channel_data_type(read_only image1d_array_t image);\n" |
43243 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_t image);\n" |
43244 | "#ifdef cl_khr_depth_images\n" |
43245 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_depth_t image);\n" |
43246 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_depth_t image);\n" |
43247 | "#endif //cl_khr_depth_images\n" |
43248 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43249 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_t image);\n" |
43250 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_depth_t image);\n" |
43251 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_t image);\n" |
43252 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_depth_t image);\n" |
43253 | "#endif //cl_khr_gl_msaa_sharing\n" |
43254 | "\n" |
43255 | "int __ovld __cnfn get_image_channel_data_type(write_only image1d_t image);\n" |
43256 | "int __ovld __cnfn get_image_channel_data_type(write_only image1d_buffer_t image);\n" |
43257 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_t image);\n" |
43258 | "#ifdef cl_khr_3d_image_writes\n" |
43259 | "int __ovld __cnfn get_image_channel_data_type(write_only image3d_t image);\n" |
43260 | "#endif\n" |
43261 | "int __ovld __cnfn get_image_channel_data_type(write_only image1d_array_t image);\n" |
43262 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_t image);\n" |
43263 | "#ifdef cl_khr_depth_images\n" |
43264 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_depth_t image);\n" |
43265 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_depth_t image);\n" |
43266 | "#endif //cl_khr_depth_images\n" |
43267 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43268 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_t image);\n" |
43269 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_depth_t image);\n" |
43270 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_t image);\n" |
43271 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_depth_t image);\n" |
43272 | "#endif //cl_khr_gl_msaa_sharing\n" |
43273 | "\n" |
43274 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43275 | "int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image);\n" |
43276 | "int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image);\n" |
43277 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image);\n" |
43278 | "int __ovld __cnfn get_image_channel_data_type(read_write image3d_t image);\n" |
43279 | "int __ovld __cnfn get_image_channel_data_type(read_write image1d_array_t image);\n" |
43280 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_t image);\n" |
43281 | "#ifdef cl_khr_depth_images\n" |
43282 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_depth_t image);\n" |
43283 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_depth_t image);\n" |
43284 | "#endif //cl_khr_depth_images\n" |
43285 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43286 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_t image);\n" |
43287 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t image);\n" |
43288 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image);\n" |
43289 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image);\n" |
43290 | "#endif //cl_khr_gl_msaa_sharing\n" |
43291 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43292 | "\n" |
43293 | "/**\n" |
43294 | " * Return the image channel order. Valid values are:\n" |
43295 | " * CLK_A\n" |
43296 | " * CLK_R\n" |
43297 | " * CLK_Rx\n" |
43298 | " * CLK_RG\n" |
43299 | " * CLK_RGx\n" |
43300 | " * CLK_RA\n" |
43301 | " * CLK_RGB\n" |
43302 | " * CLK_RGBx\n" |
43303 | " * CLK_RGBA\n" |
43304 | " * CLK_ARGB\n" |
43305 | " * CLK_BGRA\n" |
43306 | " * CLK_INTENSITY\n" |
43307 | " * CLK_LUMINANCE\n" |
43308 | " */\n" |
43309 | "// Channel order, numbering must be aligned with cl_channel_order in cl.h\n" |
43310 | "//\n" |
43311 | "#define CLK_R 0x10B0\n" |
43312 | "#define CLK_A 0x10B1\n" |
43313 | "#define CLK_RG 0x10B2\n" |
43314 | "#define CLK_RA 0x10B3\n" |
43315 | "#define CLK_RGB 0x10B4\n" |
43316 | "#define CLK_RGBA 0x10B5\n" |
43317 | "#define CLK_BGRA 0x10B6\n" |
43318 | "#define CLK_ARGB 0x10B7\n" |
43319 | "#define CLK_INTENSITY 0x10B8\n" |
43320 | "#define CLK_LUMINANCE 0x10B9\n" |
43321 | "#define CLK_Rx 0x10BA\n" |
43322 | "#define CLK_RGx 0x10BB\n" |
43323 | "#define CLK_RGBx 0x10BC\n" |
43324 | "#define CLK_DEPTH 0x10BD\n" |
43325 | "#define CLK_DEPTH_STENCIL 0x10BE\n" |
43326 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43327 | "#define CLK_sRGB 0x10BF\n" |
43328 | "#define CLK_sRGBx 0x10C0\n" |
43329 | "#define CLK_sRGBA 0x10C1\n" |
43330 | "#define CLK_sBGRA 0x10C2\n" |
43331 | "#define CLK_ABGR 0x10C3\n" |
43332 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43333 | "\n" |
43334 | "int __ovld __cnfn get_image_channel_order(read_only image1d_t image);\n" |
43335 | "int __ovld __cnfn get_image_channel_order(read_only image1d_buffer_t image);\n" |
43336 | "int __ovld __cnfn get_image_channel_order(read_only image2d_t image);\n" |
43337 | "int __ovld __cnfn get_image_channel_order(read_only image3d_t image);\n" |
43338 | "int __ovld __cnfn get_image_channel_order(read_only image1d_array_t image);\n" |
43339 | "int __ovld __cnfn get_image_channel_order(read_only image2d_array_t image);\n" |
43340 | "#ifdef cl_khr_depth_images\n" |
43341 | "int __ovld __cnfn get_image_channel_order(read_only image2d_depth_t image);\n" |
43342 | "int __ovld __cnfn get_image_channel_order(read_only image2d_array_depth_t image);\n" |
43343 | "#endif //cl_khr_depth_images\n" |
43344 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43345 | "int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_t image);\n" |
43346 | "int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_depth_t image);\n" |
43347 | "int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_t image);\n" |
43348 | "int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_depth_t image);\n" |
43349 | "#endif //cl_khr_gl_msaa_sharing\n" |
43350 | "\n" |
43351 | "int __ovld __cnfn get_image_channel_order(write_only image1d_t image);\n" |
43352 | "int __ovld __cnfn get_image_channel_order(write_only image1d_buffer_t image);\n" |
43353 | "int __ovld __cnfn get_image_channel_order(write_only image2d_t image);\n" |
43354 | "#ifdef cl_khr_3d_image_writes\n" |
43355 | "int __ovld __cnfn get_image_channel_order(write_only image3d_t image);\n" |
43356 | "#endif\n" |
43357 | "int __ovld __cnfn get_image_channel_order(write_only image1d_array_t image);\n" |
43358 | "int __ovld __cnfn get_image_channel_order(write_only image2d_array_t image);\n" |
43359 | "#ifdef cl_khr_depth_images\n" |
43360 | "int __ovld __cnfn get_image_channel_order(write_only image2d_depth_t image);\n" |
43361 | "int __ovld __cnfn get_image_channel_order(write_only image2d_array_depth_t image);\n" |
43362 | "#endif //cl_khr_depth_images\n" |
43363 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43364 | "int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_t image);\n" |
43365 | "int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_depth_t image);\n" |
43366 | "int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_t image);\n" |
43367 | "int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t image);\n" |
43368 | "#endif //cl_khr_gl_msaa_sharing\n" |
43369 | "\n" |
43370 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43371 | "int __ovld __cnfn get_image_channel_order(read_write image1d_t image);\n" |
43372 | "int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image);\n" |
43373 | "int __ovld __cnfn get_image_channel_order(read_write image2d_t image);\n" |
43374 | "int __ovld __cnfn get_image_channel_order(read_write image3d_t image);\n" |
43375 | "int __ovld __cnfn get_image_channel_order(read_write image1d_array_t image);\n" |
43376 | "int __ovld __cnfn get_image_channel_order(read_write image2d_array_t image);\n" |
43377 | "#ifdef cl_khr_depth_images\n" |
43378 | "int __ovld __cnfn get_image_channel_order(read_write image2d_depth_t image);\n" |
43379 | "int __ovld __cnfn get_image_channel_order(read_write image2d_array_depth_t image);\n" |
43380 | "#endif //cl_khr_depth_images\n" |
43381 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43382 | "int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_t image);\n" |
43383 | "int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image);\n" |
43384 | "int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image);\n" |
43385 | "int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image);\n" |
43386 | "#endif //cl_khr_gl_msaa_sharing\n" |
43387 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43388 | "\n" |
43389 | "/**\n" |
43390 | " * Return the 2D image width and height as an int2\n" |
43391 | " * type. The width is returned in the x component, and\n" |
43392 | " * the height in the y component.\n" |
43393 | " */\n" |
43394 | "int2 __ovld __cnfn get_image_dim(read_only image2d_t image);\n" |
43395 | "int2 __ovld __cnfn get_image_dim(read_only image2d_array_t image);\n" |
43396 | "#ifdef cl_khr_depth_images\n" |
43397 | "int2 __ovld __cnfn get_image_dim(read_only image2d_array_depth_t image);\n" |
43398 | "int2 __ovld __cnfn get_image_dim(read_only image2d_depth_t image);\n" |
43399 | "#endif //cl_khr_depth_images\n" |
43400 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43401 | "int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_t image);\n" |
43402 | "int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_depth_t image);\n" |
43403 | "int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_t image);\n" |
43404 | "int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_depth_t image);\n" |
43405 | "#endif //cl_khr_gl_msaa_sharing\n" |
43406 | "\n" |
43407 | "int2 __ovld __cnfn get_image_dim(write_only image2d_t image);\n" |
43408 | "int2 __ovld __cnfn get_image_dim(write_only image2d_array_t image);\n" |
43409 | "#ifdef cl_khr_depth_images\n" |
43410 | "int2 __ovld __cnfn get_image_dim(write_only image2d_array_depth_t image);\n" |
43411 | "int2 __ovld __cnfn get_image_dim(write_only image2d_depth_t image);\n" |
43412 | "#endif //cl_khr_depth_images\n" |
43413 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43414 | "int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_t image);\n" |
43415 | "int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_depth_t image);\n" |
43416 | "int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_t image);\n" |
43417 | "int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image);\n" |
43418 | "#endif //cl_khr_gl_msaa_sharing\n" |
43419 | "\n" |
43420 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43421 | "int2 __ovld __cnfn get_image_dim(read_write image2d_t image);\n" |
43422 | "int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image);\n" |
43423 | "#ifdef cl_khr_depth_images\n" |
43424 | "int2 __ovld __cnfn get_image_dim(read_write image2d_array_depth_t image);\n" |
43425 | "int2 __ovld __cnfn get_image_dim(read_write image2d_depth_t image);\n" |
43426 | "#endif //cl_khr_depth_images\n" |
43427 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43428 | "int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_t image);\n" |
43429 | "int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image);\n" |
43430 | "int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image);\n" |
43431 | "int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image);\n" |
43432 | "#endif //cl_khr_gl_msaa_sharing\n" |
43433 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43434 | "\n" |
43435 | "/**\n" |
43436 | " * Return the 3D image width, height, and depth as an\n" |
43437 | " * int4 type. The width is returned in the x\n" |
43438 | " * component, height in the y component, depth in the z\n" |
43439 | " * component and the w component is 0.\n" |
43440 | " */\n" |
43441 | "int4 __ovld __cnfn get_image_dim(read_only image3d_t image);\n" |
43442 | "#ifdef cl_khr_3d_image_writes\n" |
43443 | "int4 __ovld __cnfn get_image_dim(write_only image3d_t image);\n" |
43444 | "#endif\n" |
43445 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43446 | "int4 __ovld __cnfn get_image_dim(read_write image3d_t image);\n" |
43447 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43448 | "\n" |
43449 | "/**\n" |
43450 | " * Return the image array size.\n" |
43451 | " */\n" |
43452 | "\n" |
43453 | "size_t __ovld __cnfn get_image_array_size(read_only image1d_array_t image_array);\n" |
43454 | "size_t __ovld __cnfn get_image_array_size(read_only image2d_array_t image_array);\n" |
43455 | "#ifdef cl_khr_depth_images\n" |
43456 | "size_t __ovld __cnfn get_image_array_size(read_only image2d_array_depth_t image_array);\n" |
43457 | "#endif //cl_khr_depth_images\n" |
43458 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43459 | "size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_t image_array);\n" |
43460 | "size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_depth_t image_array);\n" |
43461 | "#endif //cl_khr_gl_msaa_sharing\n" |
43462 | "\n" |
43463 | "size_t __ovld __cnfn get_image_array_size(write_only image1d_array_t image_array);\n" |
43464 | "size_t __ovld __cnfn get_image_array_size(write_only image2d_array_t image_array);\n" |
43465 | "#ifdef cl_khr_depth_images\n" |
43466 | "size_t __ovld __cnfn get_image_array_size(write_only image2d_array_depth_t image_array);\n" |
43467 | "#endif //cl_khr_depth_images\n" |
43468 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43469 | "size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_t image_array);\n" |
43470 | "size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t image_array);\n" |
43471 | "#endif //cl_khr_gl_msaa_sharing\n" |
43472 | "\n" |
43473 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43474 | "size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array);\n" |
43475 | "size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array);\n" |
43476 | "#ifdef cl_khr_depth_images\n" |
43477 | "size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image_array);\n" |
43478 | "#endif //cl_khr_depth_images\n" |
43479 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43480 | "size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array);\n" |
43481 | "size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array);\n" |
43482 | "#endif //cl_khr_gl_msaa_sharing\n" |
43483 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43484 | "\n" |
43485 | "/**\n" |
43486 | "* Return the number of samples associated with image\n" |
43487 | "*/\n" |
43488 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
43489 | "int __ovld get_image_num_samples(read_only image2d_msaa_t image);\n" |
43490 | "int __ovld get_image_num_samples(read_only image2d_msaa_depth_t image);\n" |
43491 | "int __ovld get_image_num_samples(read_only image2d_array_msaa_depth_t image);\n" |
43492 | "int __ovld get_image_num_samples(read_only image2d_array_msaa_t image);\n" |
43493 | "int __ovld get_image_num_samples(read_only image2d_array_msaa_depth_t image);\n" |
43494 | "\n" |
43495 | "int __ovld get_image_num_samples(write_only image2d_msaa_t image);\n" |
43496 | "int __ovld get_image_num_samples(write_only image2d_msaa_depth_t image);\n" |
43497 | "int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);\n" |
43498 | "int __ovld get_image_num_samples(write_only image2d_array_msaa_t image);\n" |
43499 | "int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);\n" |
43500 | "\n" |
43501 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43502 | "int __ovld get_image_num_samples(read_write image2d_msaa_t image);\n" |
43503 | "int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image);\n" |
43504 | "int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);\n" |
43505 | "int __ovld get_image_num_samples(read_write image2d_array_msaa_t image);\n" |
43506 | "int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);\n" |
43507 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43508 | "#endif\n" |
43509 | "\n" |
43510 | "// OpenCL v2.0 s6.13.15 - Work-group Functions\n" |
43511 | "\n" |
43512 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43513 | "int __ovld __conv work_group_all(int predicate);\n" |
43514 | "int __ovld __conv work_group_any(int predicate);\n" |
43515 | "\n" |
43516 | "#ifdef cl_khr_fp16\n" |
43517 | "half __ovld __conv work_group_broadcast(half a, size_t local_id);\n" |
43518 | "half __ovld __conv work_group_broadcast(half a, size_t x, size_t y);\n" |
43519 | "half __ovld __conv work_group_broadcast(half a, size_t x, size_t y, size_t z);\n" |
43520 | "#endif\n" |
43521 | "int __ovld __conv work_group_broadcast(int a, size_t local_id);\n" |
43522 | "int __ovld __conv work_group_broadcast(int a, size_t x, size_t y);\n" |
43523 | "int __ovld __conv work_group_broadcast(int a, size_t x, size_t y, size_t z);\n" |
43524 | "uint __ovld __conv work_group_broadcast(uint a, size_t local_id);\n" |
43525 | "uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y);\n" |
43526 | "uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y, size_t z);\n" |
43527 | "long __ovld __conv work_group_broadcast(long a, size_t local_id);\n" |
43528 | "long __ovld __conv work_group_broadcast(long a, size_t x, size_t y);\n" |
43529 | "long __ovld __conv work_group_broadcast(long a, size_t x, size_t y, size_t z);\n" |
43530 | "ulong __ovld __conv work_group_broadcast(ulong a, size_t local_id);\n" |
43531 | "ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y);\n" |
43532 | "ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y, size_t z);\n" |
43533 | "float __ovld __conv work_group_broadcast(float a, size_t local_id);\n" |
43534 | "float __ovld __conv work_group_broadcast(float a, size_t x, size_t y);\n" |
43535 | "float __ovld __conv work_group_broadcast(float a, size_t x, size_t y, size_t z);\n" |
43536 | "#ifdef cl_khr_fp64\n" |
43537 | "double __ovld __conv work_group_broadcast(double a, size_t local_id);\n" |
43538 | "double __ovld __conv work_group_broadcast(double a, size_t x, size_t y);\n" |
43539 | "double __ovld __conv work_group_broadcast(double a, size_t x, size_t y, size_t z);\n" |
43540 | "#endif //cl_khr_fp64\n" |
43541 | "\n" |
43542 | "#ifdef cl_khr_fp16\n" |
43543 | "half __ovld __conv work_group_reduce_add(half x);\n" |
43544 | "half __ovld __conv work_group_reduce_min(half x);\n" |
43545 | "half __ovld __conv work_group_reduce_max(half x);\n" |
43546 | "half __ovld __conv work_group_scan_exclusive_add(half x);\n" |
43547 | "half __ovld __conv work_group_scan_exclusive_min(half x);\n" |
43548 | "half __ovld __conv work_group_scan_exclusive_max(half x);\n" |
43549 | "half __ovld __conv work_group_scan_inclusive_add(half x);\n" |
43550 | "half __ovld __conv work_group_scan_inclusive_min(half x);\n" |
43551 | "half __ovld __conv work_group_scan_inclusive_max(half x);\n" |
43552 | "#endif\n" |
43553 | "int __ovld __conv work_group_reduce_add(int x);\n" |
43554 | "int __ovld __conv work_group_reduce_min(int x);\n" |
43555 | "int __ovld __conv work_group_reduce_max(int x);\n" |
43556 | "int __ovld __conv work_group_scan_exclusive_add(int x);\n" |
43557 | "int __ovld __conv work_group_scan_exclusive_min(int x);\n" |
43558 | "int __ovld __conv work_group_scan_exclusive_max(int x);\n" |
43559 | "int __ovld __conv work_group_scan_inclusive_add(int x);\n" |
43560 | "int __ovld __conv work_group_scan_inclusive_min(int x);\n" |
43561 | "int __ovld __conv work_group_scan_inclusive_max(int x);\n" |
43562 | "uint __ovld __conv work_group_reduce_add(uint x);\n" |
43563 | "uint __ovld __conv work_group_reduce_min(uint x);\n" |
43564 | "uint __ovld __conv work_group_reduce_max(uint x);\n" |
43565 | "uint __ovld __conv work_group_scan_exclusive_add(uint x);\n" |
43566 | "uint __ovld __conv work_group_scan_exclusive_min(uint x);\n" |
43567 | "uint __ovld __conv work_group_scan_exclusive_max(uint x);\n" |
43568 | "uint __ovld __conv work_group_scan_inclusive_add(uint x);\n" |
43569 | "uint __ovld __conv work_group_scan_inclusive_min(uint x);\n" |
43570 | "uint __ovld __conv work_group_scan_inclusive_max(uint x);\n" |
43571 | "long __ovld __conv work_group_reduce_add(long x);\n" |
43572 | "long __ovld __conv work_group_reduce_min(long x);\n" |
43573 | "long __ovld __conv work_group_reduce_max(long x);\n" |
43574 | "long __ovld __conv work_group_scan_exclusive_add(long x);\n" |
43575 | "long __ovld __conv work_group_scan_exclusive_min(long x);\n" |
43576 | "long __ovld __conv work_group_scan_exclusive_max(long x);\n" |
43577 | "long __ovld __conv work_group_scan_inclusive_add(long x);\n" |
43578 | "long __ovld __conv work_group_scan_inclusive_min(long x);\n" |
43579 | "long __ovld __conv work_group_scan_inclusive_max(long x);\n" |
43580 | "ulong __ovld __conv work_group_reduce_add(ulong x);\n" |
43581 | "ulong __ovld __conv work_group_reduce_min(ulong x);\n" |
43582 | "ulong __ovld __conv work_group_reduce_max(ulong x);\n" |
43583 | "ulong __ovld __conv work_group_scan_exclusive_add(ulong x);\n" |
43584 | "ulong __ovld __conv work_group_scan_exclusive_min(ulong x);\n" |
43585 | "ulong __ovld __conv work_group_scan_exclusive_max(ulong x);\n" |
43586 | "ulong __ovld __conv work_group_scan_inclusive_add(ulong x);\n" |
43587 | "ulong __ovld __conv work_group_scan_inclusive_min(ulong x);\n" |
43588 | "ulong __ovld __conv work_group_scan_inclusive_max(ulong x);\n" |
43589 | "float __ovld __conv work_group_reduce_add(float x);\n" |
43590 | "float __ovld __conv work_group_reduce_min(float x);\n" |
43591 | "float __ovld __conv work_group_reduce_max(float x);\n" |
43592 | "float __ovld __conv work_group_scan_exclusive_add(float x);\n" |
43593 | "float __ovld __conv work_group_scan_exclusive_min(float x);\n" |
43594 | "float __ovld __conv work_group_scan_exclusive_max(float x);\n" |
43595 | "float __ovld __conv work_group_scan_inclusive_add(float x);\n" |
43596 | "float __ovld __conv work_group_scan_inclusive_min(float x);\n" |
43597 | "float __ovld __conv work_group_scan_inclusive_max(float x);\n" |
43598 | "#ifdef cl_khr_fp64\n" |
43599 | "double __ovld __conv work_group_reduce_add(double x);\n" |
43600 | "double __ovld __conv work_group_reduce_min(double x);\n" |
43601 | "double __ovld __conv work_group_reduce_max(double x);\n" |
43602 | "double __ovld __conv work_group_scan_exclusive_add(double x);\n" |
43603 | "double __ovld __conv work_group_scan_exclusive_min(double x);\n" |
43604 | "double __ovld __conv work_group_scan_exclusive_max(double x);\n" |
43605 | "double __ovld __conv work_group_scan_inclusive_add(double x);\n" |
43606 | "double __ovld __conv work_group_scan_inclusive_min(double x);\n" |
43607 | "double __ovld __conv work_group_scan_inclusive_max(double x);\n" |
43608 | "#endif //cl_khr_fp64\n" |
43609 | "\n" |
43610 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43611 | "\n" |
43612 | "// OpenCL v2.0 s6.13.16 - Pipe Functions\n" |
43613 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43614 | "#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t))\n" |
43615 | "bool __ovld is_valid_reserve_id(reserve_id_t reserve_id);\n" |
43616 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43617 | "\n" |
43618 | "\n" |
43619 | "// OpenCL v2.0 s6.13.17 - Enqueue Kernels\n" |
43620 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43621 | "\n" |
43622 | "#define CL_COMPLETE 0x0\n" |
43623 | "#define CL_RUNNING 0x1\n" |
43624 | "#define CL_SUBMITTED 0x2\n" |
43625 | "#define CL_QUEUED 0x3\n" |
43626 | "\n" |
43627 | "#define CLK_SUCCESS 0\n" |
43628 | "#define CLK_ENQUEUE_FAILURE -101\n" |
43629 | "#define CLK_INVALID_QUEUE -102\n" |
43630 | "#define CLK_INVALID_NDRANGE -160\n" |
43631 | "#define CLK_INVALID_EVENT_WAIT_LIST -57\n" |
43632 | "#define CLK_DEVICE_QUEUE_FULL -161\n" |
43633 | "#define CLK_INVALID_ARG_SIZE -51\n" |
43634 | "#define CLK_EVENT_ALLOCATION_FAILURE -100\n" |
43635 | "#define CLK_OUT_OF_RESOURCES -5\n" |
43636 | "\n" |
43637 | "#define CLK_NULL_QUEUE 0\n" |
43638 | "#define CLK_NULL_EVENT (__builtin_astype(((void*)(__SIZE_MAX__)), clk_event_t))\n" |
43639 | "\n" |
43640 | "// execution model related definitions\n" |
43641 | "#define CLK_ENQUEUE_FLAGS_NO_WAIT 0x0\n" |
43642 | "#define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0x1\n" |
43643 | "#define CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP 0x2\n" |
43644 | "\n" |
43645 | "typedef int kernel_enqueue_flags_t;\n" |
43646 | "typedef int clk_profiling_info;\n" |
43647 | "\n" |
43648 | "// Profiling info name (see capture_event_profiling_info)\n" |
43649 | "#define CLK_PROFILING_COMMAND_EXEC_TIME 0x1\n" |
43650 | "\n" |
43651 | "#define MAX_WORK_DIM 3\n" |
43652 | "\n" |
43653 | "typedef struct {\n" |
43654 | " unsigned int workDimension;\n" |
43655 | " size_t globalWorkOffset[MAX_WORK_DIM];\n" |
43656 | " size_t globalWorkSize[MAX_WORK_DIM];\n" |
43657 | " size_t localWorkSize[MAX_WORK_DIM];\n" |
43658 | "} ndrange_t;\n" |
43659 | "\n" |
43660 | "ndrange_t __ovld ndrange_1D(size_t);\n" |
43661 | "ndrange_t __ovld ndrange_1D(size_t, size_t);\n" |
43662 | "ndrange_t __ovld ndrange_1D(size_t, size_t, size_t);\n" |
43663 | "\n" |
43664 | "ndrange_t __ovld ndrange_2D(const size_t[2]);\n" |
43665 | "ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2]);\n" |
43666 | "ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2], const size_t[2]);\n" |
43667 | "\n" |
43668 | "ndrange_t __ovld ndrange_3D(const size_t[3]);\n" |
43669 | "ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3]);\n" |
43670 | "ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3], const size_t[3]);\n" |
43671 | "\n" |
43672 | "int __ovld enqueue_marker(queue_t, uint, const __private clk_event_t*, __private clk_event_t*);\n" |
43673 | "\n" |
43674 | "void __ovld retain_event(clk_event_t);\n" |
43675 | "\n" |
43676 | "void __ovld release_event(clk_event_t);\n" |
43677 | "\n" |
43678 | "clk_event_t __ovld create_user_event(void);\n" |
43679 | "\n" |
43680 | "void __ovld set_user_event_status(clk_event_t e, int state);\n" |
43681 | "\n" |
43682 | "bool __ovld is_valid_event (clk_event_t event);\n" |
43683 | "\n" |
43684 | "void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value);\n" |
43685 | "\n" |
43686 | "queue_t __ovld get_default_queue(void);\n" |
43687 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43688 | "\n" |
43689 | "// OpenCL Extension v2.0 s9.17 - Sub-groups\n" |
43690 | "\n" |
43691 | "#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)\n" |
43692 | "// Shared Sub Group Functions\n" |
43693 | "uint __ovld get_sub_group_size(void);\n" |
43694 | "uint __ovld get_max_sub_group_size(void);\n" |
43695 | "uint __ovld get_num_sub_groups(void);\n" |
43696 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43697 | "uint __ovld get_enqueued_num_sub_groups(void);\n" |
43698 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43699 | "uint __ovld get_sub_group_id(void);\n" |
43700 | "uint __ovld get_sub_group_local_id(void);\n" |
43701 | "\n" |
43702 | "void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags);\n" |
43703 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43704 | "void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope);\n" |
43705 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
43706 | "\n" |
43707 | "int __ovld __conv sub_group_all(int predicate);\n" |
43708 | "int __ovld __conv sub_group_any(int predicate);\n" |
43709 | "\n" |
43710 | "int __ovld __conv sub_group_broadcast(int x, uint sub_group_local_id);\n" |
43711 | "uint __ovld __conv sub_group_broadcast(uint x, uint sub_group_local_id);\n" |
43712 | "long __ovld __conv sub_group_broadcast(long x, uint sub_group_local_id);\n" |
43713 | "ulong __ovld __conv sub_group_broadcast(ulong x, uint sub_group_local_id);\n" |
43714 | "float __ovld __conv sub_group_broadcast(float x, uint sub_group_local_id);\n" |
43715 | "\n" |
43716 | "int __ovld __conv sub_group_reduce_add(int x);\n" |
43717 | "uint __ovld __conv sub_group_reduce_add(uint x);\n" |
43718 | "long __ovld __conv sub_group_reduce_add(long x);\n" |
43719 | "ulong __ovld __conv sub_group_reduce_add(ulong x);\n" |
43720 | "float __ovld __conv sub_group_reduce_add(float x);\n" |
43721 | "int __ovld __conv sub_group_reduce_min(int x);\n" |
43722 | "uint __ovld __conv sub_group_reduce_min(uint x);\n" |
43723 | "long __ovld __conv sub_group_reduce_min(long x);\n" |
43724 | "ulong __ovld __conv sub_group_reduce_min(ulong x);\n" |
43725 | "float __ovld __conv sub_group_reduce_min(float x);\n" |
43726 | "int __ovld __conv sub_group_reduce_max(int x);\n" |
43727 | "uint __ovld __conv sub_group_reduce_max(uint x);\n" |
43728 | "long __ovld __conv sub_group_reduce_max(long x);\n" |
43729 | "ulong __ovld __conv sub_group_reduce_max(ulong x);\n" |
43730 | "float __ovld __conv sub_group_reduce_max(float x);\n" |
43731 | "\n" |
43732 | "int __ovld __conv sub_group_scan_exclusive_add(int x);\n" |
43733 | "uint __ovld __conv sub_group_scan_exclusive_add(uint x);\n" |
43734 | "long __ovld __conv sub_group_scan_exclusive_add(long x);\n" |
43735 | "ulong __ovld __conv sub_group_scan_exclusive_add(ulong x);\n" |
43736 | "float __ovld __conv sub_group_scan_exclusive_add(float x);\n" |
43737 | "int __ovld __conv sub_group_scan_exclusive_min(int x);\n" |
43738 | "uint __ovld __conv sub_group_scan_exclusive_min(uint x);\n" |
43739 | "long __ovld __conv sub_group_scan_exclusive_min(long x);\n" |
43740 | "ulong __ovld __conv sub_group_scan_exclusive_min(ulong x);\n" |
43741 | "float __ovld __conv sub_group_scan_exclusive_min(float x);\n" |
43742 | "int __ovld __conv sub_group_scan_exclusive_max(int x);\n" |
43743 | "uint __ovld __conv sub_group_scan_exclusive_max(uint x);\n" |
43744 | "long __ovld __conv sub_group_scan_exclusive_max(long x);\n" |
43745 | "ulong __ovld __conv sub_group_scan_exclusive_max(ulong x);\n" |
43746 | "float __ovld __conv sub_group_scan_exclusive_max(float x);\n" |
43747 | "\n" |
43748 | "int __ovld __conv sub_group_scan_inclusive_add(int x);\n" |
43749 | "uint __ovld __conv sub_group_scan_inclusive_add(uint x);\n" |
43750 | "long __ovld __conv sub_group_scan_inclusive_add(long x);\n" |
43751 | "ulong __ovld __conv sub_group_scan_inclusive_add(ulong x);\n" |
43752 | "float __ovld __conv sub_group_scan_inclusive_add(float x);\n" |
43753 | "int __ovld __conv sub_group_scan_inclusive_min(int x);\n" |
43754 | "uint __ovld __conv sub_group_scan_inclusive_min(uint x);\n" |
43755 | "long __ovld __conv sub_group_scan_inclusive_min(long x);\n" |
43756 | "ulong __ovld __conv sub_group_scan_inclusive_min(ulong x);\n" |
43757 | "float __ovld __conv sub_group_scan_inclusive_min(float x);\n" |
43758 | "int __ovld __conv sub_group_scan_inclusive_max(int x);\n" |
43759 | "uint __ovld __conv sub_group_scan_inclusive_max(uint x);\n" |
43760 | "long __ovld __conv sub_group_scan_inclusive_max(long x);\n" |
43761 | "ulong __ovld __conv sub_group_scan_inclusive_max(ulong x);\n" |
43762 | "float __ovld __conv sub_group_scan_inclusive_max(float x);\n" |
43763 | "\n" |
43764 | "#ifdef cl_khr_fp16\n" |
43765 | "half __ovld __conv sub_group_broadcast(half x, uint sub_group_local_id);\n" |
43766 | "half __ovld __conv sub_group_reduce_add(half x);\n" |
43767 | "half __ovld __conv sub_group_reduce_min(half x);\n" |
43768 | "half __ovld __conv sub_group_reduce_max(half x);\n" |
43769 | "half __ovld __conv sub_group_scan_exclusive_add(half x);\n" |
43770 | "half __ovld __conv sub_group_scan_exclusive_min(half x);\n" |
43771 | "half __ovld __conv sub_group_scan_exclusive_max(half x);\n" |
43772 | "half __ovld __conv sub_group_scan_inclusive_add(half x);\n" |
43773 | "half __ovld __conv sub_group_scan_inclusive_min(half x);\n" |
43774 | "half __ovld __conv sub_group_scan_inclusive_max(half x);\n" |
43775 | "#endif //cl_khr_fp16\n" |
43776 | "\n" |
43777 | "#ifdef cl_khr_fp64\n" |
43778 | "double __ovld __conv sub_group_broadcast(double x, uint sub_group_local_id);\n" |
43779 | "double __ovld __conv sub_group_reduce_add(double x);\n" |
43780 | "double __ovld __conv sub_group_reduce_min(double x);\n" |
43781 | "double __ovld __conv sub_group_reduce_max(double x);\n" |
43782 | "double __ovld __conv sub_group_scan_exclusive_add(double x);\n" |
43783 | "double __ovld __conv sub_group_scan_exclusive_min(double x);\n" |
43784 | "double __ovld __conv sub_group_scan_exclusive_max(double x);\n" |
43785 | "double __ovld __conv sub_group_scan_inclusive_add(double x);\n" |
43786 | "double __ovld __conv sub_group_scan_inclusive_min(double x);\n" |
43787 | "double __ovld __conv sub_group_scan_inclusive_max(double x);\n" |
43788 | "#endif //cl_khr_fp64\n" |
43789 | "\n" |
43790 | "#endif //cl_khr_subgroups cl_intel_subgroups\n" |
43791 | "\n" |
43792 | "#if defined(cl_intel_subgroups)\n" |
43793 | "// Intel-Specific Sub Group Functions\n" |
43794 | "float __ovld __conv intel_sub_group_shuffle( float x, uint c );\n" |
43795 | "float2 __ovld __conv intel_sub_group_shuffle( float2 x, uint c );\n" |
43796 | "float3 __ovld __conv intel_sub_group_shuffle( float3 x, uint c );\n" |
43797 | "float4 __ovld __conv intel_sub_group_shuffle( float4 x, uint c );\n" |
43798 | "float8 __ovld __conv intel_sub_group_shuffle( float8 x, uint c );\n" |
43799 | "float16 __ovld __conv intel_sub_group_shuffle( float16 x, uint c );\n" |
43800 | "\n" |
43801 | "int __ovld __conv intel_sub_group_shuffle( int x, uint c );\n" |
43802 | "int2 __ovld __conv intel_sub_group_shuffle( int2 x, uint c );\n" |
43803 | "int3 __ovld __conv intel_sub_group_shuffle( int3 x, uint c );\n" |
43804 | "int4 __ovld __conv intel_sub_group_shuffle( int4 x, uint c );\n" |
43805 | "int8 __ovld __conv intel_sub_group_shuffle( int8 x, uint c );\n" |
43806 | "int16 __ovld __conv intel_sub_group_shuffle( int16 x, uint c );\n" |
43807 | "\n" |
43808 | "uint __ovld __conv intel_sub_group_shuffle( uint x, uint c );\n" |
43809 | "uint2 __ovld __conv intel_sub_group_shuffle( uint2 x, uint c );\n" |
43810 | "uint3 __ovld __conv intel_sub_group_shuffle( uint3 x, uint c );\n" |
43811 | "uint4 __ovld __conv intel_sub_group_shuffle( uint4 x, uint c );\n" |
43812 | "uint8 __ovld __conv intel_sub_group_shuffle( uint8 x, uint c );\n" |
43813 | "uint16 __ovld __conv intel_sub_group_shuffle( uint16 x, uint c );\n" |
43814 | "\n" |
43815 | "long __ovld __conv intel_sub_group_shuffle( long x, uint c );\n" |
43816 | "ulong __ovld __conv intel_sub_group_shuffle( ulong x, uint c );\n" |
43817 | "\n" |
43818 | "float __ovld __conv intel_sub_group_shuffle_down( float cur, float next, uint c );\n" |
43819 | "float2 __ovld __conv intel_sub_group_shuffle_down( float2 cur, float2 next, uint c );\n" |
43820 | "float3 __ovld __conv intel_sub_group_shuffle_down( float3 cur, float3 next, uint c );\n" |
43821 | "float4 __ovld __conv intel_sub_group_shuffle_down( float4 cur, float4 next, uint c );\n" |
43822 | "float8 __ovld __conv intel_sub_group_shuffle_down( float8 cur, float8 next, uint c );\n" |
43823 | "float16 __ovld __conv intel_sub_group_shuffle_down( float16 cur, float16 next, uint c );\n" |
43824 | "\n" |
43825 | "int __ovld __conv intel_sub_group_shuffle_down( int cur, int next, uint c );\n" |
43826 | "int2 __ovld __conv intel_sub_group_shuffle_down( int2 cur, int2 next, uint c );\n" |
43827 | "int3 __ovld __conv intel_sub_group_shuffle_down( int3 cur, int3 next, uint c );\n" |
43828 | "int4 __ovld __conv intel_sub_group_shuffle_down( int4 cur, int4 next, uint c );\n" |
43829 | "int8 __ovld __conv intel_sub_group_shuffle_down( int8 cur, int8 next, uint c );\n" |
43830 | "int16 __ovld __conv intel_sub_group_shuffle_down( int16 cur, int16 next, uint c );\n" |
43831 | "\n" |
43832 | "uint __ovld __conv intel_sub_group_shuffle_down( uint cur, uint next, uint c );\n" |
43833 | "uint2 __ovld __conv intel_sub_group_shuffle_down( uint2 cur, uint2 next, uint c );\n" |
43834 | "uint3 __ovld __conv intel_sub_group_shuffle_down( uint3 cur, uint3 next, uint c );\n" |
43835 | "uint4 __ovld __conv intel_sub_group_shuffle_down( uint4 cur, uint4 next, uint c );\n" |
43836 | "uint8 __ovld __conv intel_sub_group_shuffle_down( uint8 cur, uint8 next, uint c );\n" |
43837 | "uint16 __ovld __conv intel_sub_group_shuffle_down( uint16 cur, uint16 next, uint c );\n" |
43838 | "\n" |
43839 | "long __ovld __conv intel_sub_group_shuffle_down( long prev, long cur, uint c );\n" |
43840 | "ulong __ovld __conv intel_sub_group_shuffle_down( ulong prev, ulong cur, uint c );\n" |
43841 | "\n" |
43842 | "float __ovld __conv intel_sub_group_shuffle_up( float prev, float cur, uint c );\n" |
43843 | "float2 __ovld __conv intel_sub_group_shuffle_up( float2 prev, float2 cur, uint c );\n" |
43844 | "float3 __ovld __conv intel_sub_group_shuffle_up( float3 prev, float3 cur, uint c );\n" |
43845 | "float4 __ovld __conv intel_sub_group_shuffle_up( float4 prev, float4 cur, uint c );\n" |
43846 | "float8 __ovld __conv intel_sub_group_shuffle_up( float8 prev, float8 cur, uint c );\n" |
43847 | "float16 __ovld __conv intel_sub_group_shuffle_up( float16 prev, float16 cur, uint c );\n" |
43848 | "\n" |
43849 | "int __ovld __conv intel_sub_group_shuffle_up( int prev, int cur, uint c );\n" |
43850 | "int2 __ovld __conv intel_sub_group_shuffle_up( int2 prev, int2 cur, uint c );\n" |
43851 | "int3 __ovld __conv intel_sub_group_shuffle_up( int3 prev, int3 cur, uint c );\n" |
43852 | "int4 __ovld __conv intel_sub_group_shuffle_up( int4 prev, int4 cur, uint c );\n" |
43853 | "int8 __ovld __conv intel_sub_group_shuffle_up( int8 prev, int8 cur, uint c );\n" |
43854 | "int16 __ovld __conv intel_sub_group_shuffle_up( int16 prev, int16 cur, uint c );\n" |
43855 | "\n" |
43856 | "uint __ovld __conv intel_sub_group_shuffle_up( uint prev, uint cur, uint c );\n" |
43857 | "uint2 __ovld __conv intel_sub_group_shuffle_up( uint2 prev, uint2 cur, uint c );\n" |
43858 | "uint3 __ovld __conv intel_sub_group_shuffle_up( uint3 prev, uint3 cur, uint c );\n" |
43859 | "uint4 __ovld __conv intel_sub_group_shuffle_up( uint4 prev, uint4 cur, uint c );\n" |
43860 | "uint8 __ovld __conv intel_sub_group_shuffle_up( uint8 prev, uint8 cur, uint c );\n" |
43861 | "uint16 __ovld __conv intel_sub_group_shuffle_up( uint16 prev, uint16 cur, uint c );\n" |
43862 | "\n" |
43863 | "long __ovld __conv intel_sub_group_shuffle_up( long prev, long cur, uint c );\n" |
43864 | "ulong __ovld __conv intel_sub_group_shuffle_up( ulong prev, ulong cur, uint c );\n" |
43865 | "\n" |
43866 | "float __ovld __conv intel_sub_group_shuffle_xor( float x, uint c );\n" |
43867 | "float2 __ovld __conv intel_sub_group_shuffle_xor( float2 x, uint c );\n" |
43868 | "float3 __ovld __conv intel_sub_group_shuffle_xor( float3 x, uint c );\n" |
43869 | "float4 __ovld __conv intel_sub_group_shuffle_xor( float4 x, uint c );\n" |
43870 | "float8 __ovld __conv intel_sub_group_shuffle_xor( float8 x, uint c );\n" |
43871 | "float16 __ovld __conv intel_sub_group_shuffle_xor( float16 x, uint c );\n" |
43872 | "\n" |
43873 | "int __ovld __conv intel_sub_group_shuffle_xor( int x, uint c );\n" |
43874 | "int2 __ovld __conv intel_sub_group_shuffle_xor( int2 x, uint c );\n" |
43875 | "int3 __ovld __conv intel_sub_group_shuffle_xor( int3 x, uint c );\n" |
43876 | "int4 __ovld __conv intel_sub_group_shuffle_xor( int4 x, uint c );\n" |
43877 | "int8 __ovld __conv intel_sub_group_shuffle_xor( int8 x, uint c );\n" |
43878 | "int16 __ovld __conv intel_sub_group_shuffle_xor( int16 x, uint c );\n" |
43879 | "\n" |
43880 | "uint __ovld __conv intel_sub_group_shuffle_xor( uint x, uint c );\n" |
43881 | "uint2 __ovld __conv intel_sub_group_shuffle_xor( uint2 x, uint c );\n" |
43882 | "uint3 __ovld __conv intel_sub_group_shuffle_xor( uint3 x, uint c );\n" |
43883 | "uint4 __ovld __conv intel_sub_group_shuffle_xor( uint4 x, uint c );\n" |
43884 | "uint8 __ovld __conv intel_sub_group_shuffle_xor( uint8 x, uint c );\n" |
43885 | "uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c );\n" |
43886 | "\n" |
43887 | "long __ovld __conv intel_sub_group_shuffle_xor( long x, uint c );\n" |
43888 | "ulong __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c );\n" |
43889 | "\n" |
43890 | "uint __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord );\n" |
43891 | "uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord );\n" |
43892 | "uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord );\n" |
43893 | "uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord );\n" |
43894 | "\n" |
43895 | "#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
43896 | "uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord);\n" |
43897 | "uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord);\n" |
43898 | "uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord);\n" |
43899 | "uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord);\n" |
43900 | "#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
43901 | "\n" |
43902 | "uint __ovld __conv intel_sub_group_block_read( const __global uint* p );\n" |
43903 | "uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p );\n" |
43904 | "uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p );\n" |
43905 | "uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p );\n" |
43906 | "\n" |
43907 | "void __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data);\n" |
43908 | "void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data);\n" |
43909 | "void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data);\n" |
43910 | "void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data);\n" |
43911 | "\n" |
43912 | "#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
43913 | "void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data);\n" |
43914 | "void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data);\n" |
43915 | "void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data);\n" |
43916 | "void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data);\n" |
43917 | "#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
43918 | "\n" |
43919 | "void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data );\n" |
43920 | "void __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data );\n" |
43921 | "void __ovld __conv intel_sub_group_block_write4( __global uint* p, uint4 data );\n" |
43922 | "void __ovld __conv intel_sub_group_block_write8( __global uint* p, uint8 data );\n" |
43923 | "\n" |
43924 | "#ifdef cl_khr_fp16\n" |
43925 | "half __ovld __conv intel_sub_group_shuffle( half x, uint c );\n" |
43926 | "half __ovld __conv intel_sub_group_shuffle_down( half prev, half cur, uint c );\n" |
43927 | "half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c );\n" |
43928 | "half __ovld __conv intel_sub_group_shuffle_xor( half x, uint c );\n" |
43929 | "#endif\n" |
43930 | "\n" |
43931 | "#if defined(cl_khr_fp64)\n" |
43932 | "double __ovld __conv intel_sub_group_shuffle( double x, uint c );\n" |
43933 | "double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint c );\n" |
43934 | "double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c );\n" |
43935 | "double __ovld __conv intel_sub_group_shuffle_xor( double x, uint c );\n" |
43936 | "#endif\n" |
43937 | "\n" |
43938 | "#endif //cl_intel_subgroups\n" |
43939 | "\n" |
43940 | "#if defined(cl_intel_subgroups_short)\n" |
43941 | "short __ovld __conv intel_sub_group_broadcast( short x, uint sub_group_local_id );\n" |
43942 | "short2 __ovld __conv intel_sub_group_broadcast( short2 x, uint sub_group_local_id );\n" |
43943 | "short3 __ovld __conv intel_sub_group_broadcast( short3 x, uint sub_group_local_id );\n" |
43944 | "short4 __ovld __conv intel_sub_group_broadcast( short4 x, uint sub_group_local_id );\n" |
43945 | "short8 __ovld __conv intel_sub_group_broadcast( short8 x, uint sub_group_local_id );\n" |
43946 | "\n" |
43947 | "ushort __ovld __conv intel_sub_group_broadcast( ushort x, uint sub_group_local_id );\n" |
43948 | "ushort2 __ovld __conv intel_sub_group_broadcast( ushort2 x, uint sub_group_local_id );\n" |
43949 | "ushort3 __ovld __conv intel_sub_group_broadcast( ushort3 x, uint sub_group_local_id );\n" |
43950 | "ushort4 __ovld __conv intel_sub_group_broadcast( ushort4 x, uint sub_group_local_id );\n" |
43951 | "ushort8 __ovld __conv intel_sub_group_broadcast( ushort8 x, uint sub_group_local_id );\n" |
43952 | "\n" |
43953 | "short __ovld __conv intel_sub_group_shuffle( short x, uint c );\n" |
43954 | "short2 __ovld __conv intel_sub_group_shuffle( short2 x, uint c );\n" |
43955 | "short3 __ovld __conv intel_sub_group_shuffle( short3 x, uint c );\n" |
43956 | "short4 __ovld __conv intel_sub_group_shuffle( short4 x, uint c );\n" |
43957 | "short8 __ovld __conv intel_sub_group_shuffle( short8 x, uint c );\n" |
43958 | "short16 __ovld __conv intel_sub_group_shuffle( short16 x, uint c);\n" |
43959 | "\n" |
43960 | "ushort __ovld __conv intel_sub_group_shuffle( ushort x, uint c );\n" |
43961 | "ushort2 __ovld __conv intel_sub_group_shuffle( ushort2 x, uint c );\n" |
43962 | "ushort3 __ovld __conv intel_sub_group_shuffle( ushort3 x, uint c );\n" |
43963 | "ushort4 __ovld __conv intel_sub_group_shuffle( ushort4 x, uint c );\n" |
43964 | "ushort8 __ovld __conv intel_sub_group_shuffle( ushort8 x, uint c );\n" |
43965 | "ushort16 __ovld __conv intel_sub_group_shuffle( ushort16 x, uint c );\n" |
43966 | "\n" |
43967 | "short __ovld __conv intel_sub_group_shuffle_down( short cur, short next, uint c );\n" |
43968 | "short2 __ovld __conv intel_sub_group_shuffle_down( short2 cur, short2 next, uint c );\n" |
43969 | "short3 __ovld __conv intel_sub_group_shuffle_down( short3 cur, short3 next, uint c );\n" |
43970 | "short4 __ovld __conv intel_sub_group_shuffle_down( short4 cur, short4 next, uint c );\n" |
43971 | "short8 __ovld __conv intel_sub_group_shuffle_down( short8 cur, short8 next, uint c );\n" |
43972 | "short16 __ovld __conv intel_sub_group_shuffle_down( short16 cur, short16 next, uint c );\n" |
43973 | "\n" |
43974 | "ushort __ovld __conv intel_sub_group_shuffle_down( ushort cur, ushort next, uint c );\n" |
43975 | "ushort2 __ovld __conv intel_sub_group_shuffle_down( ushort2 cur, ushort2 next, uint c );\n" |
43976 | "ushort3 __ovld __conv intel_sub_group_shuffle_down( ushort3 cur, ushort3 next, uint c );\n" |
43977 | "ushort4 __ovld __conv intel_sub_group_shuffle_down( ushort4 cur, ushort4 next, uint c );\n" |
43978 | "ushort8 __ovld __conv intel_sub_group_shuffle_down( ushort8 cur, ushort8 next, uint c );\n" |
43979 | "ushort16 __ovld __conv intel_sub_group_shuffle_down( ushort16 cur, ushort16 next, uint c );\n" |
43980 | "\n" |
43981 | "short __ovld __conv intel_sub_group_shuffle_up( short cur, short next, uint c );\n" |
43982 | "short2 __ovld __conv intel_sub_group_shuffle_up( short2 cur, short2 next, uint c );\n" |
43983 | "short3 __ovld __conv intel_sub_group_shuffle_up( short3 cur, short3 next, uint c );\n" |
43984 | "short4 __ovld __conv intel_sub_group_shuffle_up( short4 cur, short4 next, uint c );\n" |
43985 | "short8 __ovld __conv intel_sub_group_shuffle_up( short8 cur, short8 next, uint c );\n" |
43986 | "short16 __ovld __conv intel_sub_group_shuffle_up( short16 cur, short16 next, uint c );\n" |
43987 | "\n" |
43988 | "ushort __ovld __conv intel_sub_group_shuffle_up( ushort cur, ushort next, uint c );\n" |
43989 | "ushort2 __ovld __conv intel_sub_group_shuffle_up( ushort2 cur, ushort2 next, uint c );\n" |
43990 | "ushort3 __ovld __conv intel_sub_group_shuffle_up( ushort3 cur, ushort3 next, uint c );\n" |
43991 | "ushort4 __ovld __conv intel_sub_group_shuffle_up( ushort4 cur, ushort4 next, uint c );\n" |
43992 | "ushort8 __ovld __conv intel_sub_group_shuffle_up( ushort8 cur, ushort8 next, uint c );\n" |
43993 | "ushort16 __ovld __conv intel_sub_group_shuffle_up( ushort16 cur, ushort16 next, uint c );\n" |
43994 | "\n" |
43995 | "short __ovld __conv intel_sub_group_shuffle_xor( short x, uint c );\n" |
43996 | "short2 __ovld __conv intel_sub_group_shuffle_xor( short2 x, uint c );\n" |
43997 | "short3 __ovld __conv intel_sub_group_shuffle_xor( short3 x, uint c );\n" |
43998 | "short4 __ovld __conv intel_sub_group_shuffle_xor( short4 x, uint c );\n" |
43999 | "short8 __ovld __conv intel_sub_group_shuffle_xor( short8 x, uint c );\n" |
44000 | "short16 __ovld __conv intel_sub_group_shuffle_xor( short16 x, uint c );\n" |
44001 | "\n" |
44002 | "ushort __ovld __conv intel_sub_group_shuffle_xor( ushort x, uint c );\n" |
44003 | "ushort2 __ovld __conv intel_sub_group_shuffle_xor( ushort2 x, uint c );\n" |
44004 | "ushort3 __ovld __conv intel_sub_group_shuffle_xor( ushort3 x, uint c );\n" |
44005 | "ushort4 __ovld __conv intel_sub_group_shuffle_xor( ushort4 x, uint c );\n" |
44006 | "ushort8 __ovld __conv intel_sub_group_shuffle_xor( ushort8 x, uint c );\n" |
44007 | "ushort16 __ovld __conv intel_sub_group_shuffle_xor( ushort16 x, uint c );\n" |
44008 | "\n" |
44009 | "short __ovld __conv intel_sub_group_reduce_add( short x );\n" |
44010 | "ushort __ovld __conv intel_sub_group_reduce_add( ushort x );\n" |
44011 | "short __ovld __conv intel_sub_group_reduce_min( short x );\n" |
44012 | "ushort __ovld __conv intel_sub_group_reduce_min( ushort x );\n" |
44013 | "short __ovld __conv intel_sub_group_reduce_max( short x );\n" |
44014 | "ushort __ovld __conv intel_sub_group_reduce_max( ushort x );\n" |
44015 | "\n" |
44016 | "short __ovld __conv intel_sub_group_scan_exclusive_add( short x );\n" |
44017 | "ushort __ovld __conv intel_sub_group_scan_exclusive_add( ushort x );\n" |
44018 | "short __ovld __conv intel_sub_group_scan_exclusive_min( short x );\n" |
44019 | "ushort __ovld __conv intel_sub_group_scan_exclusive_min( ushort x );\n" |
44020 | "short __ovld __conv intel_sub_group_scan_exclusive_max( short x );\n" |
44021 | "ushort __ovld __conv intel_sub_group_scan_exclusive_max( ushort x );\n" |
44022 | "\n" |
44023 | "short __ovld __conv intel_sub_group_scan_inclusive_add( short x );\n" |
44024 | "ushort __ovld __conv intel_sub_group_scan_inclusive_add( ushort x );\n" |
44025 | "short __ovld __conv intel_sub_group_scan_inclusive_min( short x );\n" |
44026 | "ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x );\n" |
44027 | "short __ovld __conv intel_sub_group_scan_inclusive_max( short x );\n" |
44028 | "ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x );\n" |
44029 | "\n" |
44030 | "uint __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord );\n" |
44031 | "uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord );\n" |
44032 | "uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord );\n" |
44033 | "uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord );\n" |
44034 | "\n" |
44035 | "#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
44036 | "uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord );\n" |
44037 | "uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord );\n" |
44038 | "uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord );\n" |
44039 | "uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord );\n" |
44040 | "#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
44041 | "\n" |
44042 | "uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );\n" |
44043 | "uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );\n" |
44044 | "uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );\n" |
44045 | "uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );\n" |
44046 | "\n" |
44047 | "void __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data );\n" |
44048 | "void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data );\n" |
44049 | "void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data );\n" |
44050 | "void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data );\n" |
44051 | "\n" |
44052 | "#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
44053 | "void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data );\n" |
44054 | "void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data );\n" |
44055 | "void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data );\n" |
44056 | "void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data );\n" |
44057 | "#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
44058 | "\n" |
44059 | "void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );\n" |
44060 | "void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );\n" |
44061 | "void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );\n" |
44062 | "void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );\n" |
44063 | "\n" |
44064 | "ushort __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord );\n" |
44065 | "ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord );\n" |
44066 | "ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord );\n" |
44067 | "ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord );\n" |
44068 | "\n" |
44069 | "#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
44070 | "ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord);\n" |
44071 | "ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord);\n" |
44072 | "ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord);\n" |
44073 | "ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord);\n" |
44074 | "#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
44075 | "\n" |
44076 | "ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p );\n" |
44077 | "ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p );\n" |
44078 | "ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p );\n" |
44079 | "ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p );\n" |
44080 | "\n" |
44081 | "void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort data);\n" |
44082 | "void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data);\n" |
44083 | "void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data);\n" |
44084 | "void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data);\n" |
44085 | "\n" |
44086 | "#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
44087 | "void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data);\n" |
44088 | "void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data);\n" |
44089 | "void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data);\n" |
44090 | "void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data);\n" |
44091 | "#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
44092 | "\n" |
44093 | "void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data );\n" |
44094 | "void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data );\n" |
44095 | "void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, ushort4 data );\n" |
44096 | "void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );\n" |
44097 | "#endif // cl_intel_subgroups_short\n" |
44098 | "\n" |
44099 | "#ifdef cl_intel_device_side_avc_motion_estimation\n" |
44100 | "#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin\n" |
44101 | "\n" |
44102 | "#define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0\n" |
44103 | "#define CLK_AVC_ME_MAJOR_16x8_INTEL 0x1\n" |
44104 | "#define CLK_AVC_ME_MAJOR_8x16_INTEL 0x2\n" |
44105 | "#define CLK_AVC_ME_MAJOR_8x8_INTEL 0x3\n" |
44106 | "\n" |
44107 | "#define CLK_AVC_ME_MINOR_8x8_INTEL 0x0\n" |
44108 | "#define CLK_AVC_ME_MINOR_8x4_INTEL 0x1\n" |
44109 | "#define CLK_AVC_ME_MINOR_4x8_INTEL 0x2\n" |
44110 | "#define CLK_AVC_ME_MINOR_4x4_INTEL 0x3\n" |
44111 | "\n" |
44112 | "#define CLK_AVC_ME_MAJOR_FORWARD_INTEL 0x0\n" |
44113 | "#define CLK_AVC_ME_MAJOR_BACKWARD_INTEL 0x1\n" |
44114 | "#define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2\n" |
44115 | "\n" |
44116 | "#define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0\n" |
44117 | "#define CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E\n" |
44118 | "#define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D\n" |
44119 | "#define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B\n" |
44120 | "#define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77\n" |
44121 | "#define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F\n" |
44122 | "#define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F\n" |
44123 | "#define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F\n" |
44124 | "\n" |
44125 | "#define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0\n" |
44126 | "#define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1\n" |
44127 | "#define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2\n" |
44128 | "\n" |
44129 | "#define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0\n" |
44130 | "#define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1\n" |
44131 | "#define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2\n" |
44132 | "#define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3\n" |
44133 | "#define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4\n" |
44134 | "#define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5\n" |
44135 | "#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6\n" |
44136 | "#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7\n" |
44137 | "#define CLK_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8\n" |
44138 | "\n" |
44139 | "#define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0\n" |
44140 | "#define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2\n" |
44141 | "\n" |
44142 | "#define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0\n" |
44143 | "#define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1\n" |
44144 | "#define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3\n" |
44145 | "\n" |
44146 | "#define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0\n" |
44147 | "#define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1\n" |
44148 | "#define CLK_AVC_ME_COST_PRECISION_PEL_INTEL 0x2\n" |
44149 | "#define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3\n" |
44150 | "\n" |
44151 | "#define CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10\n" |
44152 | "#define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15\n" |
44153 | "#define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20\n" |
44154 | "#define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B\n" |
44155 | "#define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30\n" |
44156 | "\n" |
44157 | "#define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0\n" |
44158 | "#define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2\n" |
44159 | "#define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4\n" |
44160 | "#define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8\n" |
44161 | "\n" |
44162 | "#define CLK_AVC_ME_INTRA_16x16_INTEL 0x0\n" |
44163 | "#define CLK_AVC_ME_INTRA_8x8_INTEL 0x1\n" |
44164 | "#define CLK_AVC_ME_INTRA_4x4_INTEL 0x2\n" |
44165 | "\n" |
44166 | "#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0\n" |
44167 | "#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000\n" |
44168 | "\n" |
44169 | "#define CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL (0x1 << 24)\n" |
44170 | "#define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL (0x2 << 24)\n" |
44171 | "#define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL (0x3 << 24)\n" |
44172 | "#define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL (0x55 << 24)\n" |
44173 | "#define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL (0xAA << 24)\n" |
44174 | "#define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL (0xFF << 24)\n" |
44175 | "#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL (0x1 << 24)\n" |
44176 | "#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL (0x2 << 24)\n" |
44177 | "#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL (0x1 << 26)\n" |
44178 | "#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL (0x2 << 26)\n" |
44179 | "#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL (0x1 << 28)\n" |
44180 | "#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL (0x2 << 28)\n" |
44181 | "#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL (0x1 << 30)\n" |
44182 | "#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL (0x2 << 30)\n" |
44183 | "\n" |
44184 | "#define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00\n" |
44185 | "#define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80\n" |
44186 | "\n" |
44187 | "#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL 0x0\n" |
44188 | "#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6\n" |
44189 | "#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5\n" |
44190 | "#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3\n" |
44191 | "\n" |
44192 | "#define CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60\n" |
44193 | "#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10\n" |
44194 | "#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8\n" |
44195 | "#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4\n" |
44196 | "\n" |
44197 | "#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0\n" |
44198 | "#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1\n" |
44199 | "#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2\n" |
44200 | "#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3\n" |
44201 | "#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4\n" |
44202 | "#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4\n" |
44203 | "#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5\n" |
44204 | "#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6\n" |
44205 | "#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7\n" |
44206 | "#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8\n" |
44207 | "#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0\n" |
44208 | "#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1\n" |
44209 | "#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2\n" |
44210 | "#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3\n" |
44211 | "\n" |
44212 | "#define CLK_AVC_ME_FRAME_FORWARD_INTEL 0x1\n" |
44213 | "#define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2\n" |
44214 | "#define CLK_AVC_ME_FRAME_DUAL_INTEL 0x3\n" |
44215 | "\n" |
44216 | "#define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0\n" |
44217 | "#define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1\n" |
44218 | "\n" |
44219 | "#define CLK_AVC_ME_INITIALIZE_INTEL 0x0\n" |
44220 | "\n" |
44221 | "#define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL 0x0\n" |
44222 | "#define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL 0x0\n" |
44223 | "#define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL 0x0\n" |
44224 | "\n" |
44225 | "#define CLK_AVC_IME_RESULT_INITIALIZE_INTEL 0x0\n" |
44226 | "#define CLK_AVC_REF_RESULT_INITIALIZE_INTEL 0x0\n" |
44227 | "#define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL 0x0\n" |
44228 | "\n" |
44229 | "#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0\n" |
44230 | "#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0\n" |
44231 | "#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0\n" |
44232 | "#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0\n" |
44233 | "\n" |
44234 | "// MCE built-in functions\n" |
44235 | "uchar __ovld\n" |
44236 | "intel_sub_group_avc_mce_get_default_inter_base_multi_reference_penalty(\n" |
44237 | " uchar slice_type, uchar qp);\n" |
44238 | "ulong __ovld intel_sub_group_avc_mce_get_default_inter_shape_penalty(\n" |
44239 | " uchar slice_type, uchar qp);\n" |
44240 | "uchar __ovld intel_sub_group_avc_mce_get_default_inter_direction_penalty(\n" |
44241 | " uchar slice_type, uchar qp);\n" |
44242 | "uint __ovld intel_sub_group_avc_mce_get_default_intra_luma_shape_penalty(\n" |
44243 | " uchar slice_type, uchar qp);\n" |
44244 | "uint2 __ovld\n" |
44245 | "intel_sub_group_avc_mce_get_default_inter_motion_vector_cost_table(\n" |
44246 | " uchar slice_type, uchar qp);\n" |
44247 | "uchar __ovld intel_sub_group_avc_mce_get_default_intra_luma_mode_penalty(\n" |
44248 | " uchar slice_type, uchar qp);\n" |
44249 | "\n" |
44250 | "uint2 __ovld intel_sub_group_avc_mce_get_default_high_penalty_cost_table();\n" |
44251 | "uint2 __ovld intel_sub_group_avc_mce_get_default_medium_penalty_cost_table();\n" |
44252 | "uint2 __ovld intel_sub_group_avc_mce_get_default_low_penalty_cost_table();\n" |
44253 | "uint __ovld intel_sub_group_avc_mce_get_default_non_dc_luma_intra_penalty();\n" |
44254 | "uchar __ovld\n" |
44255 | "intel_sub_group_avc_mce_get_default_intra_chroma_mode_base_penalty();\n" |
44256 | "\n" |
44257 | "intel_sub_group_avc_mce_payload_t __ovld\n" |
44258 | "intel_sub_group_avc_mce_set_inter_base_multi_reference_penalty(\n" |
44259 | " uchar reference_base_penalty, intel_sub_group_avc_mce_payload_t payload);\n" |
44260 | "intel_sub_group_avc_mce_payload_t __ovld\n" |
44261 | "intel_sub_group_avc_mce_set_inter_shape_penalty(\n" |
44262 | " ulong packed_shape_penalty, intel_sub_group_avc_mce_payload_t payload);\n" |
44263 | "intel_sub_group_avc_mce_payload_t __ovld\n" |
44264 | "intel_sub_group_avc_mce_set_inter_direction_penalty(\n" |
44265 | " uchar direction_cost, intel_sub_group_avc_mce_payload_t payload);\n" |
44266 | "intel_sub_group_avc_mce_payload_t __ovld\n" |
44267 | "intel_sub_group_avc_mce_set_motion_vector_cost_function(\n" |
44268 | " ulong packed_cost_center_delta, uint2 packed_cost_table,\n" |
44269 | " uchar cost_precision, intel_sub_group_avc_mce_payload_t payload);\n" |
44270 | "intel_sub_group_avc_mce_payload_t __ovld\n" |
44271 | "intel_sub_group_avc_mce_set_ac_only_haar(\n" |
44272 | " intel_sub_group_avc_mce_payload_t payload);\n" |
44273 | "intel_sub_group_avc_mce_payload_t __ovld\n" |
44274 | "intel_sub_group_avc_mce_set_source_interlaced_field_polarity(\n" |
44275 | " uchar src_field_polarity, intel_sub_group_avc_mce_payload_t payload);\n" |
44276 | "intel_sub_group_avc_mce_payload_t __ovld\n" |
44277 | "intel_sub_group_avc_mce_set_single_reference_interlaced_field_polarity(\n" |
44278 | " uchar ref_field_polarity, intel_sub_group_avc_mce_payload_t payload);\n" |
44279 | "intel_sub_group_avc_mce_payload_t __ovld\n" |
44280 | "intel_sub_group_avc_mce_set_dual_reference_interlaced_field_polarities(\n" |
44281 | " uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,\n" |
44282 | " intel_sub_group_avc_mce_payload_t payload);\n" |
44283 | "\n" |
44284 | "ulong __ovld intel_sub_group_avc_mce_get_motion_vectors(\n" |
44285 | " intel_sub_group_avc_mce_result_t result);\n" |
44286 | "ushort __ovld intel_sub_group_avc_mce_get_inter_distortions(\n" |
44287 | " intel_sub_group_avc_mce_result_t result);\n" |
44288 | "ushort __ovld intel_sub_group_avc_mce_get_best_inter_distortion(\n" |
44289 | " intel_sub_group_avc_mce_result_t result);\n" |
44290 | "uchar __ovld intel_sub_group_avc_mce_get_inter_major_shape(\n" |
44291 | " intel_sub_group_avc_mce_result_t result);\n" |
44292 | "uchar __ovld intel_sub_group_avc_mce_get_inter_minor_shapes(\n" |
44293 | " intel_sub_group_avc_mce_result_t result);\n" |
44294 | "uchar __ovld intel_sub_group_avc_mce_get_inter_directions(\n" |
44295 | " intel_sub_group_avc_mce_result_t result);\n" |
44296 | "uchar __ovld intel_sub_group_avc_mce_get_inter_motion_vector_count(\n" |
44297 | " intel_sub_group_avc_mce_result_t result);\n" |
44298 | "uint __ovld intel_sub_group_avc_mce_get_inter_reference_ids(\n" |
44299 | " intel_sub_group_avc_mce_result_t result);\n" |
44300 | "uchar __ovld\n" |
44301 | "intel_sub_group_avc_mce_get_inter_reference_interlaced_field_polarities(\n" |
44302 | " uint packed_reference_ids, uint packed_reference_parameter_field_polarities,\n" |
44303 | " intel_sub_group_avc_mce_result_t result);\n" |
44304 | "\n" |
44305 | "// IME built-in functions\n" |
44306 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44307 | "intel_sub_group_avc_ime_initialize(\n" |
44308 | " ushort2 src_coord, uchar partition_mask, uchar sad_adjustment);\n" |
44309 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44310 | "intel_sub_group_avc_ime_set_single_reference(\n" |
44311 | " short2 ref_offset, uchar search_window_config,\n" |
44312 | " intel_sub_group_avc_ime_payload_t payload);\n" |
44313 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44314 | "intel_sub_group_avc_ime_set_dual_reference(\n" |
44315 | " short2 fwd_ref_offset, short2 bwd_ref_offset, uchar search_window_config,\n" |
44316 | " intel_sub_group_avc_ime_payload_t payload);\n" |
44317 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44318 | "intel_sub_group_avc_ime_set_max_motion_vector_count(\n" |
44319 | " uchar max_motion_vector_count, intel_sub_group_avc_ime_payload_t payload);\n" |
44320 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44321 | "intel_sub_group_avc_ime_set_unidirectional_mix_disable(\n" |
44322 | " intel_sub_group_avc_ime_payload_t payload);\n" |
44323 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44324 | "intel_sub_group_avc_ime_set_early_search_termination_threshold(\n" |
44325 | " uchar threshold, intel_sub_group_avc_ime_payload_t payload);\n" |
44326 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44327 | "intel_sub_group_avc_ime_set_weighted_sad(\n" |
44328 | " uint packed_sad_weights, intel_sub_group_avc_ime_payload_t payload);\n" |
44329 | "\n" |
44330 | "__attribute__((deprecated(\"If you use the latest Intel driver, please use \"\n" |
44331 | " \"intel_sub_group_avc_ime_ref_window_size instead\",\n" |
44332 | " \"intel_sub_group_avc_ime_ref_window_size\")))\n" |
44333 | "ushort2 __ovld\n" |
44334 | "intel_sub_group_ime_ref_window_size(uchar search_window_config, char dual_ref);\n" |
44335 | "ushort2 __ovld intel_sub_group_avc_ime_ref_window_size(\n" |
44336 | " uchar search_window_config, char dual_ref);\n" |
44337 | "short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset(\n" |
44338 | " short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size,\n" |
44339 | " ushort2 image_size);\n" |
44340 | "\n" |
44341 | "intel_sub_group_avc_ime_result_t __ovld\n" |
44342 | "intel_sub_group_avc_ime_evaluate_with_single_reference(\n" |
44343 | " read_only image2d_t src_image, read_only image2d_t ref_image,\n" |
44344 | " sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload);\n" |
44345 | "intel_sub_group_avc_ime_result_t __ovld\n" |
44346 | "intel_sub_group_avc_ime_evaluate_with_dual_reference(\n" |
44347 | " read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n" |
44348 | " read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n" |
44349 | " intel_sub_group_avc_ime_payload_t payload);\n" |
44350 | "intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld\n" |
44351 | "intel_sub_group_avc_ime_evaluate_with_single_reference_streamout(\n" |
44352 | " read_only image2d_t src_image, read_only image2d_t ref_image,\n" |
44353 | " sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload);\n" |
44354 | "intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld\n" |
44355 | "intel_sub_group_avc_ime_evaluate_with_dual_reference_streamout(\n" |
44356 | " read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n" |
44357 | " read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n" |
44358 | " intel_sub_group_avc_ime_payload_t payload);\n" |
44359 | "intel_sub_group_avc_ime_result_t __ovld\n" |
44360 | "intel_sub_group_avc_ime_evaluate_with_single_reference_streamin(\n" |
44361 | " read_only image2d_t src_image, read_only image2d_t ref_image,\n" |
44362 | " sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload,\n" |
44363 | " intel_sub_group_avc_ime_single_reference_streamin_t streamin_components);\n" |
44364 | "intel_sub_group_avc_ime_result_t __ovld\n" |
44365 | "intel_sub_group_avc_ime_evaluate_with_dual_reference_streamin(\n" |
44366 | " read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n" |
44367 | " read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n" |
44368 | " intel_sub_group_avc_ime_payload_t payload,\n" |
44369 | " intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components);\n" |
44370 | "intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld\n" |
44371 | "intel_sub_group_avc_ime_evaluate_with_single_reference_streaminout(\n" |
44372 | " read_only image2d_t src_image, read_only image2d_t ref_image,\n" |
44373 | " sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload,\n" |
44374 | " intel_sub_group_avc_ime_single_reference_streamin_t streamin_components);\n" |
44375 | "intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld\n" |
44376 | "intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout(\n" |
44377 | " read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n" |
44378 | " read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n" |
44379 | " intel_sub_group_avc_ime_payload_t payload,\n" |
44380 | " intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components);\n" |
44381 | "\n" |
44382 | "intel_sub_group_avc_ime_single_reference_streamin_t __ovld\n" |
44383 | "intel_sub_group_avc_ime_get_single_reference_streamin(\n" |
44384 | " intel_sub_group_avc_ime_result_single_reference_streamout_t result);\n" |
44385 | "intel_sub_group_avc_ime_dual_reference_streamin_t __ovld\n" |
44386 | "intel_sub_group_avc_ime_get_dual_reference_streamin(\n" |
44387 | " intel_sub_group_avc_ime_result_dual_reference_streamout_t result);\n" |
44388 | "intel_sub_group_avc_ime_result_t __ovld\n" |
44389 | "intel_sub_group_avc_ime_strip_single_reference_streamout(\n" |
44390 | " intel_sub_group_avc_ime_result_single_reference_streamout_t result);\n" |
44391 | "intel_sub_group_avc_ime_result_t __ovld\n" |
44392 | "intel_sub_group_avc_ime_strip_dual_reference_streamout(\n" |
44393 | " intel_sub_group_avc_ime_result_dual_reference_streamout_t result);\n" |
44394 | "\n" |
44395 | "uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors(\n" |
44396 | " intel_sub_group_avc_ime_result_single_reference_streamout_t result,\n" |
44397 | " uchar major_shape);\n" |
44398 | "ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions(\n" |
44399 | " intel_sub_group_avc_ime_result_single_reference_streamout_t result,\n" |
44400 | " uchar major_shape);\n" |
44401 | "uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids(\n" |
44402 | " intel_sub_group_avc_ime_result_single_reference_streamout_t result,\n" |
44403 | " uchar major_shape);\n" |
44404 | "uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors(\n" |
44405 | " intel_sub_group_avc_ime_result_dual_reference_streamout_t result,\n" |
44406 | " uchar major_shape, uchar direction);\n" |
44407 | "ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions(\n" |
44408 | " intel_sub_group_avc_ime_result_dual_reference_streamout_t result,\n" |
44409 | " uchar major_shape, uchar direction);\n" |
44410 | "uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids(\n" |
44411 | " intel_sub_group_avc_ime_result_dual_reference_streamout_t result,\n" |
44412 | " uchar major_shape, uchar direction);\n" |
44413 | "\n" |
44414 | "uchar __ovld intel_sub_group_avc_ime_get_border_reached(\n" |
44415 | " uchar image_select, intel_sub_group_avc_ime_result_t result);\n" |
44416 | "uchar __ovld intel_sub_group_avc_ime_get_truncated_search_indication(\n" |
44417 | " intel_sub_group_avc_ime_result_t result);\n" |
44418 | "uchar __ovld\n" |
44419 | "intel_sub_group_avc_ime_get_unidirectional_early_search_termination(\n" |
44420 | " intel_sub_group_avc_ime_result_t result);\n" |
44421 | "uint __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_motion_vector(\n" |
44422 | " intel_sub_group_avc_ime_result_t result);\n" |
44423 | "ushort __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_distortion(\n" |
44424 | " intel_sub_group_avc_ime_result_t result);\n" |
44425 | "\n" |
44426 | "// REF built-in functions\n" |
44427 | "intel_sub_group_avc_ref_payload_t __ovld\n" |
44428 | "intel_sub_group_avc_fme_initialize(\n" |
44429 | " ushort2 src_coord, ulong motion_vectors, uchar major_shapes,\n" |
44430 | " uchar minor_shapes, uchar directions, uchar pixel_resolution,\n" |
44431 | " uchar sad_adjustment);\n" |
44432 | "intel_sub_group_avc_ref_payload_t __ovld\n" |
44433 | "intel_sub_group_avc_bme_initialize(\n" |
44434 | " ushort2 src_coord, ulong motion_vectors, uchar major_shapes,\n" |
44435 | " uchar minor_shapes, uchar directions, uchar pixel_resolution,\n" |
44436 | " uchar bidirectional_weight, uchar sad_adjustment);\n" |
44437 | "\n" |
44438 | "intel_sub_group_avc_ref_payload_t __ovld\n" |
44439 | "intel_sub_group_avc_ref_set_bidirectional_mix_disable(\n" |
44440 | " intel_sub_group_avc_ref_payload_t payload);\n" |
44441 | "intel_sub_group_avc_ref_payload_t __ovld\n" |
44442 | "intel_sub_group_avc_ref_set_bilinear_filter_enable(\n" |
44443 | " intel_sub_group_avc_ref_payload_t payload);\n" |
44444 | "\n" |
44445 | "intel_sub_group_avc_ref_result_t __ovld\n" |
44446 | "intel_sub_group_avc_ref_evaluate_with_single_reference(\n" |
44447 | " read_only image2d_t src_image, read_only image2d_t ref_image,\n" |
44448 | " sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload);\n" |
44449 | "intel_sub_group_avc_ref_result_t __ovld\n" |
44450 | "intel_sub_group_avc_ref_evaluate_with_dual_reference(\n" |
44451 | " read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n" |
44452 | " read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n" |
44453 | " intel_sub_group_avc_ref_payload_t payload);\n" |
44454 | "intel_sub_group_avc_ref_result_t __ovld\n" |
44455 | "intel_sub_group_avc_ref_evaluate_with_multi_reference(\n" |
44456 | " read_only image2d_t src_image, uint packed_reference_ids,\n" |
44457 | " sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload);\n" |
44458 | "intel_sub_group_avc_ref_result_t __ovld\n" |
44459 | "intel_sub_group_avc_ref_evaluate_with_multi_reference(\n" |
44460 | " read_only image2d_t src_image, uint packed_reference_ids,\n" |
44461 | " uchar packed_reference_field_polarities, sampler_t vme_media_sampler,\n" |
44462 | " intel_sub_group_avc_ref_payload_t payload);\n" |
44463 | "\n" |
44464 | "// SIC built-in functions\n" |
44465 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44466 | "intel_sub_group_avc_sic_initialize(\n" |
44467 | " ushort2 src_coord);\n" |
44468 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44469 | "intel_sub_group_avc_sic_configure_skc(\n" |
44470 | " uint skip_block_partition_type, uint skip_motion_vector_mask,\n" |
44471 | " ulong motion_vectors, uchar bidirectional_weight, uchar skip_sad_adjustment,\n" |
44472 | " intel_sub_group_avc_sic_payload_t payload);\n" |
44473 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44474 | "intel_sub_group_avc_sic_configure_ipe(\n" |
44475 | " uchar luma_intra_partition_mask, uchar intra_neighbour_availabilty,\n" |
44476 | " uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel,\n" |
44477 | " uchar upper_edge_luma_pixels, uchar upper_right_edge_luma_pixels,\n" |
44478 | " uchar intra_sad_adjustment, intel_sub_group_avc_sic_payload_t payload);\n" |
44479 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44480 | "intel_sub_group_avc_sic_configure_ipe(\n" |
44481 | " uchar luma_intra_partition_mask, uchar intra_neighbour_availabilty,\n" |
44482 | " uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel,\n" |
44483 | " uchar upper_edge_luma_pixels, uchar upper_right_edge_luma_pixels,\n" |
44484 | " ushort left_edge_chroma_pixels, ushort upper_left_corner_chroma_pixel,\n" |
44485 | " ushort upper_edge_chroma_pixels, uchar intra_sad_adjustment,\n" |
44486 | " intel_sub_group_avc_sic_payload_t payload);\n" |
44487 | "uint __ovld\n" |
44488 | "intel_sub_group_avc_sic_get_motion_vector_mask(\n" |
44489 | " uint skip_block_partition_type, uchar direction);\n" |
44490 | "\n" |
44491 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44492 | "intel_sub_group_avc_sic_set_intra_luma_shape_penalty(\n" |
44493 | " uint packed_shape_cost, intel_sub_group_avc_sic_payload_t payload);\n" |
44494 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44495 | "intel_sub_group_avc_sic_set_intra_luma_mode_cost_function(\n" |
44496 | " uchar luma_mode_penalty, uint luma_packed_neighbor_modes,\n" |
44497 | " uint luma_packed_non_dc_penalty, intel_sub_group_avc_sic_payload_t payload);\n" |
44498 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44499 | "intel_sub_group_avc_sic_set_intra_chroma_mode_cost_function(\n" |
44500 | " uchar chroma_mode_penalty, intel_sub_group_avc_sic_payload_t payload);\n" |
44501 | "\n" |
44502 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44503 | "intel_sub_group_avc_sic_set_skc_bilinear_filter_enable(\n" |
44504 | " intel_sub_group_avc_sic_payload_t payload);\n" |
44505 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44506 | "intel_sub_group_avc_sic_set_skc_forward_transform_enable(\n" |
44507 | " ulong packed_sad_coefficients, intel_sub_group_avc_sic_payload_t payload);\n" |
44508 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44509 | "intel_sub_group_avc_sic_set_block_based_raw_skip_sad(\n" |
44510 | " uchar block_based_skip_type,\n" |
44511 | " intel_sub_group_avc_sic_payload_t payload);\n" |
44512 | "\n" |
44513 | "intel_sub_group_avc_sic_result_t __ovld\n" |
44514 | "intel_sub_group_avc_sic_evaluate_ipe(\n" |
44515 | " read_only image2d_t src_image, sampler_t vme_media_sampler,\n" |
44516 | " intel_sub_group_avc_sic_payload_t payload);\n" |
44517 | "intel_sub_group_avc_sic_result_t __ovld\n" |
44518 | "intel_sub_group_avc_sic_evaluate_with_single_reference(\n" |
44519 | " read_only image2d_t src_image, read_only image2d_t ref_image,\n" |
44520 | " sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload);\n" |
44521 | "intel_sub_group_avc_sic_result_t __ovld\n" |
44522 | "intel_sub_group_avc_sic_evaluate_with_dual_reference(\n" |
44523 | " read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n" |
44524 | " read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n" |
44525 | " intel_sub_group_avc_sic_payload_t payload);\n" |
44526 | "intel_sub_group_avc_sic_result_t __ovld\n" |
44527 | "intel_sub_group_avc_sic_evaluate_with_multi_reference(\n" |
44528 | " read_only image2d_t src_image, uint packed_reference_ids,\n" |
44529 | " sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload);\n" |
44530 | "intel_sub_group_avc_sic_result_t __ovld\n" |
44531 | "intel_sub_group_avc_sic_evaluate_with_multi_reference(\n" |
44532 | " read_only image2d_t src_image, uint packed_reference_ids,\n" |
44533 | " uchar packed_reference_field_polarities, sampler_t vme_media_sampler,\n" |
44534 | " intel_sub_group_avc_sic_payload_t payload);\n" |
44535 | "\n" |
44536 | "uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape(\n" |
44537 | " intel_sub_group_avc_sic_result_t result);\n" |
44538 | "ushort __ovld intel_sub_group_avc_sic_get_best_ipe_luma_distortion(\n" |
44539 | " intel_sub_group_avc_sic_result_t result);\n" |
44540 | "ushort __ovld intel_sub_group_avc_sic_get_best_ipe_chroma_distortion(\n" |
44541 | " intel_sub_group_avc_sic_result_t result);\n" |
44542 | "ulong __ovld intel_sub_group_avc_sic_get_packed_ipe_luma_modes(\n" |
44543 | " intel_sub_group_avc_sic_result_t result);\n" |
44544 | "uchar __ovld intel_sub_group_avc_sic_get_ipe_chroma_mode(\n" |
44545 | " intel_sub_group_avc_sic_result_t result);\n" |
44546 | "uint __ovld intel_sub_group_avc_sic_get_packed_skc_luma_count_threshold(\n" |
44547 | " intel_sub_group_avc_sic_result_t result);\n" |
44548 | "ulong __ovld intel_sub_group_avc_sic_get_packed_skc_luma_sum_threshold(\n" |
44549 | " intel_sub_group_avc_sic_result_t result);\n" |
44550 | "ushort __ovld intel_sub_group_avc_sic_get_inter_raw_sads(\n" |
44551 | " intel_sub_group_avc_sic_result_t result);\n" |
44552 | "\n" |
44553 | "// Wrappers\n" |
44554 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44555 | "intel_sub_group_avc_ime_set_inter_base_multi_reference_penalty(\n" |
44556 | " uchar reference_base_penalty, intel_sub_group_avc_ime_payload_t payload);\n" |
44557 | "intel_sub_group_avc_ref_payload_t __ovld\n" |
44558 | "intel_sub_group_avc_ref_set_inter_base_multi_reference_penalty(\n" |
44559 | " uchar reference_base_penalty, intel_sub_group_avc_ref_payload_t payload);\n" |
44560 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44561 | "intel_sub_group_avc_sic_set_inter_base_multi_reference_penalty(\n" |
44562 | " uchar reference_base_penalty, intel_sub_group_avc_sic_payload_t payload);\n" |
44563 | "\n" |
44564 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44565 | "intel_sub_group_avc_ime_set_inter_shape_penalty(\n" |
44566 | " ulong packed_shape_cost, intel_sub_group_avc_ime_payload_t payload);\n" |
44567 | "intel_sub_group_avc_ref_payload_t __ovld\n" |
44568 | "intel_sub_group_avc_ref_set_inter_shape_penalty(\n" |
44569 | " ulong packed_shape_cost, intel_sub_group_avc_ref_payload_t payload);\n" |
44570 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44571 | "intel_sub_group_avc_sic_set_inter_shape_penalty(\n" |
44572 | " ulong packed_shape_cost, intel_sub_group_avc_sic_payload_t payload);\n" |
44573 | "\n" |
44574 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44575 | "intel_sub_group_avc_ime_set_inter_direction_penalty(\n" |
44576 | " uchar direction_cost, intel_sub_group_avc_ime_payload_t payload);\n" |
44577 | "intel_sub_group_avc_ref_payload_t __ovld\n" |
44578 | "intel_sub_group_avc_ref_set_inter_direction_penalty(\n" |
44579 | " uchar direction_cost, intel_sub_group_avc_ref_payload_t payload);\n" |
44580 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44581 | "intel_sub_group_avc_sic_set_inter_direction_penalty(\n" |
44582 | " uchar direction_cost, intel_sub_group_avc_sic_payload_t payload);\n" |
44583 | "\n" |
44584 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44585 | "intel_sub_group_avc_ime_set_motion_vector_cost_function(\n" |
44586 | " ulong packed_cost_center_delta, uint2 packed_cost_table,\n" |
44587 | " uchar cost_precision, intel_sub_group_avc_ime_payload_t payload);\n" |
44588 | "intel_sub_group_avc_ref_payload_t __ovld\n" |
44589 | "intel_sub_group_avc_ref_set_motion_vector_cost_function(\n" |
44590 | " ulong packed_cost_center_delta, uint2 packed_cost_table,\n" |
44591 | " uchar cost_precision, intel_sub_group_avc_ref_payload_t payload);\n" |
44592 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44593 | "intel_sub_group_avc_sic_set_motion_vector_cost_function(\n" |
44594 | " ulong packed_cost_center_delta, uint2 packed_cost_table,\n" |
44595 | " uchar cost_precision, intel_sub_group_avc_sic_payload_t payload);\n" |
44596 | "\n" |
44597 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44598 | "intel_sub_group_avc_ime_set_source_interlaced_field_polarity(\n" |
44599 | " uchar src_field_polarity, intel_sub_group_avc_ime_payload_t payload);\n" |
44600 | "intel_sub_group_avc_ref_payload_t __ovld\n" |
44601 | "intel_sub_group_avc_ref_set_source_interlaced_field_polarity(\n" |
44602 | " uchar src_field_polarity, intel_sub_group_avc_ref_payload_t payload);\n" |
44603 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44604 | "intel_sub_group_avc_sic_set_source_interlaced_field_polarity(\n" |
44605 | " uchar src_field_polarity, intel_sub_group_avc_sic_payload_t payload);\n" |
44606 | "\n" |
44607 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44608 | "intel_sub_group_avc_ime_set_single_reference_interlaced_field_polarity(\n" |
44609 | " uchar ref_field_polarity, intel_sub_group_avc_ime_payload_t payload);\n" |
44610 | "intel_sub_group_avc_ref_payload_t __ovld\n" |
44611 | "intel_sub_group_avc_ref_set_single_reference_interlaced_field_polarity(\n" |
44612 | " uchar ref_field_polarity, intel_sub_group_avc_ref_payload_t payload);\n" |
44613 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44614 | "intel_sub_group_avc_sic_set_single_reference_interlaced_field_polarity(\n" |
44615 | " uchar ref_field_polarity, intel_sub_group_avc_sic_payload_t payload);\n" |
44616 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44617 | "intel_sub_group_avc_ime_set_dual_reference_interlaced_field_polarities(\n" |
44618 | " uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,\n" |
44619 | " intel_sub_group_avc_ime_payload_t payload);\n" |
44620 | "intel_sub_group_avc_ref_payload_t __ovld\n" |
44621 | "intel_sub_group_avc_ref_set_dual_reference_interlaced_field_polarities(\n" |
44622 | " uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,\n" |
44623 | " intel_sub_group_avc_ref_payload_t payload);\n" |
44624 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44625 | "intel_sub_group_avc_sic_set_dual_reference_interlaced_field_polarities(\n" |
44626 | " uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,\n" |
44627 | " intel_sub_group_avc_sic_payload_t payload);\n" |
44628 | "\n" |
44629 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44630 | "intel_sub_group_avc_ime_set_ac_only_haar(\n" |
44631 | " intel_sub_group_avc_ime_payload_t payload);\n" |
44632 | "intel_sub_group_avc_ref_payload_t __ovld\n" |
44633 | "intel_sub_group_avc_ref_set_ac_only_haar(\n" |
44634 | " intel_sub_group_avc_ref_payload_t payload);\n" |
44635 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44636 | "intel_sub_group_avc_sic_set_ac_only_haar(\n" |
44637 | " intel_sub_group_avc_sic_payload_t payload);\n" |
44638 | "\n" |
44639 | "ulong __ovld intel_sub_group_avc_ime_get_motion_vectors(\n" |
44640 | " intel_sub_group_avc_ime_result_t result);\n" |
44641 | "ulong __ovld intel_sub_group_avc_ref_get_motion_vectors(\n" |
44642 | " intel_sub_group_avc_ref_result_t result);\n" |
44643 | "\n" |
44644 | "ushort __ovld intel_sub_group_avc_ime_get_inter_distortions(\n" |
44645 | " intel_sub_group_avc_ime_result_t result);\n" |
44646 | "ushort __ovld intel_sub_group_avc_ref_get_inter_distortions(\n" |
44647 | " intel_sub_group_avc_ref_result_t result);\n" |
44648 | "ushort __ovld intel_sub_group_avc_sic_get_inter_distortions(\n" |
44649 | " intel_sub_group_avc_sic_result_t result);\n" |
44650 | "\n" |
44651 | "ushort __ovld intel_sub_group_avc_ime_get_best_inter_distortion(\n" |
44652 | " intel_sub_group_avc_ime_result_t result);\n" |
44653 | "ushort __ovld intel_sub_group_avc_ref_get_best_inter_distortion(\n" |
44654 | " intel_sub_group_avc_ref_result_t result);\n" |
44655 | "\n" |
44656 | "uchar __ovld intel_sub_group_avc_ime_get_inter_major_shape(\n" |
44657 | " intel_sub_group_avc_ime_result_t result);\n" |
44658 | "uchar __ovld intel_sub_group_avc_ref_get_inter_major_shape(\n" |
44659 | " intel_sub_group_avc_ref_result_t result);\n" |
44660 | "uchar __ovld intel_sub_group_avc_ime_get_inter_minor_shapes(\n" |
44661 | " intel_sub_group_avc_ime_result_t result);\n" |
44662 | "uchar __ovld intel_sub_group_avc_ref_get_inter_minor_shapes(\n" |
44663 | " intel_sub_group_avc_ref_result_t result);\n" |
44664 | "\n" |
44665 | "uchar __ovld intel_sub_group_avc_ime_get_inter_directions(\n" |
44666 | " intel_sub_group_avc_ime_result_t result);\n" |
44667 | "uchar __ovld intel_sub_group_avc_ref_get_inter_directions(\n" |
44668 | " intel_sub_group_avc_ref_result_t result);\n" |
44669 | "\n" |
44670 | "uchar __ovld intel_sub_group_avc_ime_get_inter_motion_vector_count(\n" |
44671 | " intel_sub_group_avc_ime_result_t result);\n" |
44672 | "uchar __ovld intel_sub_group_avc_ref_get_inter_motion_vector_count(\n" |
44673 | " intel_sub_group_avc_ref_result_t result);\n" |
44674 | "\n" |
44675 | "uint __ovld intel_sub_group_avc_ime_get_inter_reference_ids(\n" |
44676 | " intel_sub_group_avc_ime_result_t result);\n" |
44677 | "uint __ovld intel_sub_group_avc_ref_get_inter_reference_ids(\n" |
44678 | " intel_sub_group_avc_ref_result_t result);\n" |
44679 | "\n" |
44680 | "uchar __ovld\n" |
44681 | "intel_sub_group_avc_ime_get_inter_reference_interlaced_field_polarities(\n" |
44682 | " uint packed_reference_ids, uint packed_reference_parameter_field_polarities,\n" |
44683 | " intel_sub_group_avc_ime_result_t result);\n" |
44684 | "uchar __ovld\n" |
44685 | "intel_sub_group_avc_ref_get_inter_reference_interlaced_field_polarities(\n" |
44686 | " uint packed_reference_ids, uint packed_reference_parameter_field_polarities,\n" |
44687 | " intel_sub_group_avc_ref_result_t result);\n" |
44688 | "\n" |
44689 | "// Type conversion functions\n" |
44690 | "intel_sub_group_avc_mce_payload_t __ovld\n" |
44691 | "intel_sub_group_avc_ime_convert_to_mce_payload(\n" |
44692 | " intel_sub_group_avc_ime_payload_t payload);\n" |
44693 | "intel_sub_group_avc_ime_payload_t __ovld\n" |
44694 | "intel_sub_group_avc_mce_convert_to_ime_payload(\n" |
44695 | " intel_sub_group_avc_mce_payload_t payload);\n" |
44696 | "intel_sub_group_avc_mce_payload_t __ovld\n" |
44697 | "intel_sub_group_avc_ref_convert_to_mce_payload(\n" |
44698 | " intel_sub_group_avc_ref_payload_t payload);\n" |
44699 | "intel_sub_group_avc_ref_payload_t __ovld\n" |
44700 | "intel_sub_group_avc_mce_convert_to_ref_payload(\n" |
44701 | " intel_sub_group_avc_mce_payload_t payload);\n" |
44702 | "intel_sub_group_avc_mce_payload_t __ovld\n" |
44703 | "intel_sub_group_avc_sic_convert_to_mce_payload(\n" |
44704 | " intel_sub_group_avc_sic_payload_t payload);\n" |
44705 | "intel_sub_group_avc_sic_payload_t __ovld\n" |
44706 | "intel_sub_group_avc_mce_convert_to_sic_payload(\n" |
44707 | " intel_sub_group_avc_mce_payload_t payload);\n" |
44708 | "\n" |
44709 | "intel_sub_group_avc_mce_result_t __ovld\n" |
44710 | "intel_sub_group_avc_ime_convert_to_mce_result(\n" |
44711 | " intel_sub_group_avc_ime_result_t result);\n" |
44712 | "intel_sub_group_avc_ime_result_t __ovld\n" |
44713 | "intel_sub_group_avc_mce_convert_to_ime_result(\n" |
44714 | " intel_sub_group_avc_mce_result_t result);\n" |
44715 | "intel_sub_group_avc_mce_result_t __ovld\n" |
44716 | "intel_sub_group_avc_ref_convert_to_mce_result(\n" |
44717 | " intel_sub_group_avc_ref_result_t result);\n" |
44718 | "intel_sub_group_avc_ref_result_t __ovld\n" |
44719 | "intel_sub_group_avc_mce_convert_to_ref_result(\n" |
44720 | " intel_sub_group_avc_mce_result_t result);\n" |
44721 | "intel_sub_group_avc_mce_result_t __ovld\n" |
44722 | "intel_sub_group_avc_sic_convert_to_mce_result(\n" |
44723 | " intel_sub_group_avc_sic_result_t result);\n" |
44724 | "intel_sub_group_avc_sic_result_t __ovld\n" |
44725 | "intel_sub_group_avc_mce_convert_to_sic_result(\n" |
44726 | " intel_sub_group_avc_mce_result_t result);\n" |
44727 | "#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : end\n" |
44728 | "#endif // cl_intel_device_side_avc_motion_estimation\n" |
44729 | "\n" |
44730 | "#ifdef cl_amd_media_ops\n" |
44731 | "uint __ovld amd_bitalign(uint a, uint b, uint c);\n" |
44732 | "uint2 __ovld amd_bitalign(uint2 a, uint2 b, uint2 c);\n" |
44733 | "uint3 __ovld amd_bitalign(uint3 a, uint3 b, uint3 c);\n" |
44734 | "uint4 __ovld amd_bitalign(uint4 a, uint4 b, uint4 c);\n" |
44735 | "uint8 __ovld amd_bitalign(uint8 a, uint8 b, uint8 c);\n" |
44736 | "uint16 __ovld amd_bitalign(uint16 a, uint16 b, uint16 c);\n" |
44737 | "\n" |
44738 | "uint __ovld amd_bytealign(uint a, uint b, uint c);\n" |
44739 | "uint2 __ovld amd_bytealign(uint2 a, uint2 b, uint2 c);\n" |
44740 | "uint3 __ovld amd_bytealign(uint3 a, uint3 b, uint3 c);\n" |
44741 | "uint4 __ovld amd_bytealign(uint4 a, uint4 b, uint4 c);\n" |
44742 | "uint8 __ovld amd_bytealign(uint8 a, uint8 b, uint8 c);\n" |
44743 | "uint16 __ovld amd_bytealign(uint16 a, uint16 b, uint16 c);\n" |
44744 | "\n" |
44745 | "uint __ovld amd_lerp(uint a, uint b, uint c);\n" |
44746 | "uint2 __ovld amd_lerp(uint2 a, uint2 b, uint2 c);\n" |
44747 | "uint3 __ovld amd_lerp(uint3 a, uint3 b, uint3 c);\n" |
44748 | "uint4 __ovld amd_lerp(uint4 a, uint4 b, uint4 c);\n" |
44749 | "uint8 __ovld amd_lerp(uint8 a, uint8 b, uint8 c);\n" |
44750 | "uint16 __ovld amd_lerp(uint16 a, uint16 b, uint16 c);\n" |
44751 | "\n" |
44752 | "uint __ovld amd_pack(float4 v);\n" |
44753 | "\n" |
44754 | "uint __ovld amd_sad4(uint4 x, uint4 y, uint z);\n" |
44755 | "\n" |
44756 | "uint __ovld amd_sadhi(uint a, uint b, uint c);\n" |
44757 | "uint2 __ovld amd_sadhi(uint2 a, uint2 b, uint2 c);\n" |
44758 | "uint3 __ovld amd_sadhi(uint3 a, uint3 b, uint3 c);\n" |
44759 | "uint4 __ovld amd_sadhi(uint4 a, uint4 b, uint4 c);\n" |
44760 | "uint8 __ovld amd_sadhi(uint8 a, uint8 b, uint8 c);\n" |
44761 | "uint16 __ovld amd_sadhi(uint16 a, uint16 b, uint16 c);\n" |
44762 | "\n" |
44763 | "uint __ovld amd_sad(uint a, uint b, uint c);\n" |
44764 | "uint2 __ovld amd_sad(uint2 a, uint2 b, uint2 c);\n" |
44765 | "uint3 __ovld amd_sad(uint3 a, uint3 b, uint3 c);\n" |
44766 | "uint4 __ovld amd_sad(uint4 a, uint4 b, uint4 c);\n" |
44767 | "uint8 __ovld amd_sad(uint8 a, uint8 b, uint8 c);\n" |
44768 | "uint16 __ovld amd_sad(uint16 a, uint16 b, uint16 c);\n" |
44769 | "\n" |
44770 | "float __ovld amd_unpack0(uint a);\n" |
44771 | "float2 __ovld amd_unpack0(uint2 a);\n" |
44772 | "float3 __ovld amd_unpack0(uint3 a);\n" |
44773 | "float4 __ovld amd_unpack0(uint4 a);\n" |
44774 | "float8 __ovld amd_unpack0(uint8 a);\n" |
44775 | "float16 __ovld amd_unpack0(uint16 a);\n" |
44776 | "\n" |
44777 | "float __ovld amd_unpack1(uint a);\n" |
44778 | "float2 __ovld amd_unpack1(uint2 a);\n" |
44779 | "float3 __ovld amd_unpack1(uint3 a);\n" |
44780 | "float4 __ovld amd_unpack1(uint4 a);\n" |
44781 | "float8 __ovld amd_unpack1(uint8 a);\n" |
44782 | "float16 __ovld amd_unpack1(uint16 a);\n" |
44783 | "\n" |
44784 | "float __ovld amd_unpack2(uint a);\n" |
44785 | "float2 __ovld amd_unpack2(uint2 a);\n" |
44786 | "float3 __ovld amd_unpack2(uint3 a);\n" |
44787 | "float4 __ovld amd_unpack2(uint4 a);\n" |
44788 | "float8 __ovld amd_unpack2(uint8 a);\n" |
44789 | "float16 __ovld amd_unpack2(uint16 a);\n" |
44790 | "\n" |
44791 | "float __ovld amd_unpack3(uint a);\n" |
44792 | "float2 __ovld amd_unpack3(uint2 a);\n" |
44793 | "float3 __ovld amd_unpack3(uint3 a);\n" |
44794 | "float4 __ovld amd_unpack3(uint4 a);\n" |
44795 | "float8 __ovld amd_unpack3(uint8 a);\n" |
44796 | "float16 __ovld amd_unpack3(uint16 a);\n" |
44797 | "#endif // cl_amd_media_ops\n" |
44798 | "\n" |
44799 | "#ifdef cl_amd_media_ops2\n" |
44800 | "int __ovld amd_bfe(int src0, uint src1, uint src2);\n" |
44801 | "int2 __ovld amd_bfe(int2 src0, uint2 src1, uint2 src2);\n" |
44802 | "int3 __ovld amd_bfe(int3 src0, uint3 src1, uint3 src2);\n" |
44803 | "int4 __ovld amd_bfe(int4 src0, uint4 src1, uint4 src2);\n" |
44804 | "int8 __ovld amd_bfe(int8 src0, uint8 src1, uint8 src2);\n" |
44805 | "int16 __ovld amd_bfe(int16 src0, uint16 src1, uint16 src2);\n" |
44806 | "\n" |
44807 | "uint __ovld amd_bfe(uint src0, uint src1, uint src2);\n" |
44808 | "uint2 __ovld amd_bfe(uint2 src0, uint2 src1, uint2 src2);\n" |
44809 | "uint3 __ovld amd_bfe(uint3 src0, uint3 src1, uint3 src2);\n" |
44810 | "uint4 __ovld amd_bfe(uint4 src0, uint4 src1, uint4 src2);\n" |
44811 | "uint8 __ovld amd_bfe(uint8 src0, uint8 src1, uint8 src2);\n" |
44812 | "uint16 __ovld amd_bfe(uint16 src0, uint16 src1, uint16 src2);\n" |
44813 | "\n" |
44814 | "uint __ovld amd_bfm(uint src0, uint src1);\n" |
44815 | "uint2 __ovld amd_bfm(uint2 src0, uint2 src1);\n" |
44816 | "uint3 __ovld amd_bfm(uint3 src0, uint3 src1);\n" |
44817 | "uint4 __ovld amd_bfm(uint4 src0, uint4 src1);\n" |
44818 | "uint8 __ovld amd_bfm(uint8 src0, uint8 src1);\n" |
44819 | "uint16 __ovld amd_bfm(uint16 src0, uint16 src1);\n" |
44820 | "\n" |
44821 | "float __ovld amd_max3(float src0, float src1, float src2);\n" |
44822 | "float2 __ovld amd_max3(float2 src0, float2 src1, float2 src2);\n" |
44823 | "float3 __ovld amd_max3(float3 src0, float3 src1, float3 src2);\n" |
44824 | "float4 __ovld amd_max3(float4 src0, float4 src1, float4 src2);\n" |
44825 | "float8 __ovld amd_max3(float8 src0, float8 src1, float8 src2);\n" |
44826 | "float16 __ovld amd_max3(float16 src0, float16 src1, float16 src2);\n" |
44827 | "\n" |
44828 | "int __ovld amd_max3(int src0, int src1, int src2);\n" |
44829 | "int2 __ovld amd_max3(int2 src0, int2 src1, int2 src2);\n" |
44830 | "int3 __ovld amd_max3(int3 src0, int3 src1, int3 src2);\n" |
44831 | "int4 __ovld amd_max3(int4 src0, int4 src1, int4 src2);\n" |
44832 | "int8 __ovld amd_max3(int8 src0, int8 src1, int8 src2);\n" |
44833 | "int16 __ovld amd_max3(int16 src0, int16 src1, int16 src2);\n" |
44834 | "\n" |
44835 | "uint __ovld amd_max3(uint src0, uint src1, uint src2);\n" |
44836 | "uint2 __ovld amd_max3(uint2 src0, uint2 src1, uint2 src2);\n" |
44837 | "uint3 __ovld amd_max3(uint3 src0, uint3 src1, uint3 src2);\n" |
44838 | "uint4 __ovld amd_max3(uint4 src0, uint4 src1, uint4 src2);\n" |
44839 | "uint8 __ovld amd_max3(uint8 src0, uint8 src1, uint8 src2);\n" |
44840 | "uint16 __ovld amd_max3(uint16 src0, uint16 src1, uint16 src2);\n" |
44841 | "\n" |
44842 | "float __ovld amd_median3(float src0, float src1, float src2);\n" |
44843 | "float2 __ovld amd_median3(float2 src0, float2 src1, float2 src2);\n" |
44844 | "float3 __ovld amd_median3(float3 src0, float3 src1, float3 src2);\n" |
44845 | "float4 __ovld amd_median3(float4 src0, float4 src1, float4 src2);\n" |
44846 | "float8 __ovld amd_median3(float8 src0, float8 src1, float8 src2);\n" |
44847 | "float16 __ovld amd_median3(float16 src0, float16 src1, float16 src2);\n" |
44848 | "\n" |
44849 | "int __ovld amd_median3(int src0, int src1, int src2);\n" |
44850 | "int2 __ovld amd_median3(int2 src0, int2 src1, int2 src2);\n" |
44851 | "int3 __ovld amd_median3(int3 src0, int3 src1, int3 src2);\n" |
44852 | "int4 __ovld amd_median3(int4 src0, int4 src1, int4 src2);\n" |
44853 | "int8 __ovld amd_median3(int8 src0, int8 src1, int8 src2);\n" |
44854 | "int16 __ovld amd_median3(int16 src0, int16 src1, int16 src2);\n" |
44855 | "\n" |
44856 | "uint __ovld amd_median3(uint src0, uint src1, uint src2);\n" |
44857 | "uint2 __ovld amd_median3(uint2 src0, uint2 src1, uint2 src2);\n" |
44858 | "uint3 __ovld amd_median3(uint3 src0, uint3 src1, uint3 src2);\n" |
44859 | "uint4 __ovld amd_median3(uint4 src0, uint4 src1, uint4 src2);\n" |
44860 | "uint8 __ovld amd_median3(uint8 src0, uint8 src1, uint8 src2);\n" |
44861 | "uint16 __ovld amd_median3(uint16 src0, uint16 src1, uint16 src2);\n" |
44862 | "\n" |
44863 | "float __ovld amd_min3(float src0, float src1, float src);\n" |
44864 | "float2 __ovld amd_min3(float2 src0, float2 src1, float2 src);\n" |
44865 | "float3 __ovld amd_min3(float3 src0, float3 src1, float3 src);\n" |
44866 | "float4 __ovld amd_min3(float4 src0, float4 src1, float4 src);\n" |
44867 | "float8 __ovld amd_min3(float8 src0, float8 src1, float8 src);\n" |
44868 | "float16 __ovld amd_min3(float16 src0, float16 src1, float16 src);\n" |
44869 | "\n" |
44870 | "int __ovld amd_min3(int src0, int src1, int src2);\n" |
44871 | "int2 __ovld amd_min3(int2 src0, int2 src1, int2 src2);\n" |
44872 | "int3 __ovld amd_min3(int3 src0, int3 src1, int3 src2);\n" |
44873 | "int4 __ovld amd_min3(int4 src0, int4 src1, int4 src2);\n" |
44874 | "int8 __ovld amd_min3(int8 src0, int8 src1, int8 src2);\n" |
44875 | "int16 __ovld amd_min3(int16 src0, int16 src1, int16 src2);\n" |
44876 | "\n" |
44877 | "uint __ovld amd_min3(uint src0, uint src1, uint src2);\n" |
44878 | "uint2 __ovld amd_min3(uint2 src0, uint2 src1, uint2 src2);\n" |
44879 | "uint3 __ovld amd_min3(uint3 src0, uint3 src1, uint3 src2);\n" |
44880 | "uint4 __ovld amd_min3(uint4 src0, uint4 src1, uint4 src2);\n" |
44881 | "uint8 __ovld amd_min3(uint8 src0, uint8 src1, uint8 src2);\n" |
44882 | "uint16 __ovld amd_min3(uint16 src0, uint16 src1, uint16 src2);\n" |
44883 | "\n" |
44884 | "ulong __ovld amd_mqsad(ulong src0, uint src1, ulong src2);\n" |
44885 | "ulong2 __ovld amd_mqsad(ulong2 src0, uint2 src1, ulong2 src2);\n" |
44886 | "ulong3 __ovld amd_mqsad(ulong3 src0, uint3 src1, ulong3 src2);\n" |
44887 | "ulong4 __ovld amd_mqsad(ulong4 src0, uint4 src1, ulong4 src2);\n" |
44888 | "ulong8 __ovld amd_mqsad(ulong8 src0, uint8 src1, ulong8 src2);\n" |
44889 | "ulong16 __ovld amd_mqsad(ulong16 src0, uint16 src1, ulong16 src2);\n" |
44890 | "\n" |
44891 | "ulong __ovld amd_qsad(ulong src0, uint src1, ulong src2);\n" |
44892 | "ulong2 __ovld amd_qsad(ulong2 src0, uint2 src1, ulong2 src2);\n" |
44893 | "ulong3 __ovld amd_qsad(ulong3 src0, uint3 src1, ulong3 src2);\n" |
44894 | "ulong4 __ovld amd_qsad(ulong4 src0, uint4 src1, ulong4 src2);\n" |
44895 | "ulong8 __ovld amd_qsad(ulong8 src0, uint8 src1, ulong8 src2);\n" |
44896 | "ulong16 __ovld amd_qsad(ulong16 src0, uint16 src1, ulong16 src2);\n" |
44897 | "\n" |
44898 | "uint __ovld amd_msad(uint src0, uint src1, uint src2);\n" |
44899 | "uint2 __ovld amd_msad(uint2 src0, uint2 src1, uint2 src2);\n" |
44900 | "uint3 __ovld amd_msad(uint3 src0, uint3 src1, uint3 src2);\n" |
44901 | "uint4 __ovld amd_msad(uint4 src0, uint4 src1, uint4 src2);\n" |
44902 | "uint8 __ovld amd_msad(uint8 src0, uint8 src1, uint8 src2);\n" |
44903 | "uint16 __ovld amd_msad(uint16 src0, uint16 src1, uint16 src2);\n" |
44904 | "\n" |
44905 | "uint __ovld amd_sadd(uint src0, uint src1, uint src2);\n" |
44906 | "uint2 __ovld amd_sadd(uint2 src0, uint2 src1, uint2 src2);\n" |
44907 | "uint3 __ovld amd_sadd(uint3 src0, uint3 src1, uint3 src2);\n" |
44908 | "uint4 __ovld amd_sadd(uint4 src0, uint4 src1, uint4 src2);\n" |
44909 | "uint8 __ovld amd_sadd(uint8 src0, uint8 src1, uint8 src2);\n" |
44910 | "uint16 __ovld amd_sadd(uint16 src0, uint16 src1, uint16 src2);\n" |
44911 | "\n" |
44912 | "uint __ovld amd_sadw(uint src0, uint src1, uint src2);\n" |
44913 | "uint2 __ovld amd_sadw(uint2 src0, uint2 src1, uint2 src2);\n" |
44914 | "uint3 __ovld amd_sadw(uint3 src0, uint3 src1, uint3 src2);\n" |
44915 | "uint4 __ovld amd_sadw(uint4 src0, uint4 src1, uint4 src2);\n" |
44916 | "uint8 __ovld amd_sadw(uint8 src0, uint8 src1, uint8 src2);\n" |
44917 | "uint16 __ovld amd_sadw(uint16 src0, uint16 src1, uint16 src2);\n" |
44918 | "#endif // cl_amd_media_ops2\n" |
44919 | "\n" |
44920 | "// Disable any extensions we may have enabled previously.\n" |
44921 | "#pragma OPENCL EXTENSION all : disable\n" |
44922 | "\n" |
44923 | "#undef __cnfn\n" |
44924 | "#undef __ovld\n" |
44925 | "#endif //_OPENCL_H_\n" |
44926 | "" } , |
44927 | { "/builtins/pconfigintrin.h" , "/*===---- pconfigintrin.h - X86 platform configuration ---------------------===\n" |
44928 | " *\n" |
44929 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
44930 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
44931 | " * in the Software without restriction, including without limitation the rights\n" |
44932 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
44933 | " * copies of the Software, and to permit persons to whom the Software is\n" |
44934 | " * furnished to do so, subject to the following conditions:\n" |
44935 | " *\n" |
44936 | " * The above copyright notice and this permission notice shall be included in\n" |
44937 | " * all copies or substantial portions of the Software.\n" |
44938 | " *\n" |
44939 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
44940 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
44941 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
44942 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
44943 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
44944 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
44945 | " * THE SOFTWARE.\n" |
44946 | " *\n" |
44947 | " *===-----------------------------------------------------------------------===\n" |
44948 | " */\n" |
44949 | "\n" |
44950 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
44951 | "#error \"Never use <pconfigintrin.h> directly; include <x86intrin.h> instead.\"\n" |
44952 | "#endif\n" |
44953 | "\n" |
44954 | "#ifndef __PCONFIGINTRIN_H\n" |
44955 | "#define __PCONFIGINTRIN_H\n" |
44956 | "\n" |
44957 | "#define __PCONFIG_KEY_PROGRAM 0x00000001\n" |
44958 | "\n" |
44959 | "/* Define the default attributes for the functions in this file. */\n" |
44960 | "#define __DEFAULT_FN_ATTRS \\\n" |
44961 | " __attribute__((__always_inline__, __nodebug__, __target__(\"pconfig\")))\n" |
44962 | "\n" |
44963 | "static __inline unsigned int __DEFAULT_FN_ATTRS\n" |
44964 | "_pconfig_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n" |
44965 | "{\n" |
44966 | " unsigned int __result;\n" |
44967 | " __asm__ (\"pconfig\"\n" |
44968 | " : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n" |
44969 | " : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n" |
44970 | " : \"cc\");\n" |
44971 | " return __result;\n" |
44972 | "}\n" |
44973 | "\n" |
44974 | "#undef __DEFAULT_FN_ATTRS\n" |
44975 | "\n" |
44976 | "#endif\n" |
44977 | "" } , |
44978 | { "/builtins/pkuintrin.h" , "/*===---- pkuintrin.h - PKU intrinsics -------------------------------------===\n" |
44979 | " *\n" |
44980 | " *\n" |
44981 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
44982 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
44983 | " * in the Software without restriction, including without limitation the rights\n" |
44984 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
44985 | " * copies of the Software, and to permit persons to whom the Software is\n" |
44986 | " * furnished to do so, subject to the following conditions:\n" |
44987 | " *\n" |
44988 | " * The above copyright notice and this permission notice shall be included in\n" |
44989 | " * all copies or substantial portions of the Software.\n" |
44990 | " *\n" |
44991 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
44992 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
44993 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
44994 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
44995 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
44996 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
44997 | " * THE SOFTWARE.\n" |
44998 | " *\n" |
44999 | " *===-----------------------------------------------------------------------===\n" |
45000 | " */\n" |
45001 | "#ifndef __IMMINTRIN_H\n" |
45002 | "#error \"Never use <pkuintrin.h> directly; include <immintrin.h> instead.\"\n" |
45003 | "#endif\n" |
45004 | "\n" |
45005 | "#ifndef __PKUINTRIN_H\n" |
45006 | "#define __PKUINTRIN_H\n" |
45007 | "\n" |
45008 | "/* Define the default attributes for the functions in this file. */\n" |
45009 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"pku\")))\n" |
45010 | "\n" |
45011 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
45012 | "_rdpkru_u32(void)\n" |
45013 | "{\n" |
45014 | " return __builtin_ia32_rdpkru();\n" |
45015 | "}\n" |
45016 | "\n" |
45017 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
45018 | "_wrpkru(unsigned int __val)\n" |
45019 | "{\n" |
45020 | " __builtin_ia32_wrpkru(__val);\n" |
45021 | "}\n" |
45022 | "\n" |
45023 | "#undef __DEFAULT_FN_ATTRS\n" |
45024 | "\n" |
45025 | "#endif\n" |
45026 | "" } , |
45027 | { "/builtins/pmmintrin.h" , "/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===\n" |
45028 | " *\n" |
45029 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
45030 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
45031 | " * in the Software without restriction, including without limitation the rights\n" |
45032 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
45033 | " * copies of the Software, and to permit persons to whom the Software is\n" |
45034 | " * furnished to do so, subject to the following conditions:\n" |
45035 | " *\n" |
45036 | " * The above copyright notice and this permission notice shall be included in\n" |
45037 | " * all copies or substantial portions of the Software.\n" |
45038 | " *\n" |
45039 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
45040 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
45041 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
45042 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
45043 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
45044 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
45045 | " * THE SOFTWARE.\n" |
45046 | " *\n" |
45047 | " *===-----------------------------------------------------------------------===\n" |
45048 | " */\n" |
45049 | "\n" |
45050 | "#ifndef __PMMINTRIN_H\n" |
45051 | "#define __PMMINTRIN_H\n" |
45052 | "\n" |
45053 | "#include <emmintrin.h>\n" |
45054 | "\n" |
45055 | "/* Define the default attributes for the functions in this file. */\n" |
45056 | "#define __DEFAULT_FN_ATTRS \\\n" |
45057 | " __attribute__((__always_inline__, __nodebug__, __target__(\"sse3\"), __min_vector_width__(128)))\n" |
45058 | "\n" |
45059 | "/// Loads data from an unaligned memory location to elements in a 128-bit\n" |
45060 | "/// vector.\n" |
45061 | "///\n" |
45062 | "/// If the address of the data is not 16-byte aligned, the instruction may\n" |
45063 | "/// read two adjacent aligned blocks of memory to retrieve the requested\n" |
45064 | "/// data.\n" |
45065 | "///\n" |
45066 | "/// \\headerfile <x86intrin.h>\n" |
45067 | "///\n" |
45068 | "/// This intrinsic corresponds to the <c> VLDDQU </c> instruction.\n" |
45069 | "///\n" |
45070 | "/// \\param __p\n" |
45071 | "/// A pointer to a 128-bit integer vector containing integer values.\n" |
45072 | "/// \\returns A 128-bit vector containing the moved values.\n" |
45073 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
45074 | "_mm_lddqu_si128(__m128i const *__p)\n" |
45075 | "{\n" |
45076 | " return (__m128i)__builtin_ia32_lddqu((char const *)__p);\n" |
45077 | "}\n" |
45078 | "\n" |
45079 | "/// Adds the even-indexed values and subtracts the odd-indexed values of\n" |
45080 | "/// two 128-bit vectors of [4 x float].\n" |
45081 | "///\n" |
45082 | "/// \\headerfile <x86intrin.h>\n" |
45083 | "///\n" |
45084 | "/// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.\n" |
45085 | "///\n" |
45086 | "/// \\param __a\n" |
45087 | "/// A 128-bit vector of [4 x float] containing the left source operand.\n" |
45088 | "/// \\param __b\n" |
45089 | "/// A 128-bit vector of [4 x float] containing the right source operand.\n" |
45090 | "/// \\returns A 128-bit vector of [4 x float] containing the alternating sums and\n" |
45091 | "/// differences of both operands.\n" |
45092 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
45093 | "_mm_addsub_ps(__m128 __a, __m128 __b)\n" |
45094 | "{\n" |
45095 | " return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);\n" |
45096 | "}\n" |
45097 | "\n" |
45098 | "/// Horizontally adds the adjacent pairs of values contained in two\n" |
45099 | "/// 128-bit vectors of [4 x float].\n" |
45100 | "///\n" |
45101 | "/// \\headerfile <x86intrin.h>\n" |
45102 | "///\n" |
45103 | "/// This intrinsic corresponds to the <c> VHADDPS </c> instruction.\n" |
45104 | "///\n" |
45105 | "/// \\param __a\n" |
45106 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
45107 | "/// The horizontal sums of the values are stored in the lower bits of the\n" |
45108 | "/// destination.\n" |
45109 | "/// \\param __b\n" |
45110 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
45111 | "/// The horizontal sums of the values are stored in the upper bits of the\n" |
45112 | "/// destination.\n" |
45113 | "/// \\returns A 128-bit vector of [4 x float] containing the horizontal sums of\n" |
45114 | "/// both operands.\n" |
45115 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
45116 | "_mm_hadd_ps(__m128 __a, __m128 __b)\n" |
45117 | "{\n" |
45118 | " return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);\n" |
45119 | "}\n" |
45120 | "\n" |
45121 | "/// Horizontally subtracts the adjacent pairs of values contained in two\n" |
45122 | "/// 128-bit vectors of [4 x float].\n" |
45123 | "///\n" |
45124 | "/// \\headerfile <x86intrin.h>\n" |
45125 | "///\n" |
45126 | "/// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.\n" |
45127 | "///\n" |
45128 | "/// \\param __a\n" |
45129 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
45130 | "/// The horizontal differences between the values are stored in the lower\n" |
45131 | "/// bits of the destination.\n" |
45132 | "/// \\param __b\n" |
45133 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
45134 | "/// The horizontal differences between the values are stored in the upper\n" |
45135 | "/// bits of the destination.\n" |
45136 | "/// \\returns A 128-bit vector of [4 x float] containing the horizontal\n" |
45137 | "/// differences of both operands.\n" |
45138 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
45139 | "_mm_hsub_ps(__m128 __a, __m128 __b)\n" |
45140 | "{\n" |
45141 | " return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);\n" |
45142 | "}\n" |
45143 | "\n" |
45144 | "/// Moves and duplicates odd-indexed values from a 128-bit vector\n" |
45145 | "/// of [4 x float] to float values stored in a 128-bit vector of\n" |
45146 | "/// [4 x float].\n" |
45147 | "///\n" |
45148 | "/// \\headerfile <x86intrin.h>\n" |
45149 | "///\n" |
45150 | "/// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.\n" |
45151 | "///\n" |
45152 | "/// \\param __a\n" |
45153 | "/// A 128-bit vector of [4 x float]. \\n\n" |
45154 | "/// Bits [127:96] of the source are written to bits [127:96] and [95:64] of\n" |
45155 | "/// the destination. \\n\n" |
45156 | "/// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the\n" |
45157 | "/// destination.\n" |
45158 | "/// \\returns A 128-bit vector of [4 x float] containing the moved and duplicated\n" |
45159 | "/// values.\n" |
45160 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
45161 | "_mm_movehdup_ps(__m128 __a)\n" |
45162 | "{\n" |
45163 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);\n" |
45164 | "}\n" |
45165 | "\n" |
45166 | "/// Duplicates even-indexed values from a 128-bit vector of\n" |
45167 | "/// [4 x float] to float values stored in a 128-bit vector of [4 x float].\n" |
45168 | "///\n" |
45169 | "/// \\headerfile <x86intrin.h>\n" |
45170 | "///\n" |
45171 | "/// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.\n" |
45172 | "///\n" |
45173 | "/// \\param __a\n" |
45174 | "/// A 128-bit vector of [4 x float] \\n\n" |
45175 | "/// Bits [95:64] of the source are written to bits [127:96] and [95:64] of\n" |
45176 | "/// the destination. \\n\n" |
45177 | "/// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the\n" |
45178 | "/// destination.\n" |
45179 | "/// \\returns A 128-bit vector of [4 x float] containing the moved and duplicated\n" |
45180 | "/// values.\n" |
45181 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
45182 | "_mm_moveldup_ps(__m128 __a)\n" |
45183 | "{\n" |
45184 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);\n" |
45185 | "}\n" |
45186 | "\n" |
45187 | "/// Adds the even-indexed values and subtracts the odd-indexed values of\n" |
45188 | "/// two 128-bit vectors of [2 x double].\n" |
45189 | "///\n" |
45190 | "/// \\headerfile <x86intrin.h>\n" |
45191 | "///\n" |
45192 | "/// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.\n" |
45193 | "///\n" |
45194 | "/// \\param __a\n" |
45195 | "/// A 128-bit vector of [2 x double] containing the left source operand.\n" |
45196 | "/// \\param __b\n" |
45197 | "/// A 128-bit vector of [2 x double] containing the right source operand.\n" |
45198 | "/// \\returns A 128-bit vector of [2 x double] containing the alternating sums\n" |
45199 | "/// and differences of both operands.\n" |
45200 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
45201 | "_mm_addsub_pd(__m128d __a, __m128d __b)\n" |
45202 | "{\n" |
45203 | " return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);\n" |
45204 | "}\n" |
45205 | "\n" |
45206 | "/// Horizontally adds the pairs of values contained in two 128-bit\n" |
45207 | "/// vectors of [2 x double].\n" |
45208 | "///\n" |
45209 | "/// \\headerfile <x86intrin.h>\n" |
45210 | "///\n" |
45211 | "/// This intrinsic corresponds to the <c> VHADDPD </c> instruction.\n" |
45212 | "///\n" |
45213 | "/// \\param __a\n" |
45214 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
45215 | "/// The horizontal sum of the values is stored in the lower bits of the\n" |
45216 | "/// destination.\n" |
45217 | "/// \\param __b\n" |
45218 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
45219 | "/// The horizontal sum of the values is stored in the upper bits of the\n" |
45220 | "/// destination.\n" |
45221 | "/// \\returns A 128-bit vector of [2 x double] containing the horizontal sums of\n" |
45222 | "/// both operands.\n" |
45223 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
45224 | "_mm_hadd_pd(__m128d __a, __m128d __b)\n" |
45225 | "{\n" |
45226 | " return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);\n" |
45227 | "}\n" |
45228 | "\n" |
45229 | "/// Horizontally subtracts the pairs of values contained in two 128-bit\n" |
45230 | "/// vectors of [2 x double].\n" |
45231 | "///\n" |
45232 | "/// \\headerfile <x86intrin.h>\n" |
45233 | "///\n" |
45234 | "/// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.\n" |
45235 | "///\n" |
45236 | "/// \\param __a\n" |
45237 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
45238 | "/// The horizontal difference of the values is stored in the lower bits of\n" |
45239 | "/// the destination.\n" |
45240 | "/// \\param __b\n" |
45241 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
45242 | "/// The horizontal difference of the values is stored in the upper bits of\n" |
45243 | "/// the destination.\n" |
45244 | "/// \\returns A 128-bit vector of [2 x double] containing the horizontal\n" |
45245 | "/// differences of both operands.\n" |
45246 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
45247 | "_mm_hsub_pd(__m128d __a, __m128d __b)\n" |
45248 | "{\n" |
45249 | " return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);\n" |
45250 | "}\n" |
45251 | "\n" |
45252 | "/// Moves and duplicates one double-precision value to double-precision\n" |
45253 | "/// values stored in a 128-bit vector of [2 x double].\n" |
45254 | "///\n" |
45255 | "/// \\headerfile <x86intrin.h>\n" |
45256 | "///\n" |
45257 | "/// \\code\n" |
45258 | "/// __m128d _mm_loaddup_pd(double const *dp);\n" |
45259 | "/// \\endcode\n" |
45260 | "///\n" |
45261 | "/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.\n" |
45262 | "///\n" |
45263 | "/// \\param dp\n" |
45264 | "/// A pointer to a double-precision value to be moved and duplicated.\n" |
45265 | "/// \\returns A 128-bit vector of [2 x double] containing the moved and\n" |
45266 | "/// duplicated values.\n" |
45267 | "#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)\n" |
45268 | "\n" |
45269 | "/// Moves and duplicates the double-precision value in the lower bits of\n" |
45270 | "/// a 128-bit vector of [2 x double] to double-precision values stored in a\n" |
45271 | "/// 128-bit vector of [2 x double].\n" |
45272 | "///\n" |
45273 | "/// \\headerfile <x86intrin.h>\n" |
45274 | "///\n" |
45275 | "/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.\n" |
45276 | "///\n" |
45277 | "/// \\param __a\n" |
45278 | "/// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits\n" |
45279 | "/// [127:64] and [63:0] of the destination.\n" |
45280 | "/// \\returns A 128-bit vector of [2 x double] containing the moved and\n" |
45281 | "/// duplicated values.\n" |
45282 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
45283 | "_mm_movedup_pd(__m128d __a)\n" |
45284 | "{\n" |
45285 | " return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);\n" |
45286 | "}\n" |
45287 | "\n" |
45288 | "/// Establishes a linear address memory range to be monitored and puts\n" |
45289 | "/// the processor in the monitor event pending state. Data stored in the\n" |
45290 | "/// monitored address range causes the processor to exit the pending state.\n" |
45291 | "///\n" |
45292 | "/// \\headerfile <x86intrin.h>\n" |
45293 | "///\n" |
45294 | "/// This intrinsic corresponds to the <c> MONITOR </c> instruction.\n" |
45295 | "///\n" |
45296 | "/// \\param __p\n" |
45297 | "/// The memory range to be monitored. The size of the range is determined by\n" |
45298 | "/// CPUID function 0000_0005h.\n" |
45299 | "/// \\param __extensions\n" |
45300 | "/// Optional extensions for the monitoring state.\n" |
45301 | "/// \\param __hints\n" |
45302 | "/// Optional hints for the monitoring state.\n" |
45303 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
45304 | "_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)\n" |
45305 | "{\n" |
45306 | " __builtin_ia32_monitor((void *)__p, __extensions, __hints);\n" |
45307 | "}\n" |
45308 | "\n" |
45309 | "/// Used with the MONITOR instruction to wait while the processor is in\n" |
45310 | "/// the monitor event pending state. Data stored in the monitored address\n" |
45311 | "/// range causes the processor to exit the pending state.\n" |
45312 | "///\n" |
45313 | "/// \\headerfile <x86intrin.h>\n" |
45314 | "///\n" |
45315 | "/// This intrinsic corresponds to the <c> MWAIT </c> instruction.\n" |
45316 | "///\n" |
45317 | "/// \\param __extensions\n" |
45318 | "/// Optional extensions for the monitoring state, which may vary by\n" |
45319 | "/// processor.\n" |
45320 | "/// \\param __hints\n" |
45321 | "/// Optional hints for the monitoring state, which may vary by processor.\n" |
45322 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
45323 | "_mm_mwait(unsigned __extensions, unsigned __hints)\n" |
45324 | "{\n" |
45325 | " __builtin_ia32_mwait(__extensions, __hints);\n" |
45326 | "}\n" |
45327 | "\n" |
45328 | "#undef __DEFAULT_FN_ATTRS\n" |
45329 | "\n" |
45330 | "#endif /* __PMMINTRIN_H */\n" |
45331 | "" } , |
45332 | { "/builtins/popcntintrin.h" , "/*===---- popcntintrin.h - POPCNT intrinsics -------------------------------===\n" |
45333 | " *\n" |
45334 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
45335 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
45336 | " * in the Software without restriction, including without limitation the rights\n" |
45337 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
45338 | " * copies of the Software, and to permit persons to whom the Software is\n" |
45339 | " * furnished to do so, subject to the following conditions:\n" |
45340 | " *\n" |
45341 | " * The above copyright notice and this permission notice shall be included in\n" |
45342 | " * all copies or substantial portions of the Software.\n" |
45343 | " *\n" |
45344 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
45345 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
45346 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
45347 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
45348 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
45349 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
45350 | " * THE SOFTWARE.\n" |
45351 | " *\n" |
45352 | " *===-----------------------------------------------------------------------===\n" |
45353 | " */\n" |
45354 | "\n" |
45355 | "#ifndef __POPCNTINTRIN_H\n" |
45356 | "#define __POPCNTINTRIN_H\n" |
45357 | "\n" |
45358 | "/* Define the default attributes for the functions in this file. */\n" |
45359 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"popcnt\")))\n" |
45360 | "\n" |
45361 | "/// Counts the number of bits in the source operand having a value of 1.\n" |
45362 | "///\n" |
45363 | "/// \\headerfile <x86intrin.h>\n" |
45364 | "///\n" |
45365 | "/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n" |
45366 | "///\n" |
45367 | "/// \\param __A\n" |
45368 | "/// An unsigned 32-bit integer operand.\n" |
45369 | "/// \\returns A 32-bit integer containing the number of bits with value 1 in the\n" |
45370 | "/// source operand.\n" |
45371 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
45372 | "_mm_popcnt_u32(unsigned int __A)\n" |
45373 | "{\n" |
45374 | " return __builtin_popcount(__A);\n" |
45375 | "}\n" |
45376 | "\n" |
45377 | "/// Counts the number of bits in the source operand having a value of 1.\n" |
45378 | "///\n" |
45379 | "/// \\headerfile <x86intrin.h>\n" |
45380 | "///\n" |
45381 | "/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n" |
45382 | "///\n" |
45383 | "/// \\param __A\n" |
45384 | "/// A signed 32-bit integer operand.\n" |
45385 | "/// \\returns A 32-bit integer containing the number of bits with value 1 in the\n" |
45386 | "/// source operand.\n" |
45387 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
45388 | "_popcnt32(int __A)\n" |
45389 | "{\n" |
45390 | " return __builtin_popcount(__A);\n" |
45391 | "}\n" |
45392 | "\n" |
45393 | "#ifdef __x86_64__\n" |
45394 | "/// Counts the number of bits in the source operand having a value of 1.\n" |
45395 | "///\n" |
45396 | "/// \\headerfile <x86intrin.h>\n" |
45397 | "///\n" |
45398 | "/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n" |
45399 | "///\n" |
45400 | "/// \\param __A\n" |
45401 | "/// An unsigned 64-bit integer operand.\n" |
45402 | "/// \\returns A 64-bit integer containing the number of bits with value 1 in the\n" |
45403 | "/// source operand.\n" |
45404 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
45405 | "_mm_popcnt_u64(unsigned long long __A)\n" |
45406 | "{\n" |
45407 | " return __builtin_popcountll(__A);\n" |
45408 | "}\n" |
45409 | "\n" |
45410 | "/// Counts the number of bits in the source operand having a value of 1.\n" |
45411 | "///\n" |
45412 | "/// \\headerfile <x86intrin.h>\n" |
45413 | "///\n" |
45414 | "/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n" |
45415 | "///\n" |
45416 | "/// \\param __A\n" |
45417 | "/// A signed 64-bit integer operand.\n" |
45418 | "/// \\returns A 64-bit integer containing the number of bits with value 1 in the\n" |
45419 | "/// source operand.\n" |
45420 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
45421 | "_popcnt64(long long __A)\n" |
45422 | "{\n" |
45423 | " return __builtin_popcountll(__A);\n" |
45424 | "}\n" |
45425 | "#endif /* __x86_64__ */\n" |
45426 | "\n" |
45427 | "#undef __DEFAULT_FN_ATTRS\n" |
45428 | "\n" |
45429 | "#endif /* __POPCNTINTRIN_H */\n" |
45430 | "" } , |
45431 | { "/builtins/prfchwintrin.h" , "/*===---- prfchwintrin.h - PREFETCHW intrinsic -----------------------------===\n" |
45432 | " *\n" |
45433 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
45434 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
45435 | " * in the Software without restriction, including without limitation the rights\n" |
45436 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
45437 | " * copies of the Software, and to permit persons to whom the Software is\n" |
45438 | " * furnished to do so, subject to the following conditions:\n" |
45439 | " *\n" |
45440 | " * The above copyright notice and this permission notice shall be included in\n" |
45441 | " * all copies or substantial portions of the Software.\n" |
45442 | " *\n" |
45443 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
45444 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
45445 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
45446 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
45447 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
45448 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
45449 | " * THE SOFTWARE.\n" |
45450 | " *\n" |
45451 | " *===-----------------------------------------------------------------------===\n" |
45452 | " */\n" |
45453 | "\n" |
45454 | "#if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED)\n" |
45455 | "#error \"Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead.\"\n" |
45456 | "#endif\n" |
45457 | "\n" |
45458 | "#ifndef __PRFCHWINTRIN_H\n" |
45459 | "#define __PRFCHWINTRIN_H\n" |
45460 | "\n" |
45461 | "/// Loads a memory sequence containing the specified memory address into\n" |
45462 | "/// all data cache levels. The cache-coherency state is set to exclusive.\n" |
45463 | "/// Data can be read from and written to the cache line without additional\n" |
45464 | "/// delay.\n" |
45465 | "///\n" |
45466 | "/// \\headerfile <x86intrin.h>\n" |
45467 | "///\n" |
45468 | "/// This intrinsic corresponds to the \\c PREFETCHT0 instruction.\n" |
45469 | "///\n" |
45470 | "/// \\param __P\n" |
45471 | "/// A pointer specifying the memory address to be prefetched.\n" |
45472 | "static __inline__ void __attribute__((__always_inline__, __nodebug__))\n" |
45473 | "_m_prefetch(void *__P)\n" |
45474 | "{\n" |
45475 | " __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);\n" |
45476 | "}\n" |
45477 | "\n" |
45478 | "/// Loads a memory sequence containing the specified memory address into\n" |
45479 | "/// the L1 data cache and sets the cache-coherency to modified. This\n" |
45480 | "/// provides a hint to the processor that the cache line will be modified.\n" |
45481 | "/// It is intended for use when the cache line will be written to shortly\n" |
45482 | "/// after the prefetch is performed.\n" |
45483 | "///\n" |
45484 | "/// Note that the effect of this intrinsic is dependent on the processor\n" |
45485 | "/// implementation.\n" |
45486 | "///\n" |
45487 | "/// \\headerfile <x86intrin.h>\n" |
45488 | "///\n" |
45489 | "/// This intrinsic corresponds to the \\c PREFETCHW instruction.\n" |
45490 | "///\n" |
45491 | "/// \\param __P\n" |
45492 | "/// A pointer specifying the memory address to be prefetched.\n" |
45493 | "static __inline__ void __attribute__((__always_inline__, __nodebug__))\n" |
45494 | "_m_prefetchw(void *__P)\n" |
45495 | "{\n" |
45496 | " __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);\n" |
45497 | "}\n" |
45498 | "\n" |
45499 | "#endif /* __PRFCHWINTRIN_H */\n" |
45500 | "" } , |
45501 | { "/builtins/ptwriteintrin.h" , "/*===------------ ptwriteintrin.h - PTWRITE intrinsic --------------------===\n" |
45502 | " *\n" |
45503 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
45504 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
45505 | " * in the Software without restriction, including without limitation the rights\n" |
45506 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
45507 | " * copies of the Software, and to permit persons to whom the Software is\n" |
45508 | " * furnished to do so, subject to the following conditions:\n" |
45509 | " *\n" |
45510 | " * The above copyright notice and this permission notice shall be included in\n" |
45511 | " * all copies or substantial portions of the Software.\n" |
45512 | " *\n" |
45513 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
45514 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
45515 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
45516 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
45517 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
45518 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
45519 | " * THE SOFTWARE.\n" |
45520 | " *\n" |
45521 | " *===-----------------------------------------------------------------------===\n" |
45522 | " */\n" |
45523 | "\n" |
45524 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
45525 | "#error \"Never use <ptwriteintrin.h> directly; include <x86intrin.h> instead.\"\n" |
45526 | "#endif\n" |
45527 | "\n" |
45528 | "#ifndef __PTWRITEINTRIN_H\n" |
45529 | "#define __PTWRITEINTRIN_H\n" |
45530 | "\n" |
45531 | "/* Define the default attributes for the functions in this file. */\n" |
45532 | "#define __DEFAULT_FN_ATTRS \\\n" |
45533 | " __attribute__((__always_inline__, __nodebug__, __target__(\"ptwrite\")))\n" |
45534 | "\n" |
45535 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
45536 | "_ptwrite32(unsigned int __value) {\n" |
45537 | " __builtin_ia32_ptwrite32(__value);\n" |
45538 | "}\n" |
45539 | "\n" |
45540 | "#ifdef __x86_64__\n" |
45541 | "\n" |
45542 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
45543 | "_ptwrite64(unsigned long long __value) {\n" |
45544 | " __builtin_ia32_ptwrite64(__value);\n" |
45545 | "}\n" |
45546 | "\n" |
45547 | "#endif /* __x86_64__ */\n" |
45548 | "\n" |
45549 | "#undef __DEFAULT_FN_ATTRS\n" |
45550 | "\n" |
45551 | "#endif /* __PTWRITEINTRIN_H */\n" |
45552 | "" } , |
45553 | { "/builtins/rdseedintrin.h" , "/*===---- rdseedintrin.h - RDSEED intrinsics -------------------------------===\n" |
45554 | " *\n" |
45555 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
45556 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
45557 | " * in the Software without restriction, including without limitation the rights\n" |
45558 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
45559 | " * copies of the Software, and to permit persons to whom the Software is\n" |
45560 | " * furnished to do so, subject to the following conditions:\n" |
45561 | " *\n" |
45562 | " * The above copyright notice and this permission notice shall be included in\n" |
45563 | " * all copies or substantial portions of the Software.\n" |
45564 | " *\n" |
45565 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
45566 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
45567 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
45568 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
45569 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
45570 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
45571 | " * THE SOFTWARE.\n" |
45572 | " *\n" |
45573 | " *===-----------------------------------------------------------------------===\n" |
45574 | " */\n" |
45575 | "\n" |
45576 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
45577 | "#error \"Never use <rdseedintrin.h> directly; include <x86intrin.h> instead.\"\n" |
45578 | "#endif\n" |
45579 | "\n" |
45580 | "#ifndef __RDSEEDINTRIN_H\n" |
45581 | "#define __RDSEEDINTRIN_H\n" |
45582 | "\n" |
45583 | "/* Define the default attributes for the functions in this file. */\n" |
45584 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"rdseed\")))\n" |
45585 | "\n" |
45586 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
45587 | "_rdseed16_step(unsigned short *__p)\n" |
45588 | "{\n" |
45589 | " return __builtin_ia32_rdseed16_step(__p);\n" |
45590 | "}\n" |
45591 | "\n" |
45592 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
45593 | "_rdseed32_step(unsigned int *__p)\n" |
45594 | "{\n" |
45595 | " return __builtin_ia32_rdseed32_step(__p);\n" |
45596 | "}\n" |
45597 | "\n" |
45598 | "#ifdef __x86_64__\n" |
45599 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
45600 | "_rdseed64_step(unsigned long long *__p)\n" |
45601 | "{\n" |
45602 | " return __builtin_ia32_rdseed64_step(__p);\n" |
45603 | "}\n" |
45604 | "#endif\n" |
45605 | "\n" |
45606 | "#undef __DEFAULT_FN_ATTRS\n" |
45607 | "\n" |
45608 | "#endif /* __RDSEEDINTRIN_H */\n" |
45609 | "" } , |
45610 | { "/builtins/rtmintrin.h" , "/*===---- rtmintrin.h - RTM intrinsics -------------------------------------===\n" |
45611 | " *\n" |
45612 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
45613 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
45614 | " * in the Software without restriction, including without limitation the rights\n" |
45615 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
45616 | " * copies of the Software, and to permit persons to whom the Software is\n" |
45617 | " * furnished to do so, subject to the following conditions:\n" |
45618 | " *\n" |
45619 | " * The above copyright notice and this permission notice shall be included in\n" |
45620 | " * all copies or substantial portions of the Software.\n" |
45621 | " *\n" |
45622 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
45623 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
45624 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
45625 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
45626 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
45627 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
45628 | " * THE SOFTWARE.\n" |
45629 | " *\n" |
45630 | " *===-----------------------------------------------------------------------===\n" |
45631 | " */\n" |
45632 | "\n" |
45633 | "#ifndef __IMMINTRIN_H\n" |
45634 | "#error \"Never use <rtmintrin.h> directly; include <immintrin.h> instead.\"\n" |
45635 | "#endif\n" |
45636 | "\n" |
45637 | "#ifndef __RTMINTRIN_H\n" |
45638 | "#define __RTMINTRIN_H\n" |
45639 | "\n" |
45640 | "#define _XBEGIN_STARTED (~0u)\n" |
45641 | "#define _XABORT_EXPLICIT (1 << 0)\n" |
45642 | "#define _XABORT_RETRY (1 << 1)\n" |
45643 | "#define _XABORT_CONFLICT (1 << 2)\n" |
45644 | "#define _XABORT_CAPACITY (1 << 3)\n" |
45645 | "#define _XABORT_DEBUG (1 << 4)\n" |
45646 | "#define _XABORT_NESTED (1 << 5)\n" |
45647 | "#define _XABORT_CODE(x) (((x) >> 24) & 0xFF)\n" |
45648 | "\n" |
45649 | "/* Define the default attributes for the functions in this file. */\n" |
45650 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"rtm\")))\n" |
45651 | "\n" |
45652 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
45653 | "_xbegin(void)\n" |
45654 | "{\n" |
45655 | " return __builtin_ia32_xbegin();\n" |
45656 | "}\n" |
45657 | "\n" |
45658 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
45659 | "_xend(void)\n" |
45660 | "{\n" |
45661 | " __builtin_ia32_xend();\n" |
45662 | "}\n" |
45663 | "\n" |
45664 | "#define _xabort(imm) __builtin_ia32_xabort((imm))\n" |
45665 | "\n" |
45666 | "#undef __DEFAULT_FN_ATTRS\n" |
45667 | "\n" |
45668 | "#endif /* __RTMINTRIN_H */\n" |
45669 | "" } , |
45670 | { "/builtins/s390intrin.h" , "/*===---- s390intrin.h - SystemZ intrinsics --------------------------------===\n" |
45671 | " *\n" |
45672 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
45673 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
45674 | " * in the Software without restriction, including without limitation the rights\n" |
45675 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
45676 | " * copies of the Software, and to permit persons to whom the Software is\n" |
45677 | " * furnished to do so, subject to the following conditions:\n" |
45678 | " *\n" |
45679 | " * The above copyright notice and this permission notice shall be included in\n" |
45680 | " * all copies or substantial portions of the Software.\n" |
45681 | " *\n" |
45682 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
45683 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
45684 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
45685 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
45686 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
45687 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
45688 | " * THE SOFTWARE.\n" |
45689 | " *\n" |
45690 | " *===-----------------------------------------------------------------------===\n" |
45691 | " */\n" |
45692 | "\n" |
45693 | "#ifndef __S390INTRIN_H\n" |
45694 | "#define __S390INTRIN_H\n" |
45695 | "\n" |
45696 | "#ifndef __s390__\n" |
45697 | "#error \"<s390intrin.h> is for s390 only\"\n" |
45698 | "#endif\n" |
45699 | "\n" |
45700 | "#ifdef __HTM__\n" |
45701 | "#include <htmintrin.h>\n" |
45702 | "#endif\n" |
45703 | "\n" |
45704 | "#ifdef __VEC__\n" |
45705 | "#include <vecintrin.h>\n" |
45706 | "#endif\n" |
45707 | "\n" |
45708 | "#endif /* __S390INTRIN_H*/\n" |
45709 | "" } , |
45710 | { "/builtins/sgxintrin.h" , "/*===---- sgxintrin.h - X86 SGX intrinsics configuration -------------------===\n" |
45711 | " *\n" |
45712 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
45713 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
45714 | " * in the Software without restriction, including without limitation the rights\n" |
45715 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
45716 | " * copies of the Software, and to permit persons to whom the Software is\n" |
45717 | " * furnished to do so, subject to the following conditions:\n" |
45718 | " *\n" |
45719 | " * The above copyright notice and this permission notice shall be included in\n" |
45720 | " * all copies or substantial portions of the Software.\n" |
45721 | " *\n" |
45722 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
45723 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
45724 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
45725 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
45726 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
45727 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
45728 | " * THE SOFTWARE.\n" |
45729 | " *\n" |
45730 | " *===-----------------------------------------------------------------------===\n" |
45731 | " */\n" |
45732 | "\n" |
45733 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
45734 | "#error \"Never use <sgxintrin.h> directly; include <x86intrin.h> instead.\"\n" |
45735 | "#endif\n" |
45736 | "\n" |
45737 | "#ifndef __SGXINTRIN_H\n" |
45738 | "#define __SGXINTRIN_H\n" |
45739 | "\n" |
45740 | "/* Define the default attributes for the functions in this file. */\n" |
45741 | "#define __DEFAULT_FN_ATTRS \\\n" |
45742 | " __attribute__((__always_inline__, __nodebug__, __target__(\"sgx\")))\n" |
45743 | "\n" |
45744 | "static __inline unsigned int __DEFAULT_FN_ATTRS\n" |
45745 | "_enclu_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n" |
45746 | "{\n" |
45747 | " unsigned int __result;\n" |
45748 | " __asm__ (\"enclu\"\n" |
45749 | " : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n" |
45750 | " : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n" |
45751 | " : \"cc\");\n" |
45752 | " return __result;\n" |
45753 | "}\n" |
45754 | "\n" |
45755 | "static __inline unsigned int __DEFAULT_FN_ATTRS\n" |
45756 | "_encls_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n" |
45757 | "{\n" |
45758 | " unsigned int __result;\n" |
45759 | " __asm__ (\"encls\"\n" |
45760 | " : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n" |
45761 | " : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n" |
45762 | " : \"cc\");\n" |
45763 | " return __result;\n" |
45764 | "}\n" |
45765 | "\n" |
45766 | "static __inline unsigned int __DEFAULT_FN_ATTRS\n" |
45767 | "_enclv_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n" |
45768 | "{\n" |
45769 | " unsigned int __result;\n" |
45770 | " __asm__ (\"enclv\"\n" |
45771 | " : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n" |
45772 | " : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n" |
45773 | " : \"cc\");\n" |
45774 | " return __result;\n" |
45775 | "}\n" |
45776 | "\n" |
45777 | "#undef __DEFAULT_FN_ATTRS\n" |
45778 | "\n" |
45779 | "#endif\n" |
45780 | "" } , |
45781 | { "/builtins/shaintrin.h" , "/*===---- shaintrin.h - SHA intrinsics -------------------------------------===\n" |
45782 | " *\n" |
45783 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
45784 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
45785 | " * in the Software without restriction, including without limitation the rights\n" |
45786 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
45787 | " * copies of the Software, and to permit persons to whom the Software is\n" |
45788 | " * furnished to do so, subject to the following conditions:\n" |
45789 | " *\n" |
45790 | " * The above copyright notice and this permission notice shall be included in\n" |
45791 | " * all copies or substantial portions of the Software.\n" |
45792 | " *\n" |
45793 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
45794 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
45795 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
45796 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
45797 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
45798 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
45799 | " * THE SOFTWARE.\n" |
45800 | " *\n" |
45801 | " *===-----------------------------------------------------------------------===\n" |
45802 | " */\n" |
45803 | "\n" |
45804 | "#ifndef __IMMINTRIN_H\n" |
45805 | "#error \"Never use <shaintrin.h> directly; include <immintrin.h> instead.\"\n" |
45806 | "#endif\n" |
45807 | "\n" |
45808 | "#ifndef __SHAINTRIN_H\n" |
45809 | "#define __SHAINTRIN_H\n" |
45810 | "\n" |
45811 | "/* Define the default attributes for the functions in this file. */\n" |
45812 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sha\"), __min_vector_width__(128)))\n" |
45813 | "\n" |
45814 | "#define _mm_sha1rnds4_epu32(V1, V2, M) \\\n" |
45815 | " __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M))\n" |
45816 | "\n" |
45817 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
45818 | "_mm_sha1nexte_epu32(__m128i __X, __m128i __Y)\n" |
45819 | "{\n" |
45820 | " return (__m128i)__builtin_ia32_sha1nexte((__v4si)__X, (__v4si)__Y);\n" |
45821 | "}\n" |
45822 | "\n" |
45823 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
45824 | "_mm_sha1msg1_epu32(__m128i __X, __m128i __Y)\n" |
45825 | "{\n" |
45826 | " return (__m128i)__builtin_ia32_sha1msg1((__v4si)__X, (__v4si)__Y);\n" |
45827 | "}\n" |
45828 | "\n" |
45829 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
45830 | "_mm_sha1msg2_epu32(__m128i __X, __m128i __Y)\n" |
45831 | "{\n" |
45832 | " return (__m128i)__builtin_ia32_sha1msg2((__v4si)__X, (__v4si)__Y);\n" |
45833 | "}\n" |
45834 | "\n" |
45835 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
45836 | "_mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z)\n" |
45837 | "{\n" |
45838 | " return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__X, (__v4si)__Y, (__v4si)__Z);\n" |
45839 | "}\n" |
45840 | "\n" |
45841 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
45842 | "_mm_sha256msg1_epu32(__m128i __X, __m128i __Y)\n" |
45843 | "{\n" |
45844 | " return (__m128i)__builtin_ia32_sha256msg1((__v4si)__X, (__v4si)__Y);\n" |
45845 | "}\n" |
45846 | "\n" |
45847 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
45848 | "_mm_sha256msg2_epu32(__m128i __X, __m128i __Y)\n" |
45849 | "{\n" |
45850 | " return (__m128i)__builtin_ia32_sha256msg2((__v4si)__X, (__v4si)__Y);\n" |
45851 | "}\n" |
45852 | "\n" |
45853 | "#undef __DEFAULT_FN_ATTRS\n" |
45854 | "\n" |
45855 | "#endif /* __SHAINTRIN_H */\n" |
45856 | "" } , |
45857 | { "/builtins/smmintrin.h" , "/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------===\n" |
45858 | " *\n" |
45859 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
45860 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
45861 | " * in the Software without restriction, including without limitation the rights\n" |
45862 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
45863 | " * copies of the Software, and to permit persons to whom the Software is\n" |
45864 | " * furnished to do so, subject to the following conditions:\n" |
45865 | " *\n" |
45866 | " * The above copyright notice and this permission notice shall be included in\n" |
45867 | " * all copies or substantial portions of the Software.\n" |
45868 | " *\n" |
45869 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
45870 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
45871 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
45872 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
45873 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
45874 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
45875 | " * THE SOFTWARE.\n" |
45876 | " *\n" |
45877 | " *===-----------------------------------------------------------------------===\n" |
45878 | " */\n" |
45879 | "\n" |
45880 | "#ifndef __SMMINTRIN_H\n" |
45881 | "#define __SMMINTRIN_H\n" |
45882 | "\n" |
45883 | "#include <tmmintrin.h>\n" |
45884 | "\n" |
45885 | "/* Define the default attributes for the functions in this file. */\n" |
45886 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse4.1\"), __min_vector_width__(128)))\n" |
45887 | "\n" |
45888 | "/* SSE4 Rounding macros. */\n" |
45889 | "#define _MM_FROUND_TO_NEAREST_INT 0x00\n" |
45890 | "#define _MM_FROUND_TO_NEG_INF 0x01\n" |
45891 | "#define _MM_FROUND_TO_POS_INF 0x02\n" |
45892 | "#define _MM_FROUND_TO_ZERO 0x03\n" |
45893 | "#define _MM_FROUND_CUR_DIRECTION 0x04\n" |
45894 | "\n" |
45895 | "#define _MM_FROUND_RAISE_EXC 0x00\n" |
45896 | "#define _MM_FROUND_NO_EXC 0x08\n" |
45897 | "\n" |
45898 | "#define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT)\n" |
45899 | "#define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF)\n" |
45900 | "#define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF)\n" |
45901 | "#define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO)\n" |
45902 | "#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION)\n" |
45903 | "#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION)\n" |
45904 | "\n" |
45905 | "/// Rounds up each element of the 128-bit vector of [4 x float] to an\n" |
45906 | "/// integer and returns the rounded values in a 128-bit vector of\n" |
45907 | "/// [4 x float].\n" |
45908 | "///\n" |
45909 | "/// \\headerfile <x86intrin.h>\n" |
45910 | "///\n" |
45911 | "/// \\code\n" |
45912 | "/// __m128 _mm_ceil_ps(__m128 X);\n" |
45913 | "/// \\endcode\n" |
45914 | "///\n" |
45915 | "/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.\n" |
45916 | "///\n" |
45917 | "/// \\param X\n" |
45918 | "/// A 128-bit vector of [4 x float] values to be rounded up.\n" |
45919 | "/// \\returns A 128-bit vector of [4 x float] containing the rounded values.\n" |
45920 | "#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL)\n" |
45921 | "\n" |
45922 | "/// Rounds up each element of the 128-bit vector of [2 x double] to an\n" |
45923 | "/// integer and returns the rounded values in a 128-bit vector of\n" |
45924 | "/// [2 x double].\n" |
45925 | "///\n" |
45926 | "/// \\headerfile <x86intrin.h>\n" |
45927 | "///\n" |
45928 | "/// \\code\n" |
45929 | "/// __m128d _mm_ceil_pd(__m128d X);\n" |
45930 | "/// \\endcode\n" |
45931 | "///\n" |
45932 | "/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.\n" |
45933 | "///\n" |
45934 | "/// \\param X\n" |
45935 | "/// A 128-bit vector of [2 x double] values to be rounded up.\n" |
45936 | "/// \\returns A 128-bit vector of [2 x double] containing the rounded values.\n" |
45937 | "#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL)\n" |
45938 | "\n" |
45939 | "/// Copies three upper elements of the first 128-bit vector operand to\n" |
45940 | "/// the corresponding three upper elements of the 128-bit result vector of\n" |
45941 | "/// [4 x float]. Rounds up the lowest element of the second 128-bit vector\n" |
45942 | "/// operand to an integer and copies it to the lowest element of the 128-bit\n" |
45943 | "/// result vector of [4 x float].\n" |
45944 | "///\n" |
45945 | "/// \\headerfile <x86intrin.h>\n" |
45946 | "///\n" |
45947 | "/// \\code\n" |
45948 | "/// __m128 _mm_ceil_ss(__m128 X, __m128 Y);\n" |
45949 | "/// \\endcode\n" |
45950 | "///\n" |
45951 | "/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.\n" |
45952 | "///\n" |
45953 | "/// \\param X\n" |
45954 | "/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are\n" |
45955 | "/// copied to the corresponding bits of the result.\n" |
45956 | "/// \\param Y\n" |
45957 | "/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is\n" |
45958 | "/// rounded up to the nearest integer and copied to the corresponding bits\n" |
45959 | "/// of the result.\n" |
45960 | "/// \\returns A 128-bit vector of [4 x float] containing the copied and rounded\n" |
45961 | "/// values.\n" |
45962 | "#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL)\n" |
45963 | "\n" |
45964 | "/// Copies the upper element of the first 128-bit vector operand to the\n" |
45965 | "/// corresponding upper element of the 128-bit result vector of [2 x double].\n" |
45966 | "/// Rounds up the lower element of the second 128-bit vector operand to an\n" |
45967 | "/// integer and copies it to the lower element of the 128-bit result vector\n" |
45968 | "/// of [2 x double].\n" |
45969 | "///\n" |
45970 | "/// \\headerfile <x86intrin.h>\n" |
45971 | "///\n" |
45972 | "/// \\code\n" |
45973 | "/// __m128d _mm_ceil_sd(__m128d X, __m128d Y);\n" |
45974 | "/// \\endcode\n" |
45975 | "///\n" |
45976 | "/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.\n" |
45977 | "///\n" |
45978 | "/// \\param X\n" |
45979 | "/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is\n" |
45980 | "/// copied to the corresponding bits of the result.\n" |
45981 | "/// \\param Y\n" |
45982 | "/// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is\n" |
45983 | "/// rounded up to the nearest integer and copied to the corresponding bits\n" |
45984 | "/// of the result.\n" |
45985 | "/// \\returns A 128-bit vector of [2 x double] containing the copied and rounded\n" |
45986 | "/// values.\n" |
45987 | "#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL)\n" |
45988 | "\n" |
45989 | "/// Rounds down each element of the 128-bit vector of [4 x float] to an\n" |
45990 | "/// an integer and returns the rounded values in a 128-bit vector of\n" |
45991 | "/// [4 x float].\n" |
45992 | "///\n" |
45993 | "/// \\headerfile <x86intrin.h>\n" |
45994 | "///\n" |
45995 | "/// \\code\n" |
45996 | "/// __m128 _mm_floor_ps(__m128 X);\n" |
45997 | "/// \\endcode\n" |
45998 | "///\n" |
45999 | "/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.\n" |
46000 | "///\n" |
46001 | "/// \\param X\n" |
46002 | "/// A 128-bit vector of [4 x float] values to be rounded down.\n" |
46003 | "/// \\returns A 128-bit vector of [4 x float] containing the rounded values.\n" |
46004 | "#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR)\n" |
46005 | "\n" |
46006 | "/// Rounds down each element of the 128-bit vector of [2 x double] to an\n" |
46007 | "/// integer and returns the rounded values in a 128-bit vector of\n" |
46008 | "/// [2 x double].\n" |
46009 | "///\n" |
46010 | "/// \\headerfile <x86intrin.h>\n" |
46011 | "///\n" |
46012 | "/// \\code\n" |
46013 | "/// __m128d _mm_floor_pd(__m128d X);\n" |
46014 | "/// \\endcode\n" |
46015 | "///\n" |
46016 | "/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.\n" |
46017 | "///\n" |
46018 | "/// \\param X\n" |
46019 | "/// A 128-bit vector of [2 x double].\n" |
46020 | "/// \\returns A 128-bit vector of [2 x double] containing the rounded values.\n" |
46021 | "#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR)\n" |
46022 | "\n" |
46023 | "/// Copies three upper elements of the first 128-bit vector operand to\n" |
46024 | "/// the corresponding three upper elements of the 128-bit result vector of\n" |
46025 | "/// [4 x float]. Rounds down the lowest element of the second 128-bit vector\n" |
46026 | "/// operand to an integer and copies it to the lowest element of the 128-bit\n" |
46027 | "/// result vector of [4 x float].\n" |
46028 | "///\n" |
46029 | "/// \\headerfile <x86intrin.h>\n" |
46030 | "///\n" |
46031 | "/// \\code\n" |
46032 | "/// __m128 _mm_floor_ss(__m128 X, __m128 Y);\n" |
46033 | "/// \\endcode\n" |
46034 | "///\n" |
46035 | "/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.\n" |
46036 | "///\n" |
46037 | "/// \\param X\n" |
46038 | "/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are\n" |
46039 | "/// copied to the corresponding bits of the result.\n" |
46040 | "/// \\param Y\n" |
46041 | "/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is\n" |
46042 | "/// rounded down to the nearest integer and copied to the corresponding bits\n" |
46043 | "/// of the result.\n" |
46044 | "/// \\returns A 128-bit vector of [4 x float] containing the copied and rounded\n" |
46045 | "/// values.\n" |
46046 | "#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)\n" |
46047 | "\n" |
46048 | "/// Copies the upper element of the first 128-bit vector operand to the\n" |
46049 | "/// corresponding upper element of the 128-bit result vector of [2 x double].\n" |
46050 | "/// Rounds down the lower element of the second 128-bit vector operand to an\n" |
46051 | "/// integer and copies it to the lower element of the 128-bit result vector\n" |
46052 | "/// of [2 x double].\n" |
46053 | "///\n" |
46054 | "/// \\headerfile <x86intrin.h>\n" |
46055 | "///\n" |
46056 | "/// \\code\n" |
46057 | "/// __m128d _mm_floor_sd(__m128d X, __m128d Y);\n" |
46058 | "/// \\endcode\n" |
46059 | "///\n" |
46060 | "/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.\n" |
46061 | "///\n" |
46062 | "/// \\param X\n" |
46063 | "/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is\n" |
46064 | "/// copied to the corresponding bits of the result.\n" |
46065 | "/// \\param Y\n" |
46066 | "/// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is\n" |
46067 | "/// rounded down to the nearest integer and copied to the corresponding bits\n" |
46068 | "/// of the result.\n" |
46069 | "/// \\returns A 128-bit vector of [2 x double] containing the copied and rounded\n" |
46070 | "/// values.\n" |
46071 | "#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)\n" |
46072 | "\n" |
46073 | "/// Rounds each element of the 128-bit vector of [4 x float] to an\n" |
46074 | "/// integer value according to the rounding control specified by the second\n" |
46075 | "/// argument and returns the rounded values in a 128-bit vector of\n" |
46076 | "/// [4 x float].\n" |
46077 | "///\n" |
46078 | "/// \\headerfile <x86intrin.h>\n" |
46079 | "///\n" |
46080 | "/// \\code\n" |
46081 | "/// __m128 _mm_round_ps(__m128 X, const int M);\n" |
46082 | "/// \\endcode\n" |
46083 | "///\n" |
46084 | "/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.\n" |
46085 | "///\n" |
46086 | "/// \\param X\n" |
46087 | "/// A 128-bit vector of [4 x float].\n" |
46088 | "/// \\param M\n" |
46089 | "/// An integer value that specifies the rounding operation. \\n\n" |
46090 | "/// Bits [7:4] are reserved. \\n\n" |
46091 | "/// Bit [3] is a precision exception value: \\n\n" |
46092 | "/// 0: A normal PE exception is used \\n\n" |
46093 | "/// 1: The PE field is not updated \\n\n" |
46094 | "/// Bit [2] is the rounding control source: \\n\n" |
46095 | "/// 0: Use bits [1:0] of \\a M \\n\n" |
46096 | "/// 1: Use the current MXCSR setting \\n\n" |
46097 | "/// Bits [1:0] contain the rounding control definition: \\n\n" |
46098 | "/// 00: Nearest \\n\n" |
46099 | "/// 01: Downward (toward negative infinity) \\n\n" |
46100 | "/// 10: Upward (toward positive infinity) \\n\n" |
46101 | "/// 11: Truncated\n" |
46102 | "/// \\returns A 128-bit vector of [4 x float] containing the rounded values.\n" |
46103 | "#define _mm_round_ps(X, M) \\\n" |
46104 | " (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))\n" |
46105 | "\n" |
46106 | "/// Copies three upper elements of the first 128-bit vector operand to\n" |
46107 | "/// the corresponding three upper elements of the 128-bit result vector of\n" |
46108 | "/// [4 x float]. Rounds the lowest element of the second 128-bit vector\n" |
46109 | "/// operand to an integer value according to the rounding control specified\n" |
46110 | "/// by the third argument and copies it to the lowest element of the 128-bit\n" |
46111 | "/// result vector of [4 x float].\n" |
46112 | "///\n" |
46113 | "/// \\headerfile <x86intrin.h>\n" |
46114 | "///\n" |
46115 | "/// \\code\n" |
46116 | "/// __m128 _mm_round_ss(__m128 X, __m128 Y, const int M);\n" |
46117 | "/// \\endcode\n" |
46118 | "///\n" |
46119 | "/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.\n" |
46120 | "///\n" |
46121 | "/// \\param X\n" |
46122 | "/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are\n" |
46123 | "/// copied to the corresponding bits of the result.\n" |
46124 | "/// \\param Y\n" |
46125 | "/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is\n" |
46126 | "/// rounded to the nearest integer using the specified rounding control and\n" |
46127 | "/// copied to the corresponding bits of the result.\n" |
46128 | "/// \\param M\n" |
46129 | "/// An integer value that specifies the rounding operation. \\n\n" |
46130 | "/// Bits [7:4] are reserved. \\n\n" |
46131 | "/// Bit [3] is a precision exception value: \\n\n" |
46132 | "/// 0: A normal PE exception is used \\n\n" |
46133 | "/// 1: The PE field is not updated \\n\n" |
46134 | "/// Bit [2] is the rounding control source: \\n\n" |
46135 | "/// 0: Use bits [1:0] of \\a M \\n\n" |
46136 | "/// 1: Use the current MXCSR setting \\n\n" |
46137 | "/// Bits [1:0] contain the rounding control definition: \\n\n" |
46138 | "/// 00: Nearest \\n\n" |
46139 | "/// 01: Downward (toward negative infinity) \\n\n" |
46140 | "/// 10: Upward (toward positive infinity) \\n\n" |
46141 | "/// 11: Truncated\n" |
46142 | "/// \\returns A 128-bit vector of [4 x float] containing the copied and rounded\n" |
46143 | "/// values.\n" |
46144 | "#define _mm_round_ss(X, Y, M) \\\n" |
46145 | " (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \\\n" |
46146 | " (__v4sf)(__m128)(Y), (M))\n" |
46147 | "\n" |
46148 | "/// Rounds each element of the 128-bit vector of [2 x double] to an\n" |
46149 | "/// integer value according to the rounding control specified by the second\n" |
46150 | "/// argument and returns the rounded values in a 128-bit vector of\n" |
46151 | "/// [2 x double].\n" |
46152 | "///\n" |
46153 | "/// \\headerfile <x86intrin.h>\n" |
46154 | "///\n" |
46155 | "/// \\code\n" |
46156 | "/// __m128d _mm_round_pd(__m128d X, const int M);\n" |
46157 | "/// \\endcode\n" |
46158 | "///\n" |
46159 | "/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.\n" |
46160 | "///\n" |
46161 | "/// \\param X\n" |
46162 | "/// A 128-bit vector of [2 x double].\n" |
46163 | "/// \\param M\n" |
46164 | "/// An integer value that specifies the rounding operation. \\n\n" |
46165 | "/// Bits [7:4] are reserved. \\n\n" |
46166 | "/// Bit [3] is a precision exception value: \\n\n" |
46167 | "/// 0: A normal PE exception is used \\n\n" |
46168 | "/// 1: The PE field is not updated \\n\n" |
46169 | "/// Bit [2] is the rounding control source: \\n\n" |
46170 | "/// 0: Use bits [1:0] of \\a M \\n\n" |
46171 | "/// 1: Use the current MXCSR setting \\n\n" |
46172 | "/// Bits [1:0] contain the rounding control definition: \\n\n" |
46173 | "/// 00: Nearest \\n\n" |
46174 | "/// 01: Downward (toward negative infinity) \\n\n" |
46175 | "/// 10: Upward (toward positive infinity) \\n\n" |
46176 | "/// 11: Truncated\n" |
46177 | "/// \\returns A 128-bit vector of [2 x double] containing the rounded values.\n" |
46178 | "#define _mm_round_pd(X, M) \\\n" |
46179 | " (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M))\n" |
46180 | "\n" |
46181 | "/// Copies the upper element of the first 128-bit vector operand to the\n" |
46182 | "/// corresponding upper element of the 128-bit result vector of [2 x double].\n" |
46183 | "/// Rounds the lower element of the second 128-bit vector operand to an\n" |
46184 | "/// integer value according to the rounding control specified by the third\n" |
46185 | "/// argument and copies it to the lower element of the 128-bit result vector\n" |
46186 | "/// of [2 x double].\n" |
46187 | "///\n" |
46188 | "/// \\headerfile <x86intrin.h>\n" |
46189 | "///\n" |
46190 | "/// \\code\n" |
46191 | "/// __m128d _mm_round_sd(__m128d X, __m128d Y, const int M);\n" |
46192 | "/// \\endcode\n" |
46193 | "///\n" |
46194 | "/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.\n" |
46195 | "///\n" |
46196 | "/// \\param X\n" |
46197 | "/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is\n" |
46198 | "/// copied to the corresponding bits of the result.\n" |
46199 | "/// \\param Y\n" |
46200 | "/// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is\n" |
46201 | "/// rounded to the nearest integer using the specified rounding control and\n" |
46202 | "/// copied to the corresponding bits of the result.\n" |
46203 | "/// \\param M\n" |
46204 | "/// An integer value that specifies the rounding operation. \\n\n" |
46205 | "/// Bits [7:4] are reserved. \\n\n" |
46206 | "/// Bit [3] is a precision exception value: \\n\n" |
46207 | "/// 0: A normal PE exception is used \\n\n" |
46208 | "/// 1: The PE field is not updated \\n\n" |
46209 | "/// Bit [2] is the rounding control source: \\n\n" |
46210 | "/// 0: Use bits [1:0] of \\a M \\n\n" |
46211 | "/// 1: Use the current MXCSR setting \\n\n" |
46212 | "/// Bits [1:0] contain the rounding control definition: \\n\n" |
46213 | "/// 00: Nearest \\n\n" |
46214 | "/// 01: Downward (toward negative infinity) \\n\n" |
46215 | "/// 10: Upward (toward positive infinity) \\n\n" |
46216 | "/// 11: Truncated\n" |
46217 | "/// \\returns A 128-bit vector of [2 x double] containing the copied and rounded\n" |
46218 | "/// values.\n" |
46219 | "#define _mm_round_sd(X, Y, M) \\\n" |
46220 | " (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \\\n" |
46221 | " (__v2df)(__m128d)(Y), (M))\n" |
46222 | "\n" |
46223 | "/* SSE4 Packed Blending Intrinsics. */\n" |
46224 | "/// Returns a 128-bit vector of [2 x double] where the values are\n" |
46225 | "/// selected from either the first or second operand as specified by the\n" |
46226 | "/// third operand, the control mask.\n" |
46227 | "///\n" |
46228 | "/// \\headerfile <x86intrin.h>\n" |
46229 | "///\n" |
46230 | "/// \\code\n" |
46231 | "/// __m128d _mm_blend_pd(__m128d V1, __m128d V2, const int M);\n" |
46232 | "/// \\endcode\n" |
46233 | "///\n" |
46234 | "/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.\n" |
46235 | "///\n" |
46236 | "/// \\param V1\n" |
46237 | "/// A 128-bit vector of [2 x double].\n" |
46238 | "/// \\param V2\n" |
46239 | "/// A 128-bit vector of [2 x double].\n" |
46240 | "/// \\param M\n" |
46241 | "/// An immediate integer operand, with mask bits [1:0] specifying how the\n" |
46242 | "/// values are to be copied. The position of the mask bit corresponds to the\n" |
46243 | "/// index of a copied value. When a mask bit is 0, the corresponding 64-bit\n" |
46244 | "/// element in operand \\a V1 is copied to the same position in the result.\n" |
46245 | "/// When a mask bit is 1, the corresponding 64-bit element in operand \\a V2\n" |
46246 | "/// is copied to the same position in the result.\n" |
46247 | "/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n" |
46248 | "#define _mm_blend_pd(V1, V2, M) \\\n" |
46249 | " (__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(V1), \\\n" |
46250 | " (__v2df)(__m128d)(V2), (int)(M))\n" |
46251 | "\n" |
46252 | "/// Returns a 128-bit vector of [4 x float] where the values are selected\n" |
46253 | "/// from either the first or second operand as specified by the third\n" |
46254 | "/// operand, the control mask.\n" |
46255 | "///\n" |
46256 | "/// \\headerfile <x86intrin.h>\n" |
46257 | "///\n" |
46258 | "/// \\code\n" |
46259 | "/// __m128 _mm_blend_ps(__m128 V1, __m128 V2, const int M);\n" |
46260 | "/// \\endcode\n" |
46261 | "///\n" |
46262 | "/// This intrinsic corresponds to the <c> VBLENDPS / BLENDPS </c> instruction.\n" |
46263 | "///\n" |
46264 | "/// \\param V1\n" |
46265 | "/// A 128-bit vector of [4 x float].\n" |
46266 | "/// \\param V2\n" |
46267 | "/// A 128-bit vector of [4 x float].\n" |
46268 | "/// \\param M\n" |
46269 | "/// An immediate integer operand, with mask bits [3:0] specifying how the\n" |
46270 | "/// values are to be copied. The position of the mask bit corresponds to the\n" |
46271 | "/// index of a copied value. When a mask bit is 0, the corresponding 32-bit\n" |
46272 | "/// element in operand \\a V1 is copied to the same position in the result.\n" |
46273 | "/// When a mask bit is 1, the corresponding 32-bit element in operand \\a V2\n" |
46274 | "/// is copied to the same position in the result.\n" |
46275 | "/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n" |
46276 | "#define _mm_blend_ps(V1, V2, M) \\\n" |
46277 | " (__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(V1), \\\n" |
46278 | " (__v4sf)(__m128)(V2), (int)(M))\n" |
46279 | "\n" |
46280 | "/// Returns a 128-bit vector of [2 x double] where the values are\n" |
46281 | "/// selected from either the first or second operand as specified by the\n" |
46282 | "/// third operand, the control mask.\n" |
46283 | "///\n" |
46284 | "/// \\headerfile <x86intrin.h>\n" |
46285 | "///\n" |
46286 | "/// This intrinsic corresponds to the <c> VBLENDVPD / BLENDVPD </c> instruction.\n" |
46287 | "///\n" |
46288 | "/// \\param __V1\n" |
46289 | "/// A 128-bit vector of [2 x double].\n" |
46290 | "/// \\param __V2\n" |
46291 | "/// A 128-bit vector of [2 x double].\n" |
46292 | "/// \\param __M\n" |
46293 | "/// A 128-bit vector operand, with mask bits 127 and 63 specifying how the\n" |
46294 | "/// values are to be copied. The position of the mask bit corresponds to the\n" |
46295 | "/// most significant bit of a copied value. When a mask bit is 0, the\n" |
46296 | "/// corresponding 64-bit element in operand \\a __V1 is copied to the same\n" |
46297 | "/// position in the result. When a mask bit is 1, the corresponding 64-bit\n" |
46298 | "/// element in operand \\a __V2 is copied to the same position in the result.\n" |
46299 | "/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n" |
46300 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
46301 | "_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M)\n" |
46302 | "{\n" |
46303 | " return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2,\n" |
46304 | " (__v2df)__M);\n" |
46305 | "}\n" |
46306 | "\n" |
46307 | "/// Returns a 128-bit vector of [4 x float] where the values are\n" |
46308 | "/// selected from either the first or second operand as specified by the\n" |
46309 | "/// third operand, the control mask.\n" |
46310 | "///\n" |
46311 | "/// \\headerfile <x86intrin.h>\n" |
46312 | "///\n" |
46313 | "/// This intrinsic corresponds to the <c> VBLENDVPS / BLENDVPS </c> instruction.\n" |
46314 | "///\n" |
46315 | "/// \\param __V1\n" |
46316 | "/// A 128-bit vector of [4 x float].\n" |
46317 | "/// \\param __V2\n" |
46318 | "/// A 128-bit vector of [4 x float].\n" |
46319 | "/// \\param __M\n" |
46320 | "/// A 128-bit vector operand, with mask bits 127, 95, 63, and 31 specifying\n" |
46321 | "/// how the values are to be copied. The position of the mask bit corresponds\n" |
46322 | "/// to the most significant bit of a copied value. When a mask bit is 0, the\n" |
46323 | "/// corresponding 32-bit element in operand \\a __V1 is copied to the same\n" |
46324 | "/// position in the result. When a mask bit is 1, the corresponding 32-bit\n" |
46325 | "/// element in operand \\a __V2 is copied to the same position in the result.\n" |
46326 | "/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n" |
46327 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
46328 | "_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)\n" |
46329 | "{\n" |
46330 | " return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2,\n" |
46331 | " (__v4sf)__M);\n" |
46332 | "}\n" |
46333 | "\n" |
46334 | "/// Returns a 128-bit vector of [16 x i8] where the values are selected\n" |
46335 | "/// from either of the first or second operand as specified by the third\n" |
46336 | "/// operand, the control mask.\n" |
46337 | "///\n" |
46338 | "/// \\headerfile <x86intrin.h>\n" |
46339 | "///\n" |
46340 | "/// This intrinsic corresponds to the <c> VPBLENDVB / PBLENDVB </c> instruction.\n" |
46341 | "///\n" |
46342 | "/// \\param __V1\n" |
46343 | "/// A 128-bit vector of [16 x i8].\n" |
46344 | "/// \\param __V2\n" |
46345 | "/// A 128-bit vector of [16 x i8].\n" |
46346 | "/// \\param __M\n" |
46347 | "/// A 128-bit vector operand, with mask bits 127, 119, 111...7 specifying\n" |
46348 | "/// how the values are to be copied. The position of the mask bit corresponds\n" |
46349 | "/// to the most significant bit of a copied value. When a mask bit is 0, the\n" |
46350 | "/// corresponding 8-bit element in operand \\a __V1 is copied to the same\n" |
46351 | "/// position in the result. When a mask bit is 1, the corresponding 8-bit\n" |
46352 | "/// element in operand \\a __V2 is copied to the same position in the result.\n" |
46353 | "/// \\returns A 128-bit vector of [16 x i8] containing the copied values.\n" |
46354 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
46355 | "_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)\n" |
46356 | "{\n" |
46357 | " return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2,\n" |
46358 | " (__v16qi)__M);\n" |
46359 | "}\n" |
46360 | "\n" |
46361 | "/// Returns a 128-bit vector of [8 x i16] where the values are selected\n" |
46362 | "/// from either of the first or second operand as specified by the third\n" |
46363 | "/// operand, the control mask.\n" |
46364 | "///\n" |
46365 | "/// \\headerfile <x86intrin.h>\n" |
46366 | "///\n" |
46367 | "/// \\code\n" |
46368 | "/// __m128i _mm_blend_epi16(__m128i V1, __m128i V2, const int M);\n" |
46369 | "/// \\endcode\n" |
46370 | "///\n" |
46371 | "/// This intrinsic corresponds to the <c> VPBLENDW / PBLENDW </c> instruction.\n" |
46372 | "///\n" |
46373 | "/// \\param V1\n" |
46374 | "/// A 128-bit vector of [8 x i16].\n" |
46375 | "/// \\param V2\n" |
46376 | "/// A 128-bit vector of [8 x i16].\n" |
46377 | "/// \\param M\n" |
46378 | "/// An immediate integer operand, with mask bits [7:0] specifying how the\n" |
46379 | "/// values are to be copied. The position of the mask bit corresponds to the\n" |
46380 | "/// index of a copied value. When a mask bit is 0, the corresponding 16-bit\n" |
46381 | "/// element in operand \\a V1 is copied to the same position in the result.\n" |
46382 | "/// When a mask bit is 1, the corresponding 16-bit element in operand \\a V2\n" |
46383 | "/// is copied to the same position in the result.\n" |
46384 | "/// \\returns A 128-bit vector of [8 x i16] containing the copied values.\n" |
46385 | "#define _mm_blend_epi16(V1, V2, M) \\\n" |
46386 | " (__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(V1), \\\n" |
46387 | " (__v8hi)(__m128i)(V2), (int)(M))\n" |
46388 | "\n" |
46389 | "/* SSE4 Dword Multiply Instructions. */\n" |
46390 | "/// Multiples corresponding elements of two 128-bit vectors of [4 x i32]\n" |
46391 | "/// and returns the lower 32 bits of the each product in a 128-bit vector of\n" |
46392 | "/// [4 x i32].\n" |
46393 | "///\n" |
46394 | "/// \\headerfile <x86intrin.h>\n" |
46395 | "///\n" |
46396 | "/// This intrinsic corresponds to the <c> VPMULLD / PMULLD </c> instruction.\n" |
46397 | "///\n" |
46398 | "/// \\param __V1\n" |
46399 | "/// A 128-bit integer vector.\n" |
46400 | "/// \\param __V2\n" |
46401 | "/// A 128-bit integer vector.\n" |
46402 | "/// \\returns A 128-bit integer vector containing the products of both operands.\n" |
46403 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
46404 | "_mm_mullo_epi32 (__m128i __V1, __m128i __V2)\n" |
46405 | "{\n" |
46406 | " return (__m128i) ((__v4su)__V1 * (__v4su)__V2);\n" |
46407 | "}\n" |
46408 | "\n" |
46409 | "/// Multiplies corresponding even-indexed elements of two 128-bit\n" |
46410 | "/// vectors of [4 x i32] and returns a 128-bit vector of [2 x i64]\n" |
46411 | "/// containing the products.\n" |
46412 | "///\n" |
46413 | "/// \\headerfile <x86intrin.h>\n" |
46414 | "///\n" |
46415 | "/// This intrinsic corresponds to the <c> VPMULDQ / PMULDQ </c> instruction.\n" |
46416 | "///\n" |
46417 | "/// \\param __V1\n" |
46418 | "/// A 128-bit vector of [4 x i32].\n" |
46419 | "/// \\param __V2\n" |
46420 | "/// A 128-bit vector of [4 x i32].\n" |
46421 | "/// \\returns A 128-bit vector of [2 x i64] containing the products of both\n" |
46422 | "/// operands.\n" |
46423 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
46424 | "_mm_mul_epi32 (__m128i __V1, __m128i __V2)\n" |
46425 | "{\n" |
46426 | " return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2);\n" |
46427 | "}\n" |
46428 | "\n" |
46429 | "/* SSE4 Floating Point Dot Product Instructions. */\n" |
46430 | "/// Computes the dot product of the two 128-bit vectors of [4 x float]\n" |
46431 | "/// and returns it in the elements of the 128-bit result vector of\n" |
46432 | "/// [4 x float].\n" |
46433 | "///\n" |
46434 | "/// The immediate integer operand controls which input elements\n" |
46435 | "/// will contribute to the dot product, and where the final results are\n" |
46436 | "/// returned.\n" |
46437 | "///\n" |
46438 | "/// \\headerfile <x86intrin.h>\n" |
46439 | "///\n" |
46440 | "/// \\code\n" |
46441 | "/// __m128 _mm_dp_ps(__m128 X, __m128 Y, const int M);\n" |
46442 | "/// \\endcode\n" |
46443 | "///\n" |
46444 | "/// This intrinsic corresponds to the <c> VDPPS / DPPS </c> instruction.\n" |
46445 | "///\n" |
46446 | "/// \\param X\n" |
46447 | "/// A 128-bit vector of [4 x float].\n" |
46448 | "/// \\param Y\n" |
46449 | "/// A 128-bit vector of [4 x float].\n" |
46450 | "/// \\param M\n" |
46451 | "/// An immediate integer operand. Mask bits [7:4] determine which elements\n" |
46452 | "/// of the input vectors are used, with bit [4] corresponding to the lowest\n" |
46453 | "/// element and bit [7] corresponding to the highest element of each [4 x\n" |
46454 | "/// float] vector. If a bit is set, the corresponding elements from the two\n" |
46455 | "/// input vectors are used as an input for dot product; otherwise that input\n" |
46456 | "/// is treated as zero. Bits [3:0] determine which elements of the result\n" |
46457 | "/// will receive a copy of the final dot product, with bit [0] corresponding\n" |
46458 | "/// to the lowest element and bit [3] corresponding to the highest element of\n" |
46459 | "/// each [4 x float] subvector. If a bit is set, the dot product is returned\n" |
46460 | "/// in the corresponding element; otherwise that element is set to zero.\n" |
46461 | "/// \\returns A 128-bit vector of [4 x float] containing the dot product.\n" |
46462 | "#define _mm_dp_ps(X, Y, M) \\\n" |
46463 | " (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \\\n" |
46464 | " (__v4sf)(__m128)(Y), (M))\n" |
46465 | "\n" |
46466 | "/// Computes the dot product of the two 128-bit vectors of [2 x double]\n" |
46467 | "/// and returns it in the elements of the 128-bit result vector of\n" |
46468 | "/// [2 x double].\n" |
46469 | "///\n" |
46470 | "/// The immediate integer operand controls which input\n" |
46471 | "/// elements will contribute to the dot product, and where the final results\n" |
46472 | "/// are returned.\n" |
46473 | "///\n" |
46474 | "/// \\headerfile <x86intrin.h>\n" |
46475 | "///\n" |
46476 | "/// \\code\n" |
46477 | "/// __m128d _mm_dp_pd(__m128d X, __m128d Y, const int M);\n" |
46478 | "/// \\endcode\n" |
46479 | "///\n" |
46480 | "/// This intrinsic corresponds to the <c> VDPPD / DPPD </c> instruction.\n" |
46481 | "///\n" |
46482 | "/// \\param X\n" |
46483 | "/// A 128-bit vector of [2 x double].\n" |
46484 | "/// \\param Y\n" |
46485 | "/// A 128-bit vector of [2 x double].\n" |
46486 | "/// \\param M\n" |
46487 | "/// An immediate integer operand. Mask bits [5:4] determine which elements\n" |
46488 | "/// of the input vectors are used, with bit [4] corresponding to the lowest\n" |
46489 | "/// element and bit [5] corresponding to the highest element of each of [2 x\n" |
46490 | "/// double] vector. If a bit is set, the corresponding elements from the two\n" |
46491 | "/// input vectors are used as an input for dot product; otherwise that input\n" |
46492 | "/// is treated as zero. Bits [1:0] determine which elements of the result\n" |
46493 | "/// will receive a copy of the final dot product, with bit [0] corresponding\n" |
46494 | "/// to the lowest element and bit [1] corresponding to the highest element of\n" |
46495 | "/// each [2 x double] vector. If a bit is set, the dot product is returned in\n" |
46496 | "/// the corresponding element; otherwise that element is set to zero.\n" |
46497 | "#define _mm_dp_pd(X, Y, M) \\\n" |
46498 | " (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \\\n" |
46499 | " (__v2df)(__m128d)(Y), (M))\n" |
46500 | "\n" |
46501 | "/* SSE4 Streaming Load Hint Instruction. */\n" |
46502 | "/// Loads integer values from a 128-bit aligned memory location to a\n" |
46503 | "/// 128-bit integer vector.\n" |
46504 | "///\n" |
46505 | "/// \\headerfile <x86intrin.h>\n" |
46506 | "///\n" |
46507 | "/// This intrinsic corresponds to the <c> VMOVNTDQA / MOVNTDQA </c> instruction.\n" |
46508 | "///\n" |
46509 | "/// \\param __V\n" |
46510 | "/// A pointer to a 128-bit aligned memory location that contains the integer\n" |
46511 | "/// values.\n" |
46512 | "/// \\returns A 128-bit integer vector containing the data stored at the\n" |
46513 | "/// specified memory location.\n" |
46514 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
46515 | "_mm_stream_load_si128 (__m128i const *__V)\n" |
46516 | "{\n" |
46517 | " return (__m128i) __builtin_nontemporal_load ((const __v2di *) __V);\n" |
46518 | "}\n" |
46519 | "\n" |
46520 | "/* SSE4 Packed Integer Min/Max Instructions. */\n" |
46521 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
46522 | "/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the lesser\n" |
46523 | "/// of the two values.\n" |
46524 | "///\n" |
46525 | "/// \\headerfile <x86intrin.h>\n" |
46526 | "///\n" |
46527 | "/// This intrinsic corresponds to the <c> VPMINSB / PMINSB </c> instruction.\n" |
46528 | "///\n" |
46529 | "/// \\param __V1\n" |
46530 | "/// A 128-bit vector of [16 x i8].\n" |
46531 | "/// \\param __V2\n" |
46532 | "/// A 128-bit vector of [16 x i8]\n" |
46533 | "/// \\returns A 128-bit vector of [16 x i8] containing the lesser values.\n" |
46534 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
46535 | "_mm_min_epi8 (__m128i __V1, __m128i __V2)\n" |
46536 | "{\n" |
46537 | " return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);\n" |
46538 | "}\n" |
46539 | "\n" |
46540 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
46541 | "/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the\n" |
46542 | "/// greater value of the two.\n" |
46543 | "///\n" |
46544 | "/// \\headerfile <x86intrin.h>\n" |
46545 | "///\n" |
46546 | "/// This intrinsic corresponds to the <c> VPMAXSB / PMAXSB </c> instruction.\n" |
46547 | "///\n" |
46548 | "/// \\param __V1\n" |
46549 | "/// A 128-bit vector of [16 x i8].\n" |
46550 | "/// \\param __V2\n" |
46551 | "/// A 128-bit vector of [16 x i8].\n" |
46552 | "/// \\returns A 128-bit vector of [16 x i8] containing the greater values.\n" |
46553 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
46554 | "_mm_max_epi8 (__m128i __V1, __m128i __V2)\n" |
46555 | "{\n" |
46556 | " return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);\n" |
46557 | "}\n" |
46558 | "\n" |
46559 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
46560 | "/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the lesser\n" |
46561 | "/// value of the two.\n" |
46562 | "///\n" |
46563 | "/// \\headerfile <x86intrin.h>\n" |
46564 | "///\n" |
46565 | "/// This intrinsic corresponds to the <c> VPMINUW / PMINUW </c> instruction.\n" |
46566 | "///\n" |
46567 | "/// \\param __V1\n" |
46568 | "/// A 128-bit vector of [8 x u16].\n" |
46569 | "/// \\param __V2\n" |
46570 | "/// A 128-bit vector of [8 x u16].\n" |
46571 | "/// \\returns A 128-bit vector of [8 x u16] containing the lesser values.\n" |
46572 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
46573 | "_mm_min_epu16 (__m128i __V1, __m128i __V2)\n" |
46574 | "{\n" |
46575 | " return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);\n" |
46576 | "}\n" |
46577 | "\n" |
46578 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
46579 | "/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the\n" |
46580 | "/// greater value of the two.\n" |
46581 | "///\n" |
46582 | "/// \\headerfile <x86intrin.h>\n" |
46583 | "///\n" |
46584 | "/// This intrinsic corresponds to the <c> VPMAXUW / PMAXUW </c> instruction.\n" |
46585 | "///\n" |
46586 | "/// \\param __V1\n" |
46587 | "/// A 128-bit vector of [8 x u16].\n" |
46588 | "/// \\param __V2\n" |
46589 | "/// A 128-bit vector of [8 x u16].\n" |
46590 | "/// \\returns A 128-bit vector of [8 x u16] containing the greater values.\n" |
46591 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
46592 | "_mm_max_epu16 (__m128i __V1, __m128i __V2)\n" |
46593 | "{\n" |
46594 | " return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);\n" |
46595 | "}\n" |
46596 | "\n" |
46597 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
46598 | "/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the lesser\n" |
46599 | "/// value of the two.\n" |
46600 | "///\n" |
46601 | "/// \\headerfile <x86intrin.h>\n" |
46602 | "///\n" |
46603 | "/// This intrinsic corresponds to the <c> VPMINSD / PMINSD </c> instruction.\n" |
46604 | "///\n" |
46605 | "/// \\param __V1\n" |
46606 | "/// A 128-bit vector of [4 x i32].\n" |
46607 | "/// \\param __V2\n" |
46608 | "/// A 128-bit vector of [4 x i32].\n" |
46609 | "/// \\returns A 128-bit vector of [4 x i32] containing the lesser values.\n" |
46610 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
46611 | "_mm_min_epi32 (__m128i __V1, __m128i __V2)\n" |
46612 | "{\n" |
46613 | " return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);\n" |
46614 | "}\n" |
46615 | "\n" |
46616 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
46617 | "/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the\n" |
46618 | "/// greater value of the two.\n" |
46619 | "///\n" |
46620 | "/// \\headerfile <x86intrin.h>\n" |
46621 | "///\n" |
46622 | "/// This intrinsic corresponds to the <c> VPMAXSD / PMAXSD </c> instruction.\n" |
46623 | "///\n" |
46624 | "/// \\param __V1\n" |
46625 | "/// A 128-bit vector of [4 x i32].\n" |
46626 | "/// \\param __V2\n" |
46627 | "/// A 128-bit vector of [4 x i32].\n" |
46628 | "/// \\returns A 128-bit vector of [4 x i32] containing the greater values.\n" |
46629 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
46630 | "_mm_max_epi32 (__m128i __V1, __m128i __V2)\n" |
46631 | "{\n" |
46632 | " return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);\n" |
46633 | "}\n" |
46634 | "\n" |
46635 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
46636 | "/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the lesser\n" |
46637 | "/// value of the two.\n" |
46638 | "///\n" |
46639 | "/// \\headerfile <x86intrin.h>\n" |
46640 | "///\n" |
46641 | "/// This intrinsic corresponds to the <c> VPMINUD / PMINUD </c> instruction.\n" |
46642 | "///\n" |
46643 | "/// \\param __V1\n" |
46644 | "/// A 128-bit vector of [4 x u32].\n" |
46645 | "/// \\param __V2\n" |
46646 | "/// A 128-bit vector of [4 x u32].\n" |
46647 | "/// \\returns A 128-bit vector of [4 x u32] containing the lesser values.\n" |
46648 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
46649 | "_mm_min_epu32 (__m128i __V1, __m128i __V2)\n" |
46650 | "{\n" |
46651 | " return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);\n" |
46652 | "}\n" |
46653 | "\n" |
46654 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
46655 | "/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the\n" |
46656 | "/// greater value of the two.\n" |
46657 | "///\n" |
46658 | "/// \\headerfile <x86intrin.h>\n" |
46659 | "///\n" |
46660 | "/// This intrinsic corresponds to the <c> VPMAXUD / PMAXUD </c> instruction.\n" |
46661 | "///\n" |
46662 | "/// \\param __V1\n" |
46663 | "/// A 128-bit vector of [4 x u32].\n" |
46664 | "/// \\param __V2\n" |
46665 | "/// A 128-bit vector of [4 x u32].\n" |
46666 | "/// \\returns A 128-bit vector of [4 x u32] containing the greater values.\n" |
46667 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
46668 | "_mm_max_epu32 (__m128i __V1, __m128i __V2)\n" |
46669 | "{\n" |
46670 | " return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2);\n" |
46671 | "}\n" |
46672 | "\n" |
46673 | "/* SSE4 Insertion and Extraction from XMM Register Instructions. */\n" |
46674 | "/// Takes the first argument \\a X and inserts an element from the second\n" |
46675 | "/// argument \\a Y as selected by the third argument \\a N. That result then\n" |
46676 | "/// has elements zeroed out also as selected by the third argument \\a N. The\n" |
46677 | "/// resulting 128-bit vector of [4 x float] is then returned.\n" |
46678 | "///\n" |
46679 | "/// \\headerfile <x86intrin.h>\n" |
46680 | "///\n" |
46681 | "/// \\code\n" |
46682 | "/// __m128 _mm_insert_ps(__m128 X, __m128 Y, const int N);\n" |
46683 | "/// \\endcode\n" |
46684 | "///\n" |
46685 | "/// This intrinsic corresponds to the <c> VINSERTPS </c> instruction.\n" |
46686 | "///\n" |
46687 | "/// \\param X\n" |
46688 | "/// A 128-bit vector source operand of [4 x float]. With the exception of\n" |
46689 | "/// those bits in the result copied from parameter \\a Y and zeroed by bits\n" |
46690 | "/// [3:0] of \\a N, all bits from this parameter are copied to the result.\n" |
46691 | "/// \\param Y\n" |
46692 | "/// A 128-bit vector source operand of [4 x float]. One single-precision\n" |
46693 | "/// floating-point element from this source, as determined by the immediate\n" |
46694 | "/// parameter, is copied to the result.\n" |
46695 | "/// \\param N\n" |
46696 | "/// Specifies which bits from operand \\a Y will be copied, which bits in the\n" |
46697 | "/// result they will be be copied to, and which bits in the result will be\n" |
46698 | "/// cleared. The following assignments are made: \\n\n" |
46699 | "/// Bits [7:6] specify the bits to copy from operand \\a Y: \\n\n" |
46700 | "/// 00: Selects bits [31:0] from operand \\a Y. \\n\n" |
46701 | "/// 01: Selects bits [63:32] from operand \\a Y. \\n\n" |
46702 | "/// 10: Selects bits [95:64] from operand \\a Y. \\n\n" |
46703 | "/// 11: Selects bits [127:96] from operand \\a Y. \\n\n" |
46704 | "/// Bits [5:4] specify the bits in the result to which the selected bits\n" |
46705 | "/// from operand \\a Y are copied: \\n\n" |
46706 | "/// 00: Copies the selected bits from \\a Y to result bits [31:0]. \\n\n" |
46707 | "/// 01: Copies the selected bits from \\a Y to result bits [63:32]. \\n\n" |
46708 | "/// 10: Copies the selected bits from \\a Y to result bits [95:64]. \\n\n" |
46709 | "/// 11: Copies the selected bits from \\a Y to result bits [127:96]. \\n\n" |
46710 | "/// Bits[3:0]: If any of these bits are set, the corresponding result\n" |
46711 | "/// element is cleared.\n" |
46712 | "/// \\returns A 128-bit vector of [4 x float] containing the copied\n" |
46713 | "/// single-precision floating point elements from the operands.\n" |
46714 | "#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))\n" |
46715 | "\n" |
46716 | "/// Extracts a 32-bit integer from a 128-bit vector of [4 x float] and\n" |
46717 | "/// returns it, using the immediate value parameter \\a N as a selector.\n" |
46718 | "///\n" |
46719 | "/// \\headerfile <x86intrin.h>\n" |
46720 | "///\n" |
46721 | "/// \\code\n" |
46722 | "/// int _mm_extract_ps(__m128 X, const int N);\n" |
46723 | "/// \\endcode\n" |
46724 | "///\n" |
46725 | "/// This intrinsic corresponds to the <c> VEXTRACTPS / EXTRACTPS </c>\n" |
46726 | "/// instruction.\n" |
46727 | "///\n" |
46728 | "/// \\param X\n" |
46729 | "/// A 128-bit vector of [4 x float].\n" |
46730 | "/// \\param N\n" |
46731 | "/// An immediate value. Bits [1:0] determines which bits from the argument\n" |
46732 | "/// \\a X are extracted and returned: \\n\n" |
46733 | "/// 00: Bits [31:0] of parameter \\a X are returned. \\n\n" |
46734 | "/// 01: Bits [63:32] of parameter \\a X are returned. \\n\n" |
46735 | "/// 10: Bits [95:64] of parameter \\a X are returned. \\n\n" |
46736 | "/// 11: Bits [127:96] of parameter \\a X are returned.\n" |
46737 | "/// \\returns A 32-bit integer containing the extracted 32 bits of float data.\n" |
46738 | "#define _mm_extract_ps(X, N) (__extension__ \\\n" |
46739 | " ({ union { int __i; float __f; } __t; \\\n" |
46740 | " __t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \\\n" |
46741 | " __t.__i;}))\n" |
46742 | "\n" |
46743 | "/* Miscellaneous insert and extract macros. */\n" |
46744 | "/* Extract a single-precision float from X at index N into D. */\n" |
46745 | "#define _MM_EXTRACT_FLOAT(D, X, N) \\\n" |
46746 | " { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); }\n" |
46747 | "\n" |
46748 | "/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create\n" |
46749 | " an index suitable for _mm_insert_ps. */\n" |
46750 | "#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z))\n" |
46751 | "\n" |
46752 | "/* Extract a float from X at index N into the first index of the return. */\n" |
46753 | "#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X), \\\n" |
46754 | " _MM_MK_INSERTPS_NDX((N), 0, 0x0e))\n" |
46755 | "\n" |
46756 | "/* Insert int into packed integer array at index. */\n" |
46757 | "/// Constructs a 128-bit vector of [16 x i8] by first making a copy of\n" |
46758 | "/// the 128-bit integer vector parameter, and then inserting the lower 8 bits\n" |
46759 | "/// of an integer parameter \\a I into an offset specified by the immediate\n" |
46760 | "/// value parameter \\a N.\n" |
46761 | "///\n" |
46762 | "/// \\headerfile <x86intrin.h>\n" |
46763 | "///\n" |
46764 | "/// \\code\n" |
46765 | "/// __m128i _mm_insert_epi8(__m128i X, int I, const int N);\n" |
46766 | "/// \\endcode\n" |
46767 | "///\n" |
46768 | "/// This intrinsic corresponds to the <c> VPINSRB / PINSRB </c> instruction.\n" |
46769 | "///\n" |
46770 | "/// \\param X\n" |
46771 | "/// A 128-bit integer vector of [16 x i8]. This vector is copied to the\n" |
46772 | "/// result and then one of the sixteen elements in the result vector is\n" |
46773 | "/// replaced by the lower 8 bits of \\a I.\n" |
46774 | "/// \\param I\n" |
46775 | "/// An integer. The lower 8 bits of this operand are written to the result\n" |
46776 | "/// beginning at the offset specified by \\a N.\n" |
46777 | "/// \\param N\n" |
46778 | "/// An immediate value. Bits [3:0] specify the bit offset in the result at\n" |
46779 | "/// which the lower 8 bits of \\a I are written. \\n\n" |
46780 | "/// 0000: Bits [7:0] of the result are used for insertion. \\n\n" |
46781 | "/// 0001: Bits [15:8] of the result are used for insertion. \\n\n" |
46782 | "/// 0010: Bits [23:16] of the result are used for insertion. \\n\n" |
46783 | "/// 0011: Bits [31:24] of the result are used for insertion. \\n\n" |
46784 | "/// 0100: Bits [39:32] of the result are used for insertion. \\n\n" |
46785 | "/// 0101: Bits [47:40] of the result are used for insertion. \\n\n" |
46786 | "/// 0110: Bits [55:48] of the result are used for insertion. \\n\n" |
46787 | "/// 0111: Bits [63:56] of the result are used for insertion. \\n\n" |
46788 | "/// 1000: Bits [71:64] of the result are used for insertion. \\n\n" |
46789 | "/// 1001: Bits [79:72] of the result are used for insertion. \\n\n" |
46790 | "/// 1010: Bits [87:80] of the result are used for insertion. \\n\n" |
46791 | "/// 1011: Bits [95:88] of the result are used for insertion. \\n\n" |
46792 | "/// 1100: Bits [103:96] of the result are used for insertion. \\n\n" |
46793 | "/// 1101: Bits [111:104] of the result are used for insertion. \\n\n" |
46794 | "/// 1110: Bits [119:112] of the result are used for insertion. \\n\n" |
46795 | "/// 1111: Bits [127:120] of the result are used for insertion.\n" |
46796 | "/// \\returns A 128-bit integer vector containing the constructed values.\n" |
46797 | "#define _mm_insert_epi8(X, I, N) \\\n" |
46798 | " (__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \\\n" |
46799 | " (int)(I), (int)(N))\n" |
46800 | "\n" |
46801 | "/// Constructs a 128-bit vector of [4 x i32] by first making a copy of\n" |
46802 | "/// the 128-bit integer vector parameter, and then inserting the 32-bit\n" |
46803 | "/// integer parameter \\a I at the offset specified by the immediate value\n" |
46804 | "/// parameter \\a N.\n" |
46805 | "///\n" |
46806 | "/// \\headerfile <x86intrin.h>\n" |
46807 | "///\n" |
46808 | "/// \\code\n" |
46809 | "/// __m128i _mm_insert_epi32(__m128i X, int I, const int N);\n" |
46810 | "/// \\endcode\n" |
46811 | "///\n" |
46812 | "/// This intrinsic corresponds to the <c> VPINSRD / PINSRD </c> instruction.\n" |
46813 | "///\n" |
46814 | "/// \\param X\n" |
46815 | "/// A 128-bit integer vector of [4 x i32]. This vector is copied to the\n" |
46816 | "/// result and then one of the four elements in the result vector is\n" |
46817 | "/// replaced by \\a I.\n" |
46818 | "/// \\param I\n" |
46819 | "/// A 32-bit integer that is written to the result beginning at the offset\n" |
46820 | "/// specified by \\a N.\n" |
46821 | "/// \\param N\n" |
46822 | "/// An immediate value. Bits [1:0] specify the bit offset in the result at\n" |
46823 | "/// which the integer \\a I is written. \\n\n" |
46824 | "/// 00: Bits [31:0] of the result are used for insertion. \\n\n" |
46825 | "/// 01: Bits [63:32] of the result are used for insertion. \\n\n" |
46826 | "/// 10: Bits [95:64] of the result are used for insertion. \\n\n" |
46827 | "/// 11: Bits [127:96] of the result are used for insertion.\n" |
46828 | "/// \\returns A 128-bit integer vector containing the constructed values.\n" |
46829 | "#define _mm_insert_epi32(X, I, N) \\\n" |
46830 | " (__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \\\n" |
46831 | " (int)(I), (int)(N))\n" |
46832 | "\n" |
46833 | "#ifdef __x86_64__\n" |
46834 | "/// Constructs a 128-bit vector of [2 x i64] by first making a copy of\n" |
46835 | "/// the 128-bit integer vector parameter, and then inserting the 64-bit\n" |
46836 | "/// integer parameter \\a I, using the immediate value parameter \\a N as an\n" |
46837 | "/// insertion location selector.\n" |
46838 | "///\n" |
46839 | "/// \\headerfile <x86intrin.h>\n" |
46840 | "///\n" |
46841 | "/// \\code\n" |
46842 | "/// __m128i _mm_insert_epi64(__m128i X, long long I, const int N);\n" |
46843 | "/// \\endcode\n" |
46844 | "///\n" |
46845 | "/// This intrinsic corresponds to the <c> VPINSRQ / PINSRQ </c> instruction.\n" |
46846 | "///\n" |
46847 | "/// \\param X\n" |
46848 | "/// A 128-bit integer vector of [2 x i64]. This vector is copied to the\n" |
46849 | "/// result and then one of the two elements in the result vector is replaced\n" |
46850 | "/// by \\a I.\n" |
46851 | "/// \\param I\n" |
46852 | "/// A 64-bit integer that is written to the result beginning at the offset\n" |
46853 | "/// specified by \\a N.\n" |
46854 | "/// \\param N\n" |
46855 | "/// An immediate value. Bit [0] specifies the bit offset in the result at\n" |
46856 | "/// which the integer \\a I is written. \\n\n" |
46857 | "/// 0: Bits [63:0] of the result are used for insertion. \\n\n" |
46858 | "/// 1: Bits [127:64] of the result are used for insertion. \\n\n" |
46859 | "/// \\returns A 128-bit integer vector containing the constructed values.\n" |
46860 | "#define _mm_insert_epi64(X, I, N) \\\n" |
46861 | " (__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \\\n" |
46862 | " (long long)(I), (int)(N))\n" |
46863 | "#endif /* __x86_64__ */\n" |
46864 | "\n" |
46865 | "/* Extract int from packed integer array at index. This returns the element\n" |
46866 | " * as a zero extended value, so it is unsigned.\n" |
46867 | " */\n" |
46868 | "/// Extracts an 8-bit element from the 128-bit integer vector of\n" |
46869 | "/// [16 x i8], using the immediate value parameter \\a N as a selector.\n" |
46870 | "///\n" |
46871 | "/// \\headerfile <x86intrin.h>\n" |
46872 | "///\n" |
46873 | "/// \\code\n" |
46874 | "/// int _mm_extract_epi8(__m128i X, const int N);\n" |
46875 | "/// \\endcode\n" |
46876 | "///\n" |
46877 | "/// This intrinsic corresponds to the <c> VPEXTRB / PEXTRB </c> instruction.\n" |
46878 | "///\n" |
46879 | "/// \\param X\n" |
46880 | "/// A 128-bit integer vector.\n" |
46881 | "/// \\param N\n" |
46882 | "/// An immediate value. Bits [3:0] specify which 8-bit vector element from\n" |
46883 | "/// the argument \\a X to extract and copy to the result. \\n\n" |
46884 | "/// 0000: Bits [7:0] of parameter \\a X are extracted. \\n\n" |
46885 | "/// 0001: Bits [15:8] of the parameter \\a X are extracted. \\n\n" |
46886 | "/// 0010: Bits [23:16] of the parameter \\a X are extracted. \\n\n" |
46887 | "/// 0011: Bits [31:24] of the parameter \\a X are extracted. \\n\n" |
46888 | "/// 0100: Bits [39:32] of the parameter \\a X are extracted. \\n\n" |
46889 | "/// 0101: Bits [47:40] of the parameter \\a X are extracted. \\n\n" |
46890 | "/// 0110: Bits [55:48] of the parameter \\a X are extracted. \\n\n" |
46891 | "/// 0111: Bits [63:56] of the parameter \\a X are extracted. \\n\n" |
46892 | "/// 1000: Bits [71:64] of the parameter \\a X are extracted. \\n\n" |
46893 | "/// 1001: Bits [79:72] of the parameter \\a X are extracted. \\n\n" |
46894 | "/// 1010: Bits [87:80] of the parameter \\a X are extracted. \\n\n" |
46895 | "/// 1011: Bits [95:88] of the parameter \\a X are extracted. \\n\n" |
46896 | "/// 1100: Bits [103:96] of the parameter \\a X are extracted. \\n\n" |
46897 | "/// 1101: Bits [111:104] of the parameter \\a X are extracted. \\n\n" |
46898 | "/// 1110: Bits [119:112] of the parameter \\a X are extracted. \\n\n" |
46899 | "/// 1111: Bits [127:120] of the parameter \\a X are extracted.\n" |
46900 | "/// \\returns An unsigned integer, whose lower 8 bits are selected from the\n" |
46901 | "/// 128-bit integer vector parameter and the remaining bits are assigned\n" |
46902 | "/// zeros.\n" |
46903 | "#define _mm_extract_epi8(X, N) \\\n" |
46904 | " (int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \\\n" |
46905 | " (int)(N))\n" |
46906 | "\n" |
46907 | "/// Extracts a 32-bit element from the 128-bit integer vector of\n" |
46908 | "/// [4 x i32], using the immediate value parameter \\a N as a selector.\n" |
46909 | "///\n" |
46910 | "/// \\headerfile <x86intrin.h>\n" |
46911 | "///\n" |
46912 | "/// \\code\n" |
46913 | "/// int _mm_extract_epi32(__m128i X, const int N);\n" |
46914 | "/// \\endcode\n" |
46915 | "///\n" |
46916 | "/// This intrinsic corresponds to the <c> VPEXTRD / PEXTRD </c> instruction.\n" |
46917 | "///\n" |
46918 | "/// \\param X\n" |
46919 | "/// A 128-bit integer vector.\n" |
46920 | "/// \\param N\n" |
46921 | "/// An immediate value. Bits [1:0] specify which 32-bit vector element from\n" |
46922 | "/// the argument \\a X to extract and copy to the result. \\n\n" |
46923 | "/// 00: Bits [31:0] of the parameter \\a X are extracted. \\n\n" |
46924 | "/// 01: Bits [63:32] of the parameter \\a X are extracted. \\n\n" |
46925 | "/// 10: Bits [95:64] of the parameter \\a X are extracted. \\n\n" |
46926 | "/// 11: Bits [127:96] of the parameter \\a X are exracted.\n" |
46927 | "/// \\returns An integer, whose lower 32 bits are selected from the 128-bit\n" |
46928 | "/// integer vector parameter and the remaining bits are assigned zeros.\n" |
46929 | "#define _mm_extract_epi32(X, N) \\\n" |
46930 | " (int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N))\n" |
46931 | "\n" |
46932 | "#ifdef __x86_64__\n" |
46933 | "/// Extracts a 64-bit element from the 128-bit integer vector of\n" |
46934 | "/// [2 x i64], using the immediate value parameter \\a N as a selector.\n" |
46935 | "///\n" |
46936 | "/// \\headerfile <x86intrin.h>\n" |
46937 | "///\n" |
46938 | "/// \\code\n" |
46939 | "/// long long _mm_extract_epi64(__m128i X, const int N);\n" |
46940 | "/// \\endcode\n" |
46941 | "///\n" |
46942 | "/// This intrinsic corresponds to the <c> VPEXTRQ / PEXTRQ </c> instruction.\n" |
46943 | "///\n" |
46944 | "/// \\param X\n" |
46945 | "/// A 128-bit integer vector.\n" |
46946 | "/// \\param N\n" |
46947 | "/// An immediate value. Bit [0] specifies which 64-bit vector element from\n" |
46948 | "/// the argument \\a X to return. \\n\n" |
46949 | "/// 0: Bits [63:0] are returned. \\n\n" |
46950 | "/// 1: Bits [127:64] are returned. \\n\n" |
46951 | "/// \\returns A 64-bit integer.\n" |
46952 | "#define _mm_extract_epi64(X, N) \\\n" |
46953 | " (long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N))\n" |
46954 | "#endif /* __x86_64 */\n" |
46955 | "\n" |
46956 | "/* SSE4 128-bit Packed Integer Comparisons. */\n" |
46957 | "/// Tests whether the specified bits in a 128-bit integer vector are all\n" |
46958 | "/// zeros.\n" |
46959 | "///\n" |
46960 | "/// \\headerfile <x86intrin.h>\n" |
46961 | "///\n" |
46962 | "/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n" |
46963 | "///\n" |
46964 | "/// \\param __M\n" |
46965 | "/// A 128-bit integer vector containing the bits to be tested.\n" |
46966 | "/// \\param __V\n" |
46967 | "/// A 128-bit integer vector selecting which bits to test in operand \\a __M.\n" |
46968 | "/// \\returns TRUE if the specified bits are all zeros; FALSE otherwise.\n" |
46969 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
46970 | "_mm_testz_si128(__m128i __M, __m128i __V)\n" |
46971 | "{\n" |
46972 | " return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);\n" |
46973 | "}\n" |
46974 | "\n" |
46975 | "/// Tests whether the specified bits in a 128-bit integer vector are all\n" |
46976 | "/// ones.\n" |
46977 | "///\n" |
46978 | "/// \\headerfile <x86intrin.h>\n" |
46979 | "///\n" |
46980 | "/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n" |
46981 | "///\n" |
46982 | "/// \\param __M\n" |
46983 | "/// A 128-bit integer vector containing the bits to be tested.\n" |
46984 | "/// \\param __V\n" |
46985 | "/// A 128-bit integer vector selecting which bits to test in operand \\a __M.\n" |
46986 | "/// \\returns TRUE if the specified bits are all ones; FALSE otherwise.\n" |
46987 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
46988 | "_mm_testc_si128(__m128i __M, __m128i __V)\n" |
46989 | "{\n" |
46990 | " return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);\n" |
46991 | "}\n" |
46992 | "\n" |
46993 | "/// Tests whether the specified bits in a 128-bit integer vector are\n" |
46994 | "/// neither all zeros nor all ones.\n" |
46995 | "///\n" |
46996 | "/// \\headerfile <x86intrin.h>\n" |
46997 | "///\n" |
46998 | "/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n" |
46999 | "///\n" |
47000 | "/// \\param __M\n" |
47001 | "/// A 128-bit integer vector containing the bits to be tested.\n" |
47002 | "/// \\param __V\n" |
47003 | "/// A 128-bit integer vector selecting which bits to test in operand \\a __M.\n" |
47004 | "/// \\returns TRUE if the specified bits are neither all zeros nor all ones;\n" |
47005 | "/// FALSE otherwise.\n" |
47006 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
47007 | "_mm_testnzc_si128(__m128i __M, __m128i __V)\n" |
47008 | "{\n" |
47009 | " return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);\n" |
47010 | "}\n" |
47011 | "\n" |
47012 | "/// Tests whether the specified bits in a 128-bit integer vector are all\n" |
47013 | "/// ones.\n" |
47014 | "///\n" |
47015 | "/// \\headerfile <x86intrin.h>\n" |
47016 | "///\n" |
47017 | "/// \\code\n" |
47018 | "/// int _mm_test_all_ones(__m128i V);\n" |
47019 | "/// \\endcode\n" |
47020 | "///\n" |
47021 | "/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n" |
47022 | "///\n" |
47023 | "/// \\param V\n" |
47024 | "/// A 128-bit integer vector containing the bits to be tested.\n" |
47025 | "/// \\returns TRUE if the bits specified in the operand are all set to 1; FALSE\n" |
47026 | "/// otherwise.\n" |
47027 | "#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))\n" |
47028 | "\n" |
47029 | "/// Tests whether the specified bits in a 128-bit integer vector are\n" |
47030 | "/// neither all zeros nor all ones.\n" |
47031 | "///\n" |
47032 | "/// \\headerfile <x86intrin.h>\n" |
47033 | "///\n" |
47034 | "/// \\code\n" |
47035 | "/// int _mm_test_mix_ones_zeros(__m128i M, __m128i V);\n" |
47036 | "/// \\endcode\n" |
47037 | "///\n" |
47038 | "/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n" |
47039 | "///\n" |
47040 | "/// \\param M\n" |
47041 | "/// A 128-bit integer vector containing the bits to be tested.\n" |
47042 | "/// \\param V\n" |
47043 | "/// A 128-bit integer vector selecting which bits to test in operand \\a M.\n" |
47044 | "/// \\returns TRUE if the specified bits are neither all zeros nor all ones;\n" |
47045 | "/// FALSE otherwise.\n" |
47046 | "#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))\n" |
47047 | "\n" |
47048 | "/// Tests whether the specified bits in a 128-bit integer vector are all\n" |
47049 | "/// zeros.\n" |
47050 | "///\n" |
47051 | "/// \\headerfile <x86intrin.h>\n" |
47052 | "///\n" |
47053 | "/// \\code\n" |
47054 | "/// int _mm_test_all_zeros(__m128i M, __m128i V);\n" |
47055 | "/// \\endcode\n" |
47056 | "///\n" |
47057 | "/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n" |
47058 | "///\n" |
47059 | "/// \\param M\n" |
47060 | "/// A 128-bit integer vector containing the bits to be tested.\n" |
47061 | "/// \\param V\n" |
47062 | "/// A 128-bit integer vector selecting which bits to test in operand \\a M.\n" |
47063 | "/// \\returns TRUE if the specified bits are all zeros; FALSE otherwise.\n" |
47064 | "#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))\n" |
47065 | "\n" |
47066 | "/* SSE4 64-bit Packed Integer Comparisons. */\n" |
47067 | "/// Compares each of the corresponding 64-bit values of the 128-bit\n" |
47068 | "/// integer vectors for equality.\n" |
47069 | "///\n" |
47070 | "/// \\headerfile <x86intrin.h>\n" |
47071 | "///\n" |
47072 | "/// This intrinsic corresponds to the <c> VPCMPEQQ / PCMPEQQ </c> instruction.\n" |
47073 | "///\n" |
47074 | "/// \\param __V1\n" |
47075 | "/// A 128-bit integer vector.\n" |
47076 | "/// \\param __V2\n" |
47077 | "/// A 128-bit integer vector.\n" |
47078 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
47079 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
47080 | "_mm_cmpeq_epi64(__m128i __V1, __m128i __V2)\n" |
47081 | "{\n" |
47082 | " return (__m128i)((__v2di)__V1 == (__v2di)__V2);\n" |
47083 | "}\n" |
47084 | "\n" |
47085 | "/* SSE4 Packed Integer Sign-Extension. */\n" |
47086 | "/// Sign-extends each of the lower eight 8-bit integer elements of a\n" |
47087 | "/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a\n" |
47088 | "/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector\n" |
47089 | "/// are unused.\n" |
47090 | "///\n" |
47091 | "/// \\headerfile <x86intrin.h>\n" |
47092 | "///\n" |
47093 | "/// This intrinsic corresponds to the <c> VPMOVSXBW / PMOVSXBW </c> instruction.\n" |
47094 | "///\n" |
47095 | "/// \\param __V\n" |
47096 | "/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are sign-\n" |
47097 | "/// extended to 16-bit values.\n" |
47098 | "/// \\returns A 128-bit vector of [8 x i16] containing the sign-extended values.\n" |
47099 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
47100 | "_mm_cvtepi8_epi16(__m128i __V)\n" |
47101 | "{\n" |
47102 | " /* This function always performs a signed extension, but __v16qi is a char\n" |
47103 | " which may be signed or unsigned, so use __v16qs. */\n" |
47104 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);\n" |
47105 | "}\n" |
47106 | "\n" |
47107 | "/// Sign-extends each of the lower four 8-bit integer elements of a\n" |
47108 | "/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a\n" |
47109 | "/// 128-bit vector of [4 x i32]. The upper twelve elements of the input\n" |
47110 | "/// vector are unused.\n" |
47111 | "///\n" |
47112 | "/// \\headerfile <x86intrin.h>\n" |
47113 | "///\n" |
47114 | "/// This intrinsic corresponds to the <c> VPMOVSXBD / PMOVSXBD </c> instruction.\n" |
47115 | "///\n" |
47116 | "/// \\param __V\n" |
47117 | "/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are\n" |
47118 | "/// sign-extended to 32-bit values.\n" |
47119 | "/// \\returns A 128-bit vector of [4 x i32] containing the sign-extended values.\n" |
47120 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
47121 | "_mm_cvtepi8_epi32(__m128i __V)\n" |
47122 | "{\n" |
47123 | " /* This function always performs a signed extension, but __v16qi is a char\n" |
47124 | " which may be signed or unsigned, so use __v16qs. */\n" |
47125 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si);\n" |
47126 | "}\n" |
47127 | "\n" |
47128 | "/// Sign-extends each of the lower two 8-bit integer elements of a\n" |
47129 | "/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in\n" |
47130 | "/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input\n" |
47131 | "/// vector are unused.\n" |
47132 | "///\n" |
47133 | "/// \\headerfile <x86intrin.h>\n" |
47134 | "///\n" |
47135 | "/// This intrinsic corresponds to the <c> VPMOVSXBQ / PMOVSXBQ </c> instruction.\n" |
47136 | "///\n" |
47137 | "/// \\param __V\n" |
47138 | "/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are\n" |
47139 | "/// sign-extended to 64-bit values.\n" |
47140 | "/// \\returns A 128-bit vector of [2 x i64] containing the sign-extended values.\n" |
47141 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
47142 | "_mm_cvtepi8_epi64(__m128i __V)\n" |
47143 | "{\n" |
47144 | " /* This function always performs a signed extension, but __v16qi is a char\n" |
47145 | " which may be signed or unsigned, so use __v16qs. */\n" |
47146 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di);\n" |
47147 | "}\n" |
47148 | "\n" |
47149 | "/// Sign-extends each of the lower four 16-bit integer elements of a\n" |
47150 | "/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in\n" |
47151 | "/// a 128-bit vector of [4 x i32]. The upper four elements of the input\n" |
47152 | "/// vector are unused.\n" |
47153 | "///\n" |
47154 | "/// \\headerfile <x86intrin.h>\n" |
47155 | "///\n" |
47156 | "/// This intrinsic corresponds to the <c> VPMOVSXWD / PMOVSXWD </c> instruction.\n" |
47157 | "///\n" |
47158 | "/// \\param __V\n" |
47159 | "/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are\n" |
47160 | "/// sign-extended to 32-bit values.\n" |
47161 | "/// \\returns A 128-bit vector of [4 x i32] containing the sign-extended values.\n" |
47162 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
47163 | "_mm_cvtepi16_epi32(__m128i __V)\n" |
47164 | "{\n" |
47165 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si);\n" |
47166 | "}\n" |
47167 | "\n" |
47168 | "/// Sign-extends each of the lower two 16-bit integer elements of a\n" |
47169 | "/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in\n" |
47170 | "/// a 128-bit vector of [2 x i64]. The upper six elements of the input\n" |
47171 | "/// vector are unused.\n" |
47172 | "///\n" |
47173 | "/// \\headerfile <x86intrin.h>\n" |
47174 | "///\n" |
47175 | "/// This intrinsic corresponds to the <c> VPMOVSXWQ / PMOVSXWQ </c> instruction.\n" |
47176 | "///\n" |
47177 | "/// \\param __V\n" |
47178 | "/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are\n" |
47179 | "/// sign-extended to 64-bit values.\n" |
47180 | "/// \\returns A 128-bit vector of [2 x i64] containing the sign-extended values.\n" |
47181 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
47182 | "_mm_cvtepi16_epi64(__m128i __V)\n" |
47183 | "{\n" |
47184 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di);\n" |
47185 | "}\n" |
47186 | "\n" |
47187 | "/// Sign-extends each of the lower two 32-bit integer elements of a\n" |
47188 | "/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in\n" |
47189 | "/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector\n" |
47190 | "/// are unused.\n" |
47191 | "///\n" |
47192 | "/// \\headerfile <x86intrin.h>\n" |
47193 | "///\n" |
47194 | "/// This intrinsic corresponds to the <c> VPMOVSXDQ / PMOVSXDQ </c> instruction.\n" |
47195 | "///\n" |
47196 | "/// \\param __V\n" |
47197 | "/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are\n" |
47198 | "/// sign-extended to 64-bit values.\n" |
47199 | "/// \\returns A 128-bit vector of [2 x i64] containing the sign-extended values.\n" |
47200 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
47201 | "_mm_cvtepi32_epi64(__m128i __V)\n" |
47202 | "{\n" |
47203 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di);\n" |
47204 | "}\n" |
47205 | "\n" |
47206 | "/* SSE4 Packed Integer Zero-Extension. */\n" |
47207 | "/// Zero-extends each of the lower eight 8-bit integer elements of a\n" |
47208 | "/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a\n" |
47209 | "/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector\n" |
47210 | "/// are unused.\n" |
47211 | "///\n" |
47212 | "/// \\headerfile <x86intrin.h>\n" |
47213 | "///\n" |
47214 | "/// This intrinsic corresponds to the <c> VPMOVZXBW / PMOVZXBW </c> instruction.\n" |
47215 | "///\n" |
47216 | "/// \\param __V\n" |
47217 | "/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are\n" |
47218 | "/// zero-extended to 16-bit values.\n" |
47219 | "/// \\returns A 128-bit vector of [8 x i16] containing the zero-extended values.\n" |
47220 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
47221 | "_mm_cvtepu8_epi16(__m128i __V)\n" |
47222 | "{\n" |
47223 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);\n" |
47224 | "}\n" |
47225 | "\n" |
47226 | "/// Zero-extends each of the lower four 8-bit integer elements of a\n" |
47227 | "/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a\n" |
47228 | "/// 128-bit vector of [4 x i32]. The upper twelve elements of the input\n" |
47229 | "/// vector are unused.\n" |
47230 | "///\n" |
47231 | "/// \\headerfile <x86intrin.h>\n" |
47232 | "///\n" |
47233 | "/// This intrinsic corresponds to the <c> VPMOVZXBD / PMOVZXBD </c> instruction.\n" |
47234 | "///\n" |
47235 | "/// \\param __V\n" |
47236 | "/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are\n" |
47237 | "/// zero-extended to 32-bit values.\n" |
47238 | "/// \\returns A 128-bit vector of [4 x i32] containing the zero-extended values.\n" |
47239 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
47240 | "_mm_cvtepu8_epi32(__m128i __V)\n" |
47241 | "{\n" |
47242 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si);\n" |
47243 | "}\n" |
47244 | "\n" |
47245 | "/// Zero-extends each of the lower two 8-bit integer elements of a\n" |
47246 | "/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in\n" |
47247 | "/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input\n" |
47248 | "/// vector are unused.\n" |
47249 | "///\n" |
47250 | "/// \\headerfile <x86intrin.h>\n" |
47251 | "///\n" |
47252 | "/// This intrinsic corresponds to the <c> VPMOVZXBQ / PMOVZXBQ </c> instruction.\n" |
47253 | "///\n" |
47254 | "/// \\param __V\n" |
47255 | "/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are\n" |
47256 | "/// zero-extended to 64-bit values.\n" |
47257 | "/// \\returns A 128-bit vector of [2 x i64] containing the zero-extended values.\n" |
47258 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
47259 | "_mm_cvtepu8_epi64(__m128i __V)\n" |
47260 | "{\n" |
47261 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di);\n" |
47262 | "}\n" |
47263 | "\n" |
47264 | "/// Zero-extends each of the lower four 16-bit integer elements of a\n" |
47265 | "/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in\n" |
47266 | "/// a 128-bit vector of [4 x i32]. The upper four elements of the input\n" |
47267 | "/// vector are unused.\n" |
47268 | "///\n" |
47269 | "/// \\headerfile <x86intrin.h>\n" |
47270 | "///\n" |
47271 | "/// This intrinsic corresponds to the <c> VPMOVZXWD / PMOVZXWD </c> instruction.\n" |
47272 | "///\n" |
47273 | "/// \\param __V\n" |
47274 | "/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are\n" |
47275 | "/// zero-extended to 32-bit values.\n" |
47276 | "/// \\returns A 128-bit vector of [4 x i32] containing the zero-extended values.\n" |
47277 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
47278 | "_mm_cvtepu16_epi32(__m128i __V)\n" |
47279 | "{\n" |
47280 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si);\n" |
47281 | "}\n" |
47282 | "\n" |
47283 | "/// Zero-extends each of the lower two 16-bit integer elements of a\n" |
47284 | "/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in\n" |
47285 | "/// a 128-bit vector of [2 x i64]. The upper six elements of the input vector\n" |
47286 | "/// are unused.\n" |
47287 | "///\n" |
47288 | "/// \\headerfile <x86intrin.h>\n" |
47289 | "///\n" |
47290 | "/// This intrinsic corresponds to the <c> VPMOVZXWQ / PMOVZXWQ </c> instruction.\n" |
47291 | "///\n" |
47292 | "/// \\param __V\n" |
47293 | "/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are\n" |
47294 | "/// zero-extended to 64-bit values.\n" |
47295 | "/// \\returns A 128-bit vector of [2 x i64] containing the zero-extended values.\n" |
47296 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
47297 | "_mm_cvtepu16_epi64(__m128i __V)\n" |
47298 | "{\n" |
47299 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di);\n" |
47300 | "}\n" |
47301 | "\n" |
47302 | "/// Zero-extends each of the lower two 32-bit integer elements of a\n" |
47303 | "/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in\n" |
47304 | "/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector\n" |
47305 | "/// are unused.\n" |
47306 | "///\n" |
47307 | "/// \\headerfile <x86intrin.h>\n" |
47308 | "///\n" |
47309 | "/// This intrinsic corresponds to the <c> VPMOVZXDQ / PMOVZXDQ </c> instruction.\n" |
47310 | "///\n" |
47311 | "/// \\param __V\n" |
47312 | "/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are\n" |
47313 | "/// zero-extended to 64-bit values.\n" |
47314 | "/// \\returns A 128-bit vector of [2 x i64] containing the zero-extended values.\n" |
47315 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
47316 | "_mm_cvtepu32_epi64(__m128i __V)\n" |
47317 | "{\n" |
47318 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di);\n" |
47319 | "}\n" |
47320 | "\n" |
47321 | "/* SSE4 Pack with Unsigned Saturation. */\n" |
47322 | "/// Converts 32-bit signed integers from both 128-bit integer vector\n" |
47323 | "/// operands into 16-bit unsigned integers, and returns the packed result.\n" |
47324 | "/// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than\n" |
47325 | "/// 0x0000 are saturated to 0x0000.\n" |
47326 | "///\n" |
47327 | "/// \\headerfile <x86intrin.h>\n" |
47328 | "///\n" |
47329 | "/// This intrinsic corresponds to the <c> VPACKUSDW / PACKUSDW </c> instruction.\n" |
47330 | "///\n" |
47331 | "/// \\param __V1\n" |
47332 | "/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a\n" |
47333 | "/// signed integer and is converted to a 16-bit unsigned integer with\n" |
47334 | "/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values\n" |
47335 | "/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values\n" |
47336 | "/// are written to the lower 64 bits of the result.\n" |
47337 | "/// \\param __V2\n" |
47338 | "/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a\n" |
47339 | "/// signed integer and is converted to a 16-bit unsigned integer with\n" |
47340 | "/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values\n" |
47341 | "/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values\n" |
47342 | "/// are written to the higher 64 bits of the result.\n" |
47343 | "/// \\returns A 128-bit vector of [8 x i16] containing the converted values.\n" |
47344 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
47345 | "_mm_packus_epi32(__m128i __V1, __m128i __V2)\n" |
47346 | "{\n" |
47347 | " return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2);\n" |
47348 | "}\n" |
47349 | "\n" |
47350 | "/* SSE4 Multiple Packed Sums of Absolute Difference. */\n" |
47351 | "/// Subtracts 8-bit unsigned integer values and computes the absolute\n" |
47352 | "/// values of the differences to the corresponding bits in the destination.\n" |
47353 | "/// Then sums of the absolute differences are returned according to the bit\n" |
47354 | "/// fields in the immediate operand.\n" |
47355 | "///\n" |
47356 | "/// \\headerfile <x86intrin.h>\n" |
47357 | "///\n" |
47358 | "/// \\code\n" |
47359 | "/// __m128i _mm_mpsadbw_epu8(__m128i X, __m128i Y, const int M);\n" |
47360 | "/// \\endcode\n" |
47361 | "///\n" |
47362 | "/// This intrinsic corresponds to the <c> VMPSADBW / MPSADBW </c> instruction.\n" |
47363 | "///\n" |
47364 | "/// \\param X\n" |
47365 | "/// A 128-bit vector of [16 x i8].\n" |
47366 | "/// \\param Y\n" |
47367 | "/// A 128-bit vector of [16 x i8].\n" |
47368 | "/// \\param M\n" |
47369 | "/// An 8-bit immediate operand specifying how the absolute differences are to\n" |
47370 | "/// be calculated, according to the following algorithm:\n" |
47371 | "/// \\code\n" |
47372 | "/// // M2 represents bit 2 of the immediate operand\n" |
47373 | "/// // M10 represents bits [1:0] of the immediate operand\n" |
47374 | "/// i = M2 * 4;\n" |
47375 | "/// j = M10 * 4;\n" |
47376 | "/// for (k = 0; k < 8; k = k + 1) {\n" |
47377 | "/// d0 = abs(X[i + k + 0] - Y[j + 0]);\n" |
47378 | "/// d1 = abs(X[i + k + 1] - Y[j + 1]);\n" |
47379 | "/// d2 = abs(X[i + k + 2] - Y[j + 2]);\n" |
47380 | "/// d3 = abs(X[i + k + 3] - Y[j + 3]);\n" |
47381 | "/// r[k] = d0 + d1 + d2 + d3;\n" |
47382 | "/// }\n" |
47383 | "/// \\endcode\n" |
47384 | "/// \\returns A 128-bit integer vector containing the sums of the sets of\n" |
47385 | "/// absolute differences between both operands.\n" |
47386 | "#define _mm_mpsadbw_epu8(X, Y, M) \\\n" |
47387 | " (__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \\\n" |
47388 | " (__v16qi)(__m128i)(Y), (M))\n" |
47389 | "\n" |
47390 | "/// Finds the minimum unsigned 16-bit element in the input 128-bit\n" |
47391 | "/// vector of [8 x u16] and returns it and along with its index.\n" |
47392 | "///\n" |
47393 | "/// \\headerfile <x86intrin.h>\n" |
47394 | "///\n" |
47395 | "/// This intrinsic corresponds to the <c> VPHMINPOSUW / PHMINPOSUW </c>\n" |
47396 | "/// instruction.\n" |
47397 | "///\n" |
47398 | "/// \\param __V\n" |
47399 | "/// A 128-bit vector of [8 x u16].\n" |
47400 | "/// \\returns A 128-bit value where bits [15:0] contain the minimum value found\n" |
47401 | "/// in parameter \\a __V, bits [18:16] contain the index of the minimum value\n" |
47402 | "/// and the remaining bits are set to 0.\n" |
47403 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
47404 | "_mm_minpos_epu16(__m128i __V)\n" |
47405 | "{\n" |
47406 | " return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V);\n" |
47407 | "}\n" |
47408 | "\n" |
47409 | "/* Handle the sse4.2 definitions here. */\n" |
47410 | "\n" |
47411 | "/* These definitions are normally in nmmintrin.h, but gcc puts them in here\n" |
47412 | " so we'll do the same. */\n" |
47413 | "\n" |
47414 | "#undef __DEFAULT_FN_ATTRS\n" |
47415 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse4.2\")))\n" |
47416 | "\n" |
47417 | "/* These specify the type of data that we're comparing. */\n" |
47418 | "#define _SIDD_UBYTE_OPS 0x00\n" |
47419 | "#define _SIDD_UWORD_OPS 0x01\n" |
47420 | "#define _SIDD_SBYTE_OPS 0x02\n" |
47421 | "#define _SIDD_SWORD_OPS 0x03\n" |
47422 | "\n" |
47423 | "/* These specify the type of comparison operation. */\n" |
47424 | "#define _SIDD_CMP_EQUAL_ANY 0x00\n" |
47425 | "#define _SIDD_CMP_RANGES 0x04\n" |
47426 | "#define _SIDD_CMP_EQUAL_EACH 0x08\n" |
47427 | "#define _SIDD_CMP_EQUAL_ORDERED 0x0c\n" |
47428 | "\n" |
47429 | "/* These macros specify the polarity of the operation. */\n" |
47430 | "#define _SIDD_POSITIVE_POLARITY 0x00\n" |
47431 | "#define _SIDD_NEGATIVE_POLARITY 0x10\n" |
47432 | "#define _SIDD_MASKED_POSITIVE_POLARITY 0x20\n" |
47433 | "#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30\n" |
47434 | "\n" |
47435 | "/* These macros are used in _mm_cmpXstri() to specify the return. */\n" |
47436 | "#define _SIDD_LEAST_SIGNIFICANT 0x00\n" |
47437 | "#define _SIDD_MOST_SIGNIFICANT 0x40\n" |
47438 | "\n" |
47439 | "/* These macros are used in _mm_cmpXstri() to specify the return. */\n" |
47440 | "#define _SIDD_BIT_MASK 0x00\n" |
47441 | "#define _SIDD_UNIT_MASK 0x40\n" |
47442 | "\n" |
47443 | "/* SSE4.2 Packed Comparison Intrinsics. */\n" |
47444 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
47445 | "/// data with implicitly defined lengths that is contained in source operands\n" |
47446 | "/// \\a A and \\a B. Returns a 128-bit integer vector representing the result\n" |
47447 | "/// mask of the comparison.\n" |
47448 | "///\n" |
47449 | "/// \\headerfile <x86intrin.h>\n" |
47450 | "///\n" |
47451 | "/// \\code\n" |
47452 | "/// __m128i _mm_cmpistrm(__m128i A, __m128i B, const int M);\n" |
47453 | "/// \\endcode\n" |
47454 | "///\n" |
47455 | "/// This intrinsic corresponds to the <c> VPCMPISTRM / PCMPISTRM </c>\n" |
47456 | "/// instruction.\n" |
47457 | "///\n" |
47458 | "/// \\param A\n" |
47459 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47460 | "/// compared.\n" |
47461 | "/// \\param B\n" |
47462 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47463 | "/// compared.\n" |
47464 | "/// \\param M\n" |
47465 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
47466 | "/// words, the type of comparison to perform, and the format of the return\n" |
47467 | "/// value. \\n\n" |
47468 | "/// Bits [1:0]: Determine source data format. \\n\n" |
47469 | "/// 00: 16 unsigned bytes \\n\n" |
47470 | "/// 01: 8 unsigned words \\n\n" |
47471 | "/// 10: 16 signed bytes \\n\n" |
47472 | "/// 11: 8 signed words \\n\n" |
47473 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
47474 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
47475 | "/// the characters in \\a A. \\n\n" |
47476 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
47477 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
47478 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
47479 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
47480 | "/// \\a B for equality. \\n\n" |
47481 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
47482 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
47483 | "/// mask of the comparison results. \\n\n" |
47484 | "/// 00: No effect. \\n\n" |
47485 | "/// 01: Negate the bit mask. \\n\n" |
47486 | "/// 10: No effect. \\n\n" |
47487 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
47488 | "/// to the size of \\a A or \\a B. \\n\n" |
47489 | "/// Bit [6]: Determines whether the result is zero-extended or expanded to 16\n" |
47490 | "/// bytes. \\n\n" |
47491 | "/// 0: The result is zero-extended to 16 bytes. \\n\n" |
47492 | "/// 1: The result is expanded to 16 bytes (this expansion is performed by\n" |
47493 | "/// repeating each bit 8 or 16 times).\n" |
47494 | "/// \\returns Returns a 128-bit integer vector representing the result mask of\n" |
47495 | "/// the comparison.\n" |
47496 | "#define _mm_cmpistrm(A, B, M) \\\n" |
47497 | " (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \\\n" |
47498 | " (__v16qi)(__m128i)(B), (int)(M))\n" |
47499 | "\n" |
47500 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
47501 | "/// data with implicitly defined lengths that is contained in source operands\n" |
47502 | "/// \\a A and \\a B. Returns an integer representing the result index of the\n" |
47503 | "/// comparison.\n" |
47504 | "///\n" |
47505 | "/// \\headerfile <x86intrin.h>\n" |
47506 | "///\n" |
47507 | "/// \\code\n" |
47508 | "/// int _mm_cmpistri(__m128i A, __m128i B, const int M);\n" |
47509 | "/// \\endcode\n" |
47510 | "///\n" |
47511 | "/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n" |
47512 | "/// instruction.\n" |
47513 | "///\n" |
47514 | "/// \\param A\n" |
47515 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47516 | "/// compared.\n" |
47517 | "/// \\param B\n" |
47518 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47519 | "/// compared.\n" |
47520 | "/// \\param M\n" |
47521 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
47522 | "/// words, the type of comparison to perform, and the format of the return\n" |
47523 | "/// value. \\n\n" |
47524 | "/// Bits [1:0]: Determine source data format. \\n\n" |
47525 | "/// 00: 16 unsigned bytes \\n\n" |
47526 | "/// 01: 8 unsigned words \\n\n" |
47527 | "/// 10: 16 signed bytes \\n\n" |
47528 | "/// 11: 8 signed words \\n\n" |
47529 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
47530 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
47531 | "/// the characters in \\a A. \\n\n" |
47532 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
47533 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
47534 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
47535 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
47536 | "/// \\a B for equality. \\n\n" |
47537 | "/// 11: Substring: Search B for substring matches of \\a A. \\n\n" |
47538 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
47539 | "/// mask of the comparison results. \\n\n" |
47540 | "/// 00: No effect. \\n\n" |
47541 | "/// 01: Negate the bit mask. \\n\n" |
47542 | "/// 10: No effect. \\n\n" |
47543 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
47544 | "/// to the size of \\a A or \\a B. \\n\n" |
47545 | "/// Bit [6]: Determines whether the index of the lowest set bit or the\n" |
47546 | "/// highest set bit is returned. \\n\n" |
47547 | "/// 0: The index of the least significant set bit. \\n\n" |
47548 | "/// 1: The index of the most significant set bit. \\n\n" |
47549 | "/// \\returns Returns an integer representing the result index of the comparison.\n" |
47550 | "#define _mm_cmpistri(A, B, M) \\\n" |
47551 | " (int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \\\n" |
47552 | " (__v16qi)(__m128i)(B), (int)(M))\n" |
47553 | "\n" |
47554 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
47555 | "/// data with explicitly defined lengths that is contained in source operands\n" |
47556 | "/// \\a A and \\a B. Returns a 128-bit integer vector representing the result\n" |
47557 | "/// mask of the comparison.\n" |
47558 | "///\n" |
47559 | "/// \\headerfile <x86intrin.h>\n" |
47560 | "///\n" |
47561 | "/// \\code\n" |
47562 | "/// __m128i _mm_cmpestrm(__m128i A, int LA, __m128i B, int LB, const int M);\n" |
47563 | "/// \\endcode\n" |
47564 | "///\n" |
47565 | "/// This intrinsic corresponds to the <c> VPCMPESTRM / PCMPESTRM </c>\n" |
47566 | "/// instruction.\n" |
47567 | "///\n" |
47568 | "/// \\param A\n" |
47569 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47570 | "/// compared.\n" |
47571 | "/// \\param LA\n" |
47572 | "/// An integer that specifies the length of the string in \\a A.\n" |
47573 | "/// \\param B\n" |
47574 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47575 | "/// compared.\n" |
47576 | "/// \\param LB\n" |
47577 | "/// An integer that specifies the length of the string in \\a B.\n" |
47578 | "/// \\param M\n" |
47579 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
47580 | "/// words, the type of comparison to perform, and the format of the return\n" |
47581 | "/// value. \\n\n" |
47582 | "/// Bits [1:0]: Determine source data format. \\n\n" |
47583 | "/// 00: 16 unsigned bytes \\n\n" |
47584 | "/// 01: 8 unsigned words \\n\n" |
47585 | "/// 10: 16 signed bytes \\n\n" |
47586 | "/// 11: 8 signed words \\n\n" |
47587 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
47588 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
47589 | "/// the characters in \\a A. \\n\n" |
47590 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
47591 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
47592 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
47593 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
47594 | "/// \\a B for equality. \\n\n" |
47595 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
47596 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
47597 | "/// mask of the comparison results. \\n\n" |
47598 | "/// 00: No effect. \\n\n" |
47599 | "/// 01: Negate the bit mask. \\n\n" |
47600 | "/// 10: No effect. \\n\n" |
47601 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
47602 | "/// to the size of \\a A or \\a B. \\n\n" |
47603 | "/// Bit [6]: Determines whether the result is zero-extended or expanded to 16\n" |
47604 | "/// bytes. \\n\n" |
47605 | "/// 0: The result is zero-extended to 16 bytes. \\n\n" |
47606 | "/// 1: The result is expanded to 16 bytes (this expansion is performed by\n" |
47607 | "/// repeating each bit 8 or 16 times). \\n\n" |
47608 | "/// \\returns Returns a 128-bit integer vector representing the result mask of\n" |
47609 | "/// the comparison.\n" |
47610 | "#define _mm_cmpestrm(A, LA, B, LB, M) \\\n" |
47611 | " (__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \\\n" |
47612 | " (__v16qi)(__m128i)(B), (int)(LB), \\\n" |
47613 | " (int)(M))\n" |
47614 | "\n" |
47615 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
47616 | "/// data with explicitly defined lengths that is contained in source operands\n" |
47617 | "/// \\a A and \\a B. Returns an integer representing the result index of the\n" |
47618 | "/// comparison.\n" |
47619 | "///\n" |
47620 | "/// \\headerfile <x86intrin.h>\n" |
47621 | "///\n" |
47622 | "/// \\code\n" |
47623 | "/// int _mm_cmpestri(__m128i A, int LA, __m128i B, int LB, const int M);\n" |
47624 | "/// \\endcode\n" |
47625 | "///\n" |
47626 | "/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n" |
47627 | "/// instruction.\n" |
47628 | "///\n" |
47629 | "/// \\param A\n" |
47630 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47631 | "/// compared.\n" |
47632 | "/// \\param LA\n" |
47633 | "/// An integer that specifies the length of the string in \\a A.\n" |
47634 | "/// \\param B\n" |
47635 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47636 | "/// compared.\n" |
47637 | "/// \\param LB\n" |
47638 | "/// An integer that specifies the length of the string in \\a B.\n" |
47639 | "/// \\param M\n" |
47640 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
47641 | "/// words, the type of comparison to perform, and the format of the return\n" |
47642 | "/// value. \\n\n" |
47643 | "/// Bits [1:0]: Determine source data format. \\n\n" |
47644 | "/// 00: 16 unsigned bytes \\n\n" |
47645 | "/// 01: 8 unsigned words \\n\n" |
47646 | "/// 10: 16 signed bytes \\n\n" |
47647 | "/// 11: 8 signed words \\n\n" |
47648 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
47649 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
47650 | "/// the characters in \\a A. \\n\n" |
47651 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
47652 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
47653 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
47654 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
47655 | "/// \\a B for equality. \\n\n" |
47656 | "/// 11: Substring: Search B for substring matches of \\a A. \\n\n" |
47657 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
47658 | "/// mask of the comparison results. \\n\n" |
47659 | "/// 00: No effect. \\n\n" |
47660 | "/// 01: Negate the bit mask. \\n\n" |
47661 | "/// 10: No effect. \\n\n" |
47662 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
47663 | "/// to the size of \\a A or \\a B. \\n\n" |
47664 | "/// Bit [6]: Determines whether the index of the lowest set bit or the\n" |
47665 | "/// highest set bit is returned. \\n\n" |
47666 | "/// 0: The index of the least significant set bit. \\n\n" |
47667 | "/// 1: The index of the most significant set bit. \\n\n" |
47668 | "/// \\returns Returns an integer representing the result index of the comparison.\n" |
47669 | "#define _mm_cmpestri(A, LA, B, LB, M) \\\n" |
47670 | " (int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \\\n" |
47671 | " (__v16qi)(__m128i)(B), (int)(LB), \\\n" |
47672 | " (int)(M))\n" |
47673 | "\n" |
47674 | "/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */\n" |
47675 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
47676 | "/// data with implicitly defined lengths that is contained in source operands\n" |
47677 | "/// \\a A and \\a B. Returns 1 if the bit mask is zero and the length of the\n" |
47678 | "/// string in \\a B is the maximum, otherwise, returns 0.\n" |
47679 | "///\n" |
47680 | "/// \\headerfile <x86intrin.h>\n" |
47681 | "///\n" |
47682 | "/// \\code\n" |
47683 | "/// int _mm_cmpistra(__m128i A, __m128i B, const int M);\n" |
47684 | "/// \\endcode\n" |
47685 | "///\n" |
47686 | "/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n" |
47687 | "/// instruction.\n" |
47688 | "///\n" |
47689 | "/// \\param A\n" |
47690 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47691 | "/// compared.\n" |
47692 | "/// \\param B\n" |
47693 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47694 | "/// compared.\n" |
47695 | "/// \\param M\n" |
47696 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
47697 | "/// words and the type of comparison to perform. \\n\n" |
47698 | "/// Bits [1:0]: Determine source data format. \\n\n" |
47699 | "/// 00: 16 unsigned bytes \\n\n" |
47700 | "/// 01: 8 unsigned words \\n\n" |
47701 | "/// 10: 16 signed bytes \\n\n" |
47702 | "/// 11: 8 signed words \\n\n" |
47703 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
47704 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
47705 | "/// the characters in \\a A. \\n\n" |
47706 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
47707 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
47708 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
47709 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
47710 | "/// \\a B for equality. \\n\n" |
47711 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
47712 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
47713 | "/// mask of the comparison results. \\n\n" |
47714 | "/// 00: No effect. \\n\n" |
47715 | "/// 01: Negate the bit mask. \\n\n" |
47716 | "/// 10: No effect. \\n\n" |
47717 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
47718 | "/// to the size of \\a A or \\a B. \\n\n" |
47719 | "/// \\returns Returns 1 if the bit mask is zero and the length of the string in\n" |
47720 | "/// \\a B is the maximum; otherwise, returns 0.\n" |
47721 | "#define _mm_cmpistra(A, B, M) \\\n" |
47722 | " (int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \\\n" |
47723 | " (__v16qi)(__m128i)(B), (int)(M))\n" |
47724 | "\n" |
47725 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
47726 | "/// data with implicitly defined lengths that is contained in source operands\n" |
47727 | "/// \\a A and \\a B. Returns 1 if the bit mask is non-zero, otherwise, returns\n" |
47728 | "/// 0.\n" |
47729 | "///\n" |
47730 | "/// \\headerfile <x86intrin.h>\n" |
47731 | "///\n" |
47732 | "/// \\code\n" |
47733 | "/// int _mm_cmpistrc(__m128i A, __m128i B, const int M);\n" |
47734 | "/// \\endcode\n" |
47735 | "///\n" |
47736 | "/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n" |
47737 | "/// instruction.\n" |
47738 | "///\n" |
47739 | "/// \\param A\n" |
47740 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47741 | "/// compared.\n" |
47742 | "/// \\param B\n" |
47743 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47744 | "/// compared.\n" |
47745 | "/// \\param M\n" |
47746 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
47747 | "/// words and the type of comparison to perform. \\n\n" |
47748 | "/// Bits [1:0]: Determine source data format. \\n\n" |
47749 | "/// 00: 16 unsigned bytes \\n\n" |
47750 | "/// 01: 8 unsigned words \\n\n" |
47751 | "/// 10: 16 signed bytes \\n\n" |
47752 | "/// 11: 8 signed words \\n\n" |
47753 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
47754 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
47755 | "/// the characters in \\a A. \\n\n" |
47756 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
47757 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
47758 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
47759 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
47760 | "/// \\a B for equality. \\n\n" |
47761 | "/// 11: Substring: Search B for substring matches of \\a A. \\n\n" |
47762 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
47763 | "/// mask of the comparison results. \\n\n" |
47764 | "/// 00: No effect. \\n\n" |
47765 | "/// 01: Negate the bit mask. \\n\n" |
47766 | "/// 10: No effect. \\n\n" |
47767 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
47768 | "/// to the size of \\a A or \\a B.\n" |
47769 | "/// \\returns Returns 1 if the bit mask is non-zero, otherwise, returns 0.\n" |
47770 | "#define _mm_cmpistrc(A, B, M) \\\n" |
47771 | " (int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \\\n" |
47772 | " (__v16qi)(__m128i)(B), (int)(M))\n" |
47773 | "\n" |
47774 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
47775 | "/// data with implicitly defined lengths that is contained in source operands\n" |
47776 | "/// \\a A and \\a B. Returns bit 0 of the resulting bit mask.\n" |
47777 | "///\n" |
47778 | "/// \\headerfile <x86intrin.h>\n" |
47779 | "///\n" |
47780 | "/// \\code\n" |
47781 | "/// int _mm_cmpistro(__m128i A, __m128i B, const int M);\n" |
47782 | "/// \\endcode\n" |
47783 | "///\n" |
47784 | "/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n" |
47785 | "/// instruction.\n" |
47786 | "///\n" |
47787 | "/// \\param A\n" |
47788 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47789 | "/// compared.\n" |
47790 | "/// \\param B\n" |
47791 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47792 | "/// compared.\n" |
47793 | "/// \\param M\n" |
47794 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
47795 | "/// words and the type of comparison to perform. \\n\n" |
47796 | "/// Bits [1:0]: Determine source data format. \\n\n" |
47797 | "/// 00: 16 unsigned bytes \\n\n" |
47798 | "/// 01: 8 unsigned words \\n\n" |
47799 | "/// 10: 16 signed bytes \\n\n" |
47800 | "/// 11: 8 signed words \\n\n" |
47801 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
47802 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
47803 | "/// the characters in \\a A. \\n\n" |
47804 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
47805 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
47806 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
47807 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
47808 | "/// \\a B for equality. \\n\n" |
47809 | "/// 11: Substring: Search B for substring matches of \\a A. \\n\n" |
47810 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
47811 | "/// mask of the comparison results. \\n\n" |
47812 | "/// 00: No effect. \\n\n" |
47813 | "/// 01: Negate the bit mask. \\n\n" |
47814 | "/// 10: No effect. \\n\n" |
47815 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
47816 | "/// to the size of \\a A or \\a B. \\n\n" |
47817 | "/// \\returns Returns bit 0 of the resulting bit mask.\n" |
47818 | "#define _mm_cmpistro(A, B, M) \\\n" |
47819 | " (int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \\\n" |
47820 | " (__v16qi)(__m128i)(B), (int)(M))\n" |
47821 | "\n" |
47822 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
47823 | "/// data with implicitly defined lengths that is contained in source operands\n" |
47824 | "/// \\a A and \\a B. Returns 1 if the length of the string in \\a A is less than\n" |
47825 | "/// the maximum, otherwise, returns 0.\n" |
47826 | "///\n" |
47827 | "/// \\headerfile <x86intrin.h>\n" |
47828 | "///\n" |
47829 | "/// \\code\n" |
47830 | "/// int _mm_cmpistrs(__m128i A, __m128i B, const int M);\n" |
47831 | "/// \\endcode\n" |
47832 | "///\n" |
47833 | "/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n" |
47834 | "/// instruction.\n" |
47835 | "///\n" |
47836 | "/// \\param A\n" |
47837 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47838 | "/// compared.\n" |
47839 | "/// \\param B\n" |
47840 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47841 | "/// compared.\n" |
47842 | "/// \\param M\n" |
47843 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
47844 | "/// words and the type of comparison to perform. \\n\n" |
47845 | "/// Bits [1:0]: Determine source data format. \\n\n" |
47846 | "/// 00: 16 unsigned bytes \\n\n" |
47847 | "/// 01: 8 unsigned words \\n\n" |
47848 | "/// 10: 16 signed bytes \\n\n" |
47849 | "/// 11: 8 signed words \\n\n" |
47850 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
47851 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
47852 | "/// the characters in \\a A. \\n\n" |
47853 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
47854 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
47855 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
47856 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
47857 | "/// \\a B for equality. \\n\n" |
47858 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
47859 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
47860 | "/// mask of the comparison results. \\n\n" |
47861 | "/// 00: No effect. \\n\n" |
47862 | "/// 01: Negate the bit mask. \\n\n" |
47863 | "/// 10: No effect. \\n\n" |
47864 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
47865 | "/// to the size of \\a A or \\a B. \\n\n" |
47866 | "/// \\returns Returns 1 if the length of the string in \\a A is less than the\n" |
47867 | "/// maximum, otherwise, returns 0.\n" |
47868 | "#define _mm_cmpistrs(A, B, M) \\\n" |
47869 | " (int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \\\n" |
47870 | " (__v16qi)(__m128i)(B), (int)(M))\n" |
47871 | "\n" |
47872 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
47873 | "/// data with implicitly defined lengths that is contained in source operands\n" |
47874 | "/// \\a A and \\a B. Returns 1 if the length of the string in \\a B is less than\n" |
47875 | "/// the maximum, otherwise, returns 0.\n" |
47876 | "///\n" |
47877 | "/// \\headerfile <x86intrin.h>\n" |
47878 | "///\n" |
47879 | "/// \\code\n" |
47880 | "/// int _mm_cmpistrz(__m128i A, __m128i B, const int M);\n" |
47881 | "/// \\endcode\n" |
47882 | "///\n" |
47883 | "/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n" |
47884 | "/// instruction.\n" |
47885 | "///\n" |
47886 | "/// \\param A\n" |
47887 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47888 | "/// compared.\n" |
47889 | "/// \\param B\n" |
47890 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47891 | "/// compared.\n" |
47892 | "/// \\param M\n" |
47893 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
47894 | "/// words and the type of comparison to perform. \\n\n" |
47895 | "/// Bits [1:0]: Determine source data format. \\n\n" |
47896 | "/// 00: 16 unsigned bytes \\n\n" |
47897 | "/// 01: 8 unsigned words \\n\n" |
47898 | "/// 10: 16 signed bytes \\n\n" |
47899 | "/// 11: 8 signed words \\n\n" |
47900 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
47901 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
47902 | "/// the characters in \\a A. \\n\n" |
47903 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
47904 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
47905 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
47906 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
47907 | "/// \\a B for equality. \\n\n" |
47908 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
47909 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
47910 | "/// mask of the comparison results. \\n\n" |
47911 | "/// 00: No effect. \\n\n" |
47912 | "/// 01: Negate the bit mask. \\n\n" |
47913 | "/// 10: No effect. \\n\n" |
47914 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
47915 | "/// to the size of \\a A or \\a B.\n" |
47916 | "/// \\returns Returns 1 if the length of the string in \\a B is less than the\n" |
47917 | "/// maximum, otherwise, returns 0.\n" |
47918 | "#define _mm_cmpistrz(A, B, M) \\\n" |
47919 | " (int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \\\n" |
47920 | " (__v16qi)(__m128i)(B), (int)(M))\n" |
47921 | "\n" |
47922 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
47923 | "/// data with explicitly defined lengths that is contained in source operands\n" |
47924 | "/// \\a A and \\a B. Returns 1 if the bit mask is zero and the length of the\n" |
47925 | "/// string in \\a B is the maximum, otherwise, returns 0.\n" |
47926 | "///\n" |
47927 | "/// \\headerfile <x86intrin.h>\n" |
47928 | "///\n" |
47929 | "/// \\code\n" |
47930 | "/// int _mm_cmpestra(__m128i A, int LA, __m128i B, int LB, const int M);\n" |
47931 | "/// \\endcode\n" |
47932 | "///\n" |
47933 | "/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n" |
47934 | "/// instruction.\n" |
47935 | "///\n" |
47936 | "/// \\param A\n" |
47937 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47938 | "/// compared.\n" |
47939 | "/// \\param LA\n" |
47940 | "/// An integer that specifies the length of the string in \\a A.\n" |
47941 | "/// \\param B\n" |
47942 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47943 | "/// compared.\n" |
47944 | "/// \\param LB\n" |
47945 | "/// An integer that specifies the length of the string in \\a B.\n" |
47946 | "/// \\param M\n" |
47947 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
47948 | "/// words and the type of comparison to perform. \\n\n" |
47949 | "/// Bits [1:0]: Determine source data format. \\n\n" |
47950 | "/// 00: 16 unsigned bytes \\n\n" |
47951 | "/// 01: 8 unsigned words \\n\n" |
47952 | "/// 10: 16 signed bytes \\n\n" |
47953 | "/// 11: 8 signed words \\n\n" |
47954 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
47955 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
47956 | "/// the characters in \\a A. \\n\n" |
47957 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
47958 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
47959 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
47960 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
47961 | "/// \\a B for equality. \\n\n" |
47962 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
47963 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
47964 | "/// mask of the comparison results. \\n\n" |
47965 | "/// 00: No effect. \\n\n" |
47966 | "/// 01: Negate the bit mask. \\n\n" |
47967 | "/// 10: No effect. \\n\n" |
47968 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
47969 | "/// to the size of \\a A or \\a B.\n" |
47970 | "/// \\returns Returns 1 if the bit mask is zero and the length of the string in\n" |
47971 | "/// \\a B is the maximum, otherwise, returns 0.\n" |
47972 | "#define _mm_cmpestra(A, LA, B, LB, M) \\\n" |
47973 | " (int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \\\n" |
47974 | " (__v16qi)(__m128i)(B), (int)(LB), \\\n" |
47975 | " (int)(M))\n" |
47976 | "\n" |
47977 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
47978 | "/// data with explicitly defined lengths that is contained in source operands\n" |
47979 | "/// \\a A and \\a B. Returns 1 if the resulting mask is non-zero, otherwise,\n" |
47980 | "/// returns 0.\n" |
47981 | "///\n" |
47982 | "/// \\headerfile <x86intrin.h>\n" |
47983 | "///\n" |
47984 | "/// \\code\n" |
47985 | "/// int _mm_cmpestrc(__m128i A, int LA, __m128i B, int LB, const int M);\n" |
47986 | "/// \\endcode\n" |
47987 | "///\n" |
47988 | "/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n" |
47989 | "/// instruction.\n" |
47990 | "///\n" |
47991 | "/// \\param A\n" |
47992 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47993 | "/// compared.\n" |
47994 | "/// \\param LA\n" |
47995 | "/// An integer that specifies the length of the string in \\a A.\n" |
47996 | "/// \\param B\n" |
47997 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
47998 | "/// compared.\n" |
47999 | "/// \\param LB\n" |
48000 | "/// An integer that specifies the length of the string in \\a B.\n" |
48001 | "/// \\param M\n" |
48002 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
48003 | "/// words and the type of comparison to perform. \\n\n" |
48004 | "/// Bits [1:0]: Determine source data format. \\n\n" |
48005 | "/// 00: 16 unsigned bytes \\n\n" |
48006 | "/// 01: 8 unsigned words \\n\n" |
48007 | "/// 10: 16 signed bytes \\n\n" |
48008 | "/// 11: 8 signed words \\n\n" |
48009 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
48010 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
48011 | "/// the characters in \\a A. \\n\n" |
48012 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
48013 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
48014 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
48015 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
48016 | "/// \\a B for equality. \\n\n" |
48017 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
48018 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
48019 | "/// mask of the comparison results. \\n\n" |
48020 | "/// 00: No effect. \\n\n" |
48021 | "/// 01: Negate the bit mask. \\n\n" |
48022 | "/// 10: No effect. \\n\n" |
48023 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
48024 | "/// to the size of \\a A or \\a B. \\n\n" |
48025 | "/// \\returns Returns 1 if the resulting mask is non-zero, otherwise, returns 0.\n" |
48026 | "#define _mm_cmpestrc(A, LA, B, LB, M) \\\n" |
48027 | " (int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \\\n" |
48028 | " (__v16qi)(__m128i)(B), (int)(LB), \\\n" |
48029 | " (int)(M))\n" |
48030 | "\n" |
48031 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
48032 | "/// data with explicitly defined lengths that is contained in source operands\n" |
48033 | "/// \\a A and \\a B. Returns bit 0 of the resulting bit mask.\n" |
48034 | "///\n" |
48035 | "/// \\headerfile <x86intrin.h>\n" |
48036 | "///\n" |
48037 | "/// \\code\n" |
48038 | "/// int _mm_cmpestro(__m128i A, int LA, __m128i B, int LB, const int M);\n" |
48039 | "/// \\endcode\n" |
48040 | "///\n" |
48041 | "/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n" |
48042 | "/// instruction.\n" |
48043 | "///\n" |
48044 | "/// \\param A\n" |
48045 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
48046 | "/// compared.\n" |
48047 | "/// \\param LA\n" |
48048 | "/// An integer that specifies the length of the string in \\a A.\n" |
48049 | "/// \\param B\n" |
48050 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
48051 | "/// compared.\n" |
48052 | "/// \\param LB\n" |
48053 | "/// An integer that specifies the length of the string in \\a B.\n" |
48054 | "/// \\param M\n" |
48055 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
48056 | "/// words and the type of comparison to perform. \\n\n" |
48057 | "/// Bits [1:0]: Determine source data format. \\n\n" |
48058 | "/// 00: 16 unsigned bytes \\n\n" |
48059 | "/// 01: 8 unsigned words \\n\n" |
48060 | "/// 10: 16 signed bytes \\n\n" |
48061 | "/// 11: 8 signed words \\n\n" |
48062 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
48063 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
48064 | "/// the characters in \\a A. \\n\n" |
48065 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
48066 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
48067 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
48068 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
48069 | "/// \\a B for equality. \\n\n" |
48070 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
48071 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
48072 | "/// mask of the comparison results. \\n\n" |
48073 | "/// 00: No effect. \\n\n" |
48074 | "/// 01: Negate the bit mask. \\n\n" |
48075 | "/// 10: No effect. \\n\n" |
48076 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
48077 | "/// to the size of \\a A or \\a B.\n" |
48078 | "/// \\returns Returns bit 0 of the resulting bit mask.\n" |
48079 | "#define _mm_cmpestro(A, LA, B, LB, M) \\\n" |
48080 | " (int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \\\n" |
48081 | " (__v16qi)(__m128i)(B), (int)(LB), \\\n" |
48082 | " (int)(M))\n" |
48083 | "\n" |
48084 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
48085 | "/// data with explicitly defined lengths that is contained in source operands\n" |
48086 | "/// \\a A and \\a B. Returns 1 if the length of the string in \\a A is less than\n" |
48087 | "/// the maximum, otherwise, returns 0.\n" |
48088 | "///\n" |
48089 | "/// \\headerfile <x86intrin.h>\n" |
48090 | "///\n" |
48091 | "/// \\code\n" |
48092 | "/// int _mm_cmpestrs(__m128i A, int LA, __m128i B, int LB, const int M);\n" |
48093 | "/// \\endcode\n" |
48094 | "///\n" |
48095 | "/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n" |
48096 | "/// instruction.\n" |
48097 | "///\n" |
48098 | "/// \\param A\n" |
48099 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
48100 | "/// compared.\n" |
48101 | "/// \\param LA\n" |
48102 | "/// An integer that specifies the length of the string in \\a A.\n" |
48103 | "/// \\param B\n" |
48104 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
48105 | "/// compared.\n" |
48106 | "/// \\param LB\n" |
48107 | "/// An integer that specifies the length of the string in \\a B.\n" |
48108 | "/// \\param M\n" |
48109 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
48110 | "/// words and the type of comparison to perform. \\n\n" |
48111 | "/// Bits [1:0]: Determine source data format. \\n\n" |
48112 | "/// 00: 16 unsigned bytes \\n\n" |
48113 | "/// 01: 8 unsigned words \\n\n" |
48114 | "/// 10: 16 signed bytes \\n\n" |
48115 | "/// 11: 8 signed words \\n\n" |
48116 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
48117 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
48118 | "/// the characters in \\a A. \\n\n" |
48119 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
48120 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
48121 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
48122 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
48123 | "/// \\a B for equality. \\n\n" |
48124 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
48125 | "/// Bits [5:4]: Determine whether to perform a one's complement in the bit\n" |
48126 | "/// mask of the comparison results. \\n\n" |
48127 | "/// 00: No effect. \\n\n" |
48128 | "/// 01: Negate the bit mask. \\n\n" |
48129 | "/// 10: No effect. \\n\n" |
48130 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
48131 | "/// to the size of \\a A or \\a B. \\n\n" |
48132 | "/// \\returns Returns 1 if the length of the string in \\a A is less than the\n" |
48133 | "/// maximum, otherwise, returns 0.\n" |
48134 | "#define _mm_cmpestrs(A, LA, B, LB, M) \\\n" |
48135 | " (int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \\\n" |
48136 | " (__v16qi)(__m128i)(B), (int)(LB), \\\n" |
48137 | " (int)(M))\n" |
48138 | "\n" |
48139 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
48140 | "/// data with explicitly defined lengths that is contained in source operands\n" |
48141 | "/// \\a A and \\a B. Returns 1 if the length of the string in \\a B is less than\n" |
48142 | "/// the maximum, otherwise, returns 0.\n" |
48143 | "///\n" |
48144 | "/// \\headerfile <x86intrin.h>\n" |
48145 | "///\n" |
48146 | "/// \\code\n" |
48147 | "/// int _mm_cmpestrz(__m128i A, int LA, __m128i B, int LB, const int M);\n" |
48148 | "/// \\endcode\n" |
48149 | "///\n" |
48150 | "/// This intrinsic corresponds to the <c> VPCMPESTRI </c> instruction.\n" |
48151 | "///\n" |
48152 | "/// \\param A\n" |
48153 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
48154 | "/// compared.\n" |
48155 | "/// \\param LA\n" |
48156 | "/// An integer that specifies the length of the string in \\a A.\n" |
48157 | "/// \\param B\n" |
48158 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
48159 | "/// compared.\n" |
48160 | "/// \\param LB\n" |
48161 | "/// An integer that specifies the length of the string in \\a B.\n" |
48162 | "/// \\param M\n" |
48163 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
48164 | "/// words and the type of comparison to perform. \\n\n" |
48165 | "/// Bits [1:0]: Determine source data format. \\n\n" |
48166 | "/// 00: 16 unsigned bytes \\n\n" |
48167 | "/// 01: 8 unsigned words \\n\n" |
48168 | "/// 10: 16 signed bytes \\n\n" |
48169 | "/// 11: 8 signed words \\n\n" |
48170 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
48171 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
48172 | "/// the characters in \\a A. \\n\n" |
48173 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
48174 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
48175 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
48176 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
48177 | "/// \\a B for equality. \\n\n" |
48178 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
48179 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
48180 | "/// mask of the comparison results. \\n\n" |
48181 | "/// 00: No effect. \\n\n" |
48182 | "/// 01: Negate the bit mask. \\n\n" |
48183 | "/// 10: No effect. \\n\n" |
48184 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
48185 | "/// to the size of \\a A or \\a B.\n" |
48186 | "/// \\returns Returns 1 if the length of the string in \\a B is less than the\n" |
48187 | "/// maximum, otherwise, returns 0.\n" |
48188 | "#define _mm_cmpestrz(A, LA, B, LB, M) \\\n" |
48189 | " (int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \\\n" |
48190 | " (__v16qi)(__m128i)(B), (int)(LB), \\\n" |
48191 | " (int)(M))\n" |
48192 | "\n" |
48193 | "/* SSE4.2 Compare Packed Data -- Greater Than. */\n" |
48194 | "/// Compares each of the corresponding 64-bit values of the 128-bit\n" |
48195 | "/// integer vectors to determine if the values in the first operand are\n" |
48196 | "/// greater than those in the second operand.\n" |
48197 | "///\n" |
48198 | "/// \\headerfile <x86intrin.h>\n" |
48199 | "///\n" |
48200 | "/// This intrinsic corresponds to the <c> VPCMPGTQ / PCMPGTQ </c> instruction.\n" |
48201 | "///\n" |
48202 | "/// \\param __V1\n" |
48203 | "/// A 128-bit integer vector.\n" |
48204 | "/// \\param __V2\n" |
48205 | "/// A 128-bit integer vector.\n" |
48206 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
48207 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
48208 | "_mm_cmpgt_epi64(__m128i __V1, __m128i __V2)\n" |
48209 | "{\n" |
48210 | " return (__m128i)((__v2di)__V1 > (__v2di)__V2);\n" |
48211 | "}\n" |
48212 | "\n" |
48213 | "/* SSE4.2 Accumulate CRC32. */\n" |
48214 | "/// Adds the unsigned integer operand to the CRC-32C checksum of the\n" |
48215 | "/// unsigned char operand.\n" |
48216 | "///\n" |
48217 | "/// \\headerfile <x86intrin.h>\n" |
48218 | "///\n" |
48219 | "/// This intrinsic corresponds to the <c> CRC32B </c> instruction.\n" |
48220 | "///\n" |
48221 | "/// \\param __C\n" |
48222 | "/// An unsigned integer operand to add to the CRC-32C checksum of operand\n" |
48223 | "/// \\a __D.\n" |
48224 | "/// \\param __D\n" |
48225 | "/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum.\n" |
48226 | "/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n" |
48227 | "/// operand \\a __D.\n" |
48228 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
48229 | "_mm_crc32_u8(unsigned int __C, unsigned char __D)\n" |
48230 | "{\n" |
48231 | " return __builtin_ia32_crc32qi(__C, __D);\n" |
48232 | "}\n" |
48233 | "\n" |
48234 | "/// Adds the unsigned integer operand to the CRC-32C checksum of the\n" |
48235 | "/// unsigned short operand.\n" |
48236 | "///\n" |
48237 | "/// \\headerfile <x86intrin.h>\n" |
48238 | "///\n" |
48239 | "/// This intrinsic corresponds to the <c> CRC32W </c> instruction.\n" |
48240 | "///\n" |
48241 | "/// \\param __C\n" |
48242 | "/// An unsigned integer operand to add to the CRC-32C checksum of operand\n" |
48243 | "/// \\a __D.\n" |
48244 | "/// \\param __D\n" |
48245 | "/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum.\n" |
48246 | "/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n" |
48247 | "/// operand \\a __D.\n" |
48248 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
48249 | "_mm_crc32_u16(unsigned int __C, unsigned short __D)\n" |
48250 | "{\n" |
48251 | " return __builtin_ia32_crc32hi(__C, __D);\n" |
48252 | "}\n" |
48253 | "\n" |
48254 | "/// Adds the first unsigned integer operand to the CRC-32C checksum of\n" |
48255 | "/// the second unsigned integer operand.\n" |
48256 | "///\n" |
48257 | "/// \\headerfile <x86intrin.h>\n" |
48258 | "///\n" |
48259 | "/// This intrinsic corresponds to the <c> CRC32L </c> instruction.\n" |
48260 | "///\n" |
48261 | "/// \\param __C\n" |
48262 | "/// An unsigned integer operand to add to the CRC-32C checksum of operand\n" |
48263 | "/// \\a __D.\n" |
48264 | "/// \\param __D\n" |
48265 | "/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum.\n" |
48266 | "/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n" |
48267 | "/// operand \\a __D.\n" |
48268 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
48269 | "_mm_crc32_u32(unsigned int __C, unsigned int __D)\n" |
48270 | "{\n" |
48271 | " return __builtin_ia32_crc32si(__C, __D);\n" |
48272 | "}\n" |
48273 | "\n" |
48274 | "#ifdef __x86_64__\n" |
48275 | "/// Adds the unsigned integer operand to the CRC-32C checksum of the\n" |
48276 | "/// unsigned 64-bit integer operand.\n" |
48277 | "///\n" |
48278 | "/// \\headerfile <x86intrin.h>\n" |
48279 | "///\n" |
48280 | "/// This intrinsic corresponds to the <c> CRC32Q </c> instruction.\n" |
48281 | "///\n" |
48282 | "/// \\param __C\n" |
48283 | "/// An unsigned integer operand to add to the CRC-32C checksum of operand\n" |
48284 | "/// \\a __D.\n" |
48285 | "/// \\param __D\n" |
48286 | "/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum.\n" |
48287 | "/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n" |
48288 | "/// operand \\a __D.\n" |
48289 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
48290 | "_mm_crc32_u64(unsigned long long __C, unsigned long long __D)\n" |
48291 | "{\n" |
48292 | " return __builtin_ia32_crc32di(__C, __D);\n" |
48293 | "}\n" |
48294 | "#endif /* __x86_64__ */\n" |
48295 | "\n" |
48296 | "#undef __DEFAULT_FN_ATTRS\n" |
48297 | "\n" |
48298 | "#include <popcntintrin.h>\n" |
48299 | "\n" |
48300 | "#endif /* __SMMINTRIN_H */\n" |
48301 | "" } , |
48302 | { "/builtins/stdalign.h" , "/*===---- stdalign.h - Standard header for alignment ------------------------===\n" |
48303 | " *\n" |
48304 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
48305 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
48306 | " * in the Software without restriction, including without limitation the rights\n" |
48307 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
48308 | " * copies of the Software, and to permit persons to whom the Software is\n" |
48309 | " * furnished to do so, subject to the following conditions:\n" |
48310 | " *\n" |
48311 | " * The above copyright notice and this permission notice shall be included in\n" |
48312 | " * all copies or substantial portions of the Software.\n" |
48313 | " *\n" |
48314 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
48315 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
48316 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
48317 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
48318 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
48319 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
48320 | " * THE SOFTWARE.\n" |
48321 | " *\n" |
48322 | " *===-----------------------------------------------------------------------===\n" |
48323 | " */\n" |
48324 | "\n" |
48325 | "#ifndef __STDALIGN_H\n" |
48326 | "#define __STDALIGN_H\n" |
48327 | "\n" |
48328 | "#ifndef __cplusplus\n" |
48329 | "#define alignas _Alignas\n" |
48330 | "#define alignof _Alignof\n" |
48331 | "#endif\n" |
48332 | "\n" |
48333 | "#define __alignas_is_defined 1\n" |
48334 | "#define __alignof_is_defined 1\n" |
48335 | "\n" |
48336 | "#endif /* __STDALIGN_H */\n" |
48337 | "" } , |
48338 | { "/builtins/stdarg.h" , "/*===---- stdarg.h - Variable argument handling ----------------------------===\n" |
48339 | " *\n" |
48340 | " * Copyright (c) 2008 Eli Friedman\n" |
48341 | " *\n" |
48342 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
48343 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
48344 | " * in the Software without restriction, including without limitation the rights\n" |
48345 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
48346 | " * copies of the Software, and to permit persons to whom the Software is\n" |
48347 | " * furnished to do so, subject to the following conditions:\n" |
48348 | " *\n" |
48349 | " * The above copyright notice and this permission notice shall be included in\n" |
48350 | " * all copies or substantial portions of the Software.\n" |
48351 | " *\n" |
48352 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
48353 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
48354 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
48355 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
48356 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
48357 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
48358 | " * THE SOFTWARE.\n" |
48359 | " *\n" |
48360 | " *===-----------------------------------------------------------------------===\n" |
48361 | " */\n" |
48362 | "\n" |
48363 | "#ifndef __STDARG_H\n" |
48364 | "#define __STDARG_H\n" |
48365 | "\n" |
48366 | "#ifndef _VA_LIST\n" |
48367 | "typedef __builtin_va_list va_list;\n" |
48368 | "#define _VA_LIST\n" |
48369 | "#endif\n" |
48370 | "#define va_start(ap, param) __builtin_va_start(ap, param)\n" |
48371 | "#define va_end(ap) __builtin_va_end(ap)\n" |
48372 | "#define va_arg(ap, type) __builtin_va_arg(ap, type)\n" |
48373 | "\n" |
48374 | "/* GCC always defines __va_copy, but does not define va_copy unless in c99 mode\n" |
48375 | " * or -ansi is not specified, since it was not part of C90.\n" |
48376 | " */\n" |
48377 | "#define __va_copy(d,s) __builtin_va_copy(d,s)\n" |
48378 | "\n" |
48379 | "#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L || !defined(__STRICT_ANSI__)\n" |
48380 | "#define va_copy(dest, src) __builtin_va_copy(dest, src)\n" |
48381 | "#endif\n" |
48382 | "\n" |
48383 | "#ifndef __GNUC_VA_LIST\n" |
48384 | "#define __GNUC_VA_LIST 1\n" |
48385 | "typedef __builtin_va_list __gnuc_va_list;\n" |
48386 | "#endif\n" |
48387 | "\n" |
48388 | "#endif /* __STDARG_H */\n" |
48389 | "" } , |
48390 | { "/builtins/stdatomic.h" , "/*===---- stdatomic.h - Standard header for atomic types and operations -----===\n" |
48391 | " *\n" |
48392 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
48393 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
48394 | " * in the Software without restriction, including without limitation the rights\n" |
48395 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
48396 | " * copies of the Software, and to permit persons to whom the Software is\n" |
48397 | " * furnished to do so, subject to the following conditions:\n" |
48398 | " *\n" |
48399 | " * The above copyright notice and this permission notice shall be included in\n" |
48400 | " * all copies or substantial portions of the Software.\n" |
48401 | " *\n" |
48402 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
48403 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
48404 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
48405 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
48406 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
48407 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
48408 | " * THE SOFTWARE.\n" |
48409 | " *\n" |
48410 | " *===-----------------------------------------------------------------------===\n" |
48411 | " */\n" |
48412 | "\n" |
48413 | "#ifndef __CLANG_STDATOMIC_H\n" |
48414 | "#define __CLANG_STDATOMIC_H\n" |
48415 | "\n" |
48416 | "/* If we're hosted, fall back to the system's stdatomic.h. FreeBSD, for\n" |
48417 | " * example, already has a Clang-compatible stdatomic.h header.\n" |
48418 | " */\n" |
48419 | "#if __STDC_HOSTED__ && __has_include_next(<stdatomic.h>)\n" |
48420 | "# include_next <stdatomic.h>\n" |
48421 | "#else\n" |
48422 | "\n" |
48423 | "#include <stddef.h>\n" |
48424 | "#include <stdint.h>\n" |
48425 | "\n" |
48426 | "#ifdef __cplusplus\n" |
48427 | "extern \"C\" {\n" |
48428 | "#endif\n" |
48429 | "\n" |
48430 | "/* 7.17.1 Introduction */\n" |
48431 | "\n" |
48432 | "#define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE\n" |
48433 | "#define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE\n" |
48434 | "#define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE\n" |
48435 | "#define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE\n" |
48436 | "#define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE\n" |
48437 | "#define ATOMIC_SHORT_LOCK_FREE __CLANG_ATOMIC_SHORT_LOCK_FREE\n" |
48438 | "#define ATOMIC_INT_LOCK_FREE __CLANG_ATOMIC_INT_LOCK_FREE\n" |
48439 | "#define ATOMIC_LONG_LOCK_FREE __CLANG_ATOMIC_LONG_LOCK_FREE\n" |
48440 | "#define ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE\n" |
48441 | "#define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE\n" |
48442 | "\n" |
48443 | "/* 7.17.2 Initialization */\n" |
48444 | "\n" |
48445 | "#define ATOMIC_VAR_INIT(value) (value)\n" |
48446 | "#define atomic_init __c11_atomic_init\n" |
48447 | "\n" |
48448 | "/* 7.17.3 Order and consistency */\n" |
48449 | "\n" |
48450 | "typedef enum memory_order {\n" |
48451 | " memory_order_relaxed = __ATOMIC_RELAXED,\n" |
48452 | " memory_order_consume = __ATOMIC_CONSUME,\n" |
48453 | " memory_order_acquire = __ATOMIC_ACQUIRE,\n" |
48454 | " memory_order_release = __ATOMIC_RELEASE,\n" |
48455 | " memory_order_acq_rel = __ATOMIC_ACQ_REL,\n" |
48456 | " memory_order_seq_cst = __ATOMIC_SEQ_CST\n" |
48457 | "} memory_order;\n" |
48458 | "\n" |
48459 | "#define kill_dependency(y) (y)\n" |
48460 | "\n" |
48461 | "/* 7.17.4 Fences */\n" |
48462 | "\n" |
48463 | "/* These should be provided by the libc implementation. */\n" |
48464 | "void atomic_thread_fence(memory_order);\n" |
48465 | "void atomic_signal_fence(memory_order);\n" |
48466 | "\n" |
48467 | "#define atomic_thread_fence(order) __c11_atomic_thread_fence(order)\n" |
48468 | "#define atomic_signal_fence(order) __c11_atomic_signal_fence(order)\n" |
48469 | "\n" |
48470 | "/* 7.17.5 Lock-free property */\n" |
48471 | "\n" |
48472 | "#define atomic_is_lock_free(obj) __c11_atomic_is_lock_free(sizeof(*(obj)))\n" |
48473 | "\n" |
48474 | "/* 7.17.6 Atomic integer types */\n" |
48475 | "\n" |
48476 | "#ifdef __cplusplus\n" |
48477 | "typedef _Atomic(bool) atomic_bool;\n" |
48478 | "#else\n" |
48479 | "typedef _Atomic(_Bool) atomic_bool;\n" |
48480 | "#endif\n" |
48481 | "typedef _Atomic(char) atomic_char;\n" |
48482 | "typedef _Atomic(signed char) atomic_schar;\n" |
48483 | "typedef _Atomic(unsigned char) atomic_uchar;\n" |
48484 | "typedef _Atomic(short) atomic_short;\n" |
48485 | "typedef _Atomic(unsigned short) atomic_ushort;\n" |
48486 | "typedef _Atomic(int) atomic_int;\n" |
48487 | "typedef _Atomic(unsigned int) atomic_uint;\n" |
48488 | "typedef _Atomic(long) atomic_long;\n" |
48489 | "typedef _Atomic(unsigned long) atomic_ulong;\n" |
48490 | "typedef _Atomic(long long) atomic_llong;\n" |
48491 | "typedef _Atomic(unsigned long long) atomic_ullong;\n" |
48492 | "typedef _Atomic(uint_least16_t) atomic_char16_t;\n" |
48493 | "typedef _Atomic(uint_least32_t) atomic_char32_t;\n" |
48494 | "typedef _Atomic(wchar_t) atomic_wchar_t;\n" |
48495 | "typedef _Atomic(int_least8_t) atomic_int_least8_t;\n" |
48496 | "typedef _Atomic(uint_least8_t) atomic_uint_least8_t;\n" |
48497 | "typedef _Atomic(int_least16_t) atomic_int_least16_t;\n" |
48498 | "typedef _Atomic(uint_least16_t) atomic_uint_least16_t;\n" |
48499 | "typedef _Atomic(int_least32_t) atomic_int_least32_t;\n" |
48500 | "typedef _Atomic(uint_least32_t) atomic_uint_least32_t;\n" |
48501 | "typedef _Atomic(int_least64_t) atomic_int_least64_t;\n" |
48502 | "typedef _Atomic(uint_least64_t) atomic_uint_least64_t;\n" |
48503 | "typedef _Atomic(int_fast8_t) atomic_int_fast8_t;\n" |
48504 | "typedef _Atomic(uint_fast8_t) atomic_uint_fast8_t;\n" |
48505 | "typedef _Atomic(int_fast16_t) atomic_int_fast16_t;\n" |
48506 | "typedef _Atomic(uint_fast16_t) atomic_uint_fast16_t;\n" |
48507 | "typedef _Atomic(int_fast32_t) atomic_int_fast32_t;\n" |
48508 | "typedef _Atomic(uint_fast32_t) atomic_uint_fast32_t;\n" |
48509 | "typedef _Atomic(int_fast64_t) atomic_int_fast64_t;\n" |
48510 | "typedef _Atomic(uint_fast64_t) atomic_uint_fast64_t;\n" |
48511 | "typedef _Atomic(intptr_t) atomic_intptr_t;\n" |
48512 | "typedef _Atomic(uintptr_t) atomic_uintptr_t;\n" |
48513 | "typedef _Atomic(size_t) atomic_size_t;\n" |
48514 | "typedef _Atomic(ptrdiff_t) atomic_ptrdiff_t;\n" |
48515 | "typedef _Atomic(intmax_t) atomic_intmax_t;\n" |
48516 | "typedef _Atomic(uintmax_t) atomic_uintmax_t;\n" |
48517 | "\n" |
48518 | "/* 7.17.7 Operations on atomic types */\n" |
48519 | "\n" |
48520 | "#define atomic_store(object, desired) __c11_atomic_store(object, desired, __ATOMIC_SEQ_CST)\n" |
48521 | "#define atomic_store_explicit __c11_atomic_store\n" |
48522 | "\n" |
48523 | "#define atomic_load(object) __c11_atomic_load(object, __ATOMIC_SEQ_CST)\n" |
48524 | "#define atomic_load_explicit __c11_atomic_load\n" |
48525 | "\n" |
48526 | "#define atomic_exchange(object, desired) __c11_atomic_exchange(object, desired, __ATOMIC_SEQ_CST)\n" |
48527 | "#define atomic_exchange_explicit __c11_atomic_exchange\n" |
48528 | "\n" |
48529 | "#define atomic_compare_exchange_strong(object, expected, desired) __c11_atomic_compare_exchange_strong(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)\n" |
48530 | "#define atomic_compare_exchange_strong_explicit __c11_atomic_compare_exchange_strong\n" |
48531 | "\n" |
48532 | "#define atomic_compare_exchange_weak(object, expected, desired) __c11_atomic_compare_exchange_weak(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)\n" |
48533 | "#define atomic_compare_exchange_weak_explicit __c11_atomic_compare_exchange_weak\n" |
48534 | "\n" |
48535 | "#define atomic_fetch_add(object, operand) __c11_atomic_fetch_add(object, operand, __ATOMIC_SEQ_CST)\n" |
48536 | "#define atomic_fetch_add_explicit __c11_atomic_fetch_add\n" |
48537 | "\n" |
48538 | "#define atomic_fetch_sub(object, operand) __c11_atomic_fetch_sub(object, operand, __ATOMIC_SEQ_CST)\n" |
48539 | "#define atomic_fetch_sub_explicit __c11_atomic_fetch_sub\n" |
48540 | "\n" |
48541 | "#define atomic_fetch_or(object, operand) __c11_atomic_fetch_or(object, operand, __ATOMIC_SEQ_CST)\n" |
48542 | "#define atomic_fetch_or_explicit __c11_atomic_fetch_or\n" |
48543 | "\n" |
48544 | "#define atomic_fetch_xor(object, operand) __c11_atomic_fetch_xor(object, operand, __ATOMIC_SEQ_CST)\n" |
48545 | "#define atomic_fetch_xor_explicit __c11_atomic_fetch_xor\n" |
48546 | "\n" |
48547 | "#define atomic_fetch_and(object, operand) __c11_atomic_fetch_and(object, operand, __ATOMIC_SEQ_CST)\n" |
48548 | "#define atomic_fetch_and_explicit __c11_atomic_fetch_and\n" |
48549 | "\n" |
48550 | "/* 7.17.8 Atomic flag type and operations */\n" |
48551 | "\n" |
48552 | "typedef struct atomic_flag { atomic_bool _Value; } atomic_flag;\n" |
48553 | "\n" |
48554 | "#define ATOMIC_FLAG_INIT { 0 }\n" |
48555 | "\n" |
48556 | "/* These should be provided by the libc implementation. */\n" |
48557 | "#ifdef __cplusplus\n" |
48558 | "bool atomic_flag_test_and_set(volatile atomic_flag *);\n" |
48559 | "bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order);\n" |
48560 | "#else\n" |
48561 | "_Bool atomic_flag_test_and_set(volatile atomic_flag *);\n" |
48562 | "_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order);\n" |
48563 | "#endif\n" |
48564 | "void atomic_flag_clear(volatile atomic_flag *);\n" |
48565 | "void atomic_flag_clear_explicit(volatile atomic_flag *, memory_order);\n" |
48566 | "\n" |
48567 | "#define atomic_flag_test_and_set(object) __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST)\n" |
48568 | "#define atomic_flag_test_and_set_explicit(object, order) __c11_atomic_exchange(&(object)->_Value, 1, order)\n" |
48569 | "\n" |
48570 | "#define atomic_flag_clear(object) __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST)\n" |
48571 | "#define atomic_flag_clear_explicit(object, order) __c11_atomic_store(&(object)->_Value, 0, order)\n" |
48572 | "\n" |
48573 | "#ifdef __cplusplus\n" |
48574 | "}\n" |
48575 | "#endif\n" |
48576 | "\n" |
48577 | "#endif /* __STDC_HOSTED__ */\n" |
48578 | "#endif /* __CLANG_STDATOMIC_H */\n" |
48579 | "\n" |
48580 | "" } , |
48581 | { "/builtins/stdbool.h" , "/*===---- stdbool.h - Standard header for booleans -------------------------===\n" |
48582 | " *\n" |
48583 | " * Copyright (c) 2008 Eli Friedman\n" |
48584 | " *\n" |
48585 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
48586 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
48587 | " * in the Software without restriction, including without limitation the rights\n" |
48588 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
48589 | " * copies of the Software, and to permit persons to whom the Software is\n" |
48590 | " * furnished to do so, subject to the following conditions:\n" |
48591 | " *\n" |
48592 | " * The above copyright notice and this permission notice shall be included in\n" |
48593 | " * all copies or substantial portions of the Software.\n" |
48594 | " *\n" |
48595 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
48596 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
48597 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
48598 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
48599 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
48600 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
48601 | " * THE SOFTWARE.\n" |
48602 | " *\n" |
48603 | " *===-----------------------------------------------------------------------===\n" |
48604 | " */\n" |
48605 | "\n" |
48606 | "#ifndef __STDBOOL_H\n" |
48607 | "#define __STDBOOL_H\n" |
48608 | "\n" |
48609 | "/* Don't define bool, true, and false in C++, except as a GNU extension. */\n" |
48610 | "#ifndef __cplusplus\n" |
48611 | "#define bool _Bool\n" |
48612 | "#define true 1\n" |
48613 | "#define false 0\n" |
48614 | "#elif defined(__GNUC__) && !defined(__STRICT_ANSI__)\n" |
48615 | "/* Define _Bool as a GNU extension. */\n" |
48616 | "#define _Bool bool\n" |
48617 | "#if __cplusplus < 201103L\n" |
48618 | "/* For C++98, define bool, false, true as a GNU extension. */\n" |
48619 | "#define bool bool\n" |
48620 | "#define false false\n" |
48621 | "#define true true\n" |
48622 | "#endif\n" |
48623 | "#endif\n" |
48624 | "\n" |
48625 | "#define __bool_true_false_are_defined 1\n" |
48626 | "\n" |
48627 | "#endif /* __STDBOOL_H */\n" |
48628 | "" } , |
48629 | { "/builtins/stddef.h" , "/*===---- stddef.h - Basic type definitions --------------------------------===\n" |
48630 | " *\n" |
48631 | " * Copyright (c) 2008 Eli Friedman\n" |
48632 | " *\n" |
48633 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
48634 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
48635 | " * in the Software without restriction, including without limitation the rights\n" |
48636 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
48637 | " * copies of the Software, and to permit persons to whom the Software is\n" |
48638 | " * furnished to do so, subject to the following conditions:\n" |
48639 | " *\n" |
48640 | " * The above copyright notice and this permission notice shall be included in\n" |
48641 | " * all copies or substantial portions of the Software.\n" |
48642 | " *\n" |
48643 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
48644 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
48645 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
48646 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
48647 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
48648 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
48649 | " * THE SOFTWARE.\n" |
48650 | " *\n" |
48651 | " *===-----------------------------------------------------------------------===\n" |
48652 | " */\n" |
48653 | "\n" |
48654 | "#if !defined(__STDDEF_H) || defined(__need_ptrdiff_t) || \\\n" |
48655 | " defined(__need_size_t) || defined(__need_wchar_t) || \\\n" |
48656 | " defined(__need_NULL) || defined(__need_wint_t)\n" |
48657 | "\n" |
48658 | "#if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \\\n" |
48659 | " !defined(__need_wchar_t) && !defined(__need_NULL) && \\\n" |
48660 | " !defined(__need_wint_t)\n" |
48661 | "/* Always define miscellaneous pieces when modules are available. */\n" |
48662 | "#if !__has_feature(modules)\n" |
48663 | "#define __STDDEF_H\n" |
48664 | "#endif\n" |
48665 | "#define __need_ptrdiff_t\n" |
48666 | "#define __need_size_t\n" |
48667 | "#define __need_wchar_t\n" |
48668 | "#define __need_NULL\n" |
48669 | "#define __need_STDDEF_H_misc\n" |
48670 | "/* __need_wint_t is intentionally not defined here. */\n" |
48671 | "#endif\n" |
48672 | "\n" |
48673 | "#if defined(__need_ptrdiff_t)\n" |
48674 | "#if !defined(_PTRDIFF_T) || __has_feature(modules)\n" |
48675 | "/* Always define ptrdiff_t when modules are available. */\n" |
48676 | "#if !__has_feature(modules)\n" |
48677 | "#define _PTRDIFF_T\n" |
48678 | "#endif\n" |
48679 | "typedef __PTRDIFF_TYPE__ ptrdiff_t;\n" |
48680 | "#endif\n" |
48681 | "#undef __need_ptrdiff_t\n" |
48682 | "#endif /* defined(__need_ptrdiff_t) */\n" |
48683 | "\n" |
48684 | "#if defined(__need_size_t)\n" |
48685 | "#if !defined(_SIZE_T) || __has_feature(modules)\n" |
48686 | "/* Always define size_t when modules are available. */\n" |
48687 | "#if !__has_feature(modules)\n" |
48688 | "#define _SIZE_T\n" |
48689 | "#endif\n" |
48690 | "typedef __SIZE_TYPE__ size_t;\n" |
48691 | "#endif\n" |
48692 | "#undef __need_size_t\n" |
48693 | "#endif /*defined(__need_size_t) */\n" |
48694 | "\n" |
48695 | "#if defined(__need_STDDEF_H_misc)\n" |
48696 | "/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is\n" |
48697 | " * enabled. */\n" |
48698 | "#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \\\n" |
48699 | " !defined(_RSIZE_T)) || __has_feature(modules)\n" |
48700 | "/* Always define rsize_t when modules are available. */\n" |
48701 | "#if !__has_feature(modules)\n" |
48702 | "#define _RSIZE_T\n" |
48703 | "#endif\n" |
48704 | "typedef __SIZE_TYPE__ rsize_t;\n" |
48705 | "#endif\n" |
48706 | "#endif /* defined(__need_STDDEF_H_misc) */\n" |
48707 | "\n" |
48708 | "#if defined(__need_wchar_t)\n" |
48709 | "#ifndef __cplusplus\n" |
48710 | "/* Always define wchar_t when modules are available. */\n" |
48711 | "#if !defined(_WCHAR_T) || __has_feature(modules)\n" |
48712 | "#if !__has_feature(modules)\n" |
48713 | "#define _WCHAR_T\n" |
48714 | "#if defined(_MSC_EXTENSIONS)\n" |
48715 | "#define _WCHAR_T_DEFINED\n" |
48716 | "#endif\n" |
48717 | "#endif\n" |
48718 | "typedef __WCHAR_TYPE__ wchar_t;\n" |
48719 | "#endif\n" |
48720 | "#endif\n" |
48721 | "#undef __need_wchar_t\n" |
48722 | "#endif /* defined(__need_wchar_t) */\n" |
48723 | "\n" |
48724 | "#if defined(__need_NULL)\n" |
48725 | "#undef NULL\n" |
48726 | "#ifdef __cplusplus\n" |
48727 | "# if !defined(__MINGW32__) && !defined(_MSC_VER)\n" |
48728 | "# define NULL __null\n" |
48729 | "# else\n" |
48730 | "# define NULL 0\n" |
48731 | "# endif\n" |
48732 | "#else\n" |
48733 | "# define NULL ((void*)0)\n" |
48734 | "#endif\n" |
48735 | "#ifdef __cplusplus\n" |
48736 | "#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)\n" |
48737 | "namespace std { typedef decltype(nullptr) nullptr_t; }\n" |
48738 | "using ::std::nullptr_t;\n" |
48739 | "#endif\n" |
48740 | "#endif\n" |
48741 | "#undef __need_NULL\n" |
48742 | "#endif /* defined(__need_NULL) */\n" |
48743 | "\n" |
48744 | "#if defined(__need_STDDEF_H_misc)\n" |
48745 | "#if __STDC_VERSION__ >= 201112L || __cplusplus >= 201103L\n" |
48746 | "#include \"__stddef_max_align_t.h\"\n" |
48747 | "#endif\n" |
48748 | "#define offsetof(t, d) __builtin_offsetof(t, d)\n" |
48749 | "#undef __need_STDDEF_H_misc\n" |
48750 | "#endif /* defined(__need_STDDEF_H_misc) */\n" |
48751 | "\n" |
48752 | "/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use\n" |
48753 | "__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */\n" |
48754 | "#if defined(__need_wint_t)\n" |
48755 | "/* Always define wint_t when modules are available. */\n" |
48756 | "#if !defined(_WINT_T) || __has_feature(modules)\n" |
48757 | "#if !__has_feature(modules)\n" |
48758 | "#define _WINT_T\n" |
48759 | "#endif\n" |
48760 | "typedef __WINT_TYPE__ wint_t;\n" |
48761 | "#endif\n" |
48762 | "#undef __need_wint_t\n" |
48763 | "#endif /* __need_wint_t */\n" |
48764 | "\n" |
48765 | "#endif\n" |
48766 | "" } , |
48767 | { "/builtins/stdint.h" , "/*===---- stdint.h - Standard header for sized integer types --------------===*\\\n" |
48768 | " *\n" |
48769 | " * Copyright (c) 2009 Chris Lattner\n" |
48770 | " *\n" |
48771 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
48772 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
48773 | " * in the Software without restriction, including without limitation the rights\n" |
48774 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
48775 | " * copies of the Software, and to permit persons to whom the Software is\n" |
48776 | " * furnished to do so, subject to the following conditions:\n" |
48777 | " *\n" |
48778 | " * The above copyright notice and this permission notice shall be included in\n" |
48779 | " * all copies or substantial portions of the Software.\n" |
48780 | " *\n" |
48781 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
48782 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
48783 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
48784 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
48785 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
48786 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
48787 | " * THE SOFTWARE.\n" |
48788 | " *\n" |
48789 | "\\*===----------------------------------------------------------------------===*/\n" |
48790 | "\n" |
48791 | "#ifndef __CLANG_STDINT_H2\n" |
48792 | "#define __CLANG_STDINT_H2\n" |
48793 | "\n" |
48794 | "/* If we're hosted, fall back to the system's stdint.h, which might have\n" |
48795 | " * additional definitions.\n" |
48796 | " */\n" |
48797 | "#if __STDC_HOSTED__ && __has_include_next(<stdint.h>)\n" |
48798 | "\n" |
48799 | "// C99 7.18.3 Limits of other integer types\n" |
48800 | "//\n" |
48801 | "// Footnote 219, 220: C++ implementations should define these macros only when\n" |
48802 | "// __STDC_LIMIT_MACROS is defined before <stdint.h> is included.\n" |
48803 | "//\n" |
48804 | "// Footnote 222: C++ implementations should define these macros only when\n" |
48805 | "// __STDC_CONSTANT_MACROS is defined before <stdint.h> is included.\n" |
48806 | "//\n" |
48807 | "// C++11 [cstdint.syn]p2:\n" |
48808 | "//\n" |
48809 | "// The macros defined by <cstdint> are provided unconditionally. In particular,\n" |
48810 | "// the symbols __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS (mentioned in\n" |
48811 | "// footnotes 219, 220, and 222 in the C standard) play no role in C++.\n" |
48812 | "//\n" |
48813 | "// C11 removed the problematic footnotes.\n" |
48814 | "//\n" |
48815 | "// Work around this inconsistency by always defining those macros in C++ mode,\n" |
48816 | "// so that a C library implementation which follows the C99 standard can be\n" |
48817 | "// used in C++.\n" |
48818 | "# ifdef __cplusplus\n" |
48819 | "# if !defined(__STDC_LIMIT_MACROS)\n" |
48820 | "# define __STDC_LIMIT_MACROS\n" |
48821 | "# define __STDC_LIMIT_MACROS_DEFINED_BY_CLANG\n" |
48822 | "# endif\n" |
48823 | "# if !defined(__STDC_CONSTANT_MACROS)\n" |
48824 | "# define __STDC_CONSTANT_MACROS\n" |
48825 | "# define __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG\n" |
48826 | "# endif\n" |
48827 | "# endif\n" |
48828 | "\n" |
48829 | "# include_next <stdint.h>\n" |
48830 | "\n" |
48831 | "# ifdef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG\n" |
48832 | "# undef __STDC_LIMIT_MACROS\n" |
48833 | "# undef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG\n" |
48834 | "# endif\n" |
48835 | "# ifdef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG\n" |
48836 | "# undef __STDC_CONSTANT_MACROS\n" |
48837 | "# undef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG\n" |
48838 | "# endif\n" |
48839 | "\n" |
48840 | "#else\n" |
48841 | "\n" |
48842 | "/* C99 7.18.1.1 Exact-width integer types.\n" |
48843 | " * C99 7.18.1.2 Minimum-width integer types.\n" |
48844 | " * C99 7.18.1.3 Fastest minimum-width integer types.\n" |
48845 | " *\n" |
48846 | " * The standard requires that exact-width type be defined for 8-, 16-, 32-, and\n" |
48847 | " * 64-bit types if they are implemented. Other exact width types are optional.\n" |
48848 | " * This implementation defines an exact-width types for every integer width\n" |
48849 | " * that is represented in the standard integer types.\n" |
48850 | " *\n" |
48851 | " * The standard also requires minimum-width types be defined for 8-, 16-, 32-,\n" |
48852 | " * and 64-bit widths regardless of whether there are corresponding exact-width\n" |
48853 | " * types.\n" |
48854 | " *\n" |
48855 | " * To accommodate targets that are missing types that are exactly 8, 16, 32, or\n" |
48856 | " * 64 bits wide, this implementation takes an approach of cascading\n" |
48857 | " * redefinitions, redefining __int_leastN_t to successively smaller exact-width\n" |
48858 | " * types. It is therefore important that the types are defined in order of\n" |
48859 | " * descending widths.\n" |
48860 | " *\n" |
48861 | " * We currently assume that the minimum-width types and the fastest\n" |
48862 | " * minimum-width types are the same. This is allowed by the standard, but is\n" |
48863 | " * suboptimal.\n" |
48864 | " *\n" |
48865 | " * In violation of the standard, some targets do not implement a type that is\n" |
48866 | " * wide enough to represent all of the required widths (8-, 16-, 32-, 64-bit).\n" |
48867 | " * To accommodate these targets, a required minimum-width type is only\n" |
48868 | " * defined if there exists an exact-width type of equal or greater width.\n" |
48869 | " */\n" |
48870 | "\n" |
48871 | "#ifdef __INT64_TYPE__\n" |
48872 | "# ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/\n" |
48873 | "typedef __INT64_TYPE__ int64_t;\n" |
48874 | "# endif /* __int8_t_defined */\n" |
48875 | "typedef __UINT64_TYPE__ uint64_t;\n" |
48876 | "# define __int_least64_t int64_t\n" |
48877 | "# define __uint_least64_t uint64_t\n" |
48878 | "# define __int_least32_t int64_t\n" |
48879 | "# define __uint_least32_t uint64_t\n" |
48880 | "# define __int_least16_t int64_t\n" |
48881 | "# define __uint_least16_t uint64_t\n" |
48882 | "# define __int_least8_t int64_t\n" |
48883 | "# define __uint_least8_t uint64_t\n" |
48884 | "#endif /* __INT64_TYPE__ */\n" |
48885 | "\n" |
48886 | "#ifdef __int_least64_t\n" |
48887 | "typedef __int_least64_t int_least64_t;\n" |
48888 | "typedef __uint_least64_t uint_least64_t;\n" |
48889 | "typedef __int_least64_t int_fast64_t;\n" |
48890 | "typedef __uint_least64_t uint_fast64_t;\n" |
48891 | "#endif /* __int_least64_t */\n" |
48892 | "\n" |
48893 | "#ifdef __INT56_TYPE__\n" |
48894 | "typedef __INT56_TYPE__ int56_t;\n" |
48895 | "typedef __UINT56_TYPE__ uint56_t;\n" |
48896 | "typedef int56_t int_least56_t;\n" |
48897 | "typedef uint56_t uint_least56_t;\n" |
48898 | "typedef int56_t int_fast56_t;\n" |
48899 | "typedef uint56_t uint_fast56_t;\n" |
48900 | "# define __int_least32_t int56_t\n" |
48901 | "# define __uint_least32_t uint56_t\n" |
48902 | "# define __int_least16_t int56_t\n" |
48903 | "# define __uint_least16_t uint56_t\n" |
48904 | "# define __int_least8_t int56_t\n" |
48905 | "# define __uint_least8_t uint56_t\n" |
48906 | "#endif /* __INT56_TYPE__ */\n" |
48907 | "\n" |
48908 | "\n" |
48909 | "#ifdef __INT48_TYPE__\n" |
48910 | "typedef __INT48_TYPE__ int48_t;\n" |
48911 | "typedef __UINT48_TYPE__ uint48_t;\n" |
48912 | "typedef int48_t int_least48_t;\n" |
48913 | "typedef uint48_t uint_least48_t;\n" |
48914 | "typedef int48_t int_fast48_t;\n" |
48915 | "typedef uint48_t uint_fast48_t;\n" |
48916 | "# define __int_least32_t int48_t\n" |
48917 | "# define __uint_least32_t uint48_t\n" |
48918 | "# define __int_least16_t int48_t\n" |
48919 | "# define __uint_least16_t uint48_t\n" |
48920 | "# define __int_least8_t int48_t\n" |
48921 | "# define __uint_least8_t uint48_t\n" |
48922 | "#endif /* __INT48_TYPE__ */\n" |
48923 | "\n" |
48924 | "\n" |
48925 | "#ifdef __INT40_TYPE__\n" |
48926 | "typedef __INT40_TYPE__ int40_t;\n" |
48927 | "typedef __UINT40_TYPE__ uint40_t;\n" |
48928 | "typedef int40_t int_least40_t;\n" |
48929 | "typedef uint40_t uint_least40_t;\n" |
48930 | "typedef int40_t int_fast40_t;\n" |
48931 | "typedef uint40_t uint_fast40_t;\n" |
48932 | "# define __int_least32_t int40_t\n" |
48933 | "# define __uint_least32_t uint40_t\n" |
48934 | "# define __int_least16_t int40_t\n" |
48935 | "# define __uint_least16_t uint40_t\n" |
48936 | "# define __int_least8_t int40_t\n" |
48937 | "# define __uint_least8_t uint40_t\n" |
48938 | "#endif /* __INT40_TYPE__ */\n" |
48939 | "\n" |
48940 | "\n" |
48941 | "#ifdef __INT32_TYPE__\n" |
48942 | "\n" |
48943 | "# ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/\n" |
48944 | "typedef __INT32_TYPE__ int32_t;\n" |
48945 | "# endif /* __int8_t_defined */\n" |
48946 | "\n" |
48947 | "# ifndef __uint32_t_defined /* more glibc compatibility */\n" |
48948 | "# define __uint32_t_defined\n" |
48949 | "typedef __UINT32_TYPE__ uint32_t;\n" |
48950 | "# endif /* __uint32_t_defined */\n" |
48951 | "\n" |
48952 | "# define __int_least32_t int32_t\n" |
48953 | "# define __uint_least32_t uint32_t\n" |
48954 | "# define __int_least16_t int32_t\n" |
48955 | "# define __uint_least16_t uint32_t\n" |
48956 | "# define __int_least8_t int32_t\n" |
48957 | "# define __uint_least8_t uint32_t\n" |
48958 | "#endif /* __INT32_TYPE__ */\n" |
48959 | "\n" |
48960 | "#ifdef __int_least32_t\n" |
48961 | "typedef __int_least32_t int_least32_t;\n" |
48962 | "typedef __uint_least32_t uint_least32_t;\n" |
48963 | "typedef __int_least32_t int_fast32_t;\n" |
48964 | "typedef __uint_least32_t uint_fast32_t;\n" |
48965 | "#endif /* __int_least32_t */\n" |
48966 | "\n" |
48967 | "#ifdef __INT24_TYPE__\n" |
48968 | "typedef __INT24_TYPE__ int24_t;\n" |
48969 | "typedef __UINT24_TYPE__ uint24_t;\n" |
48970 | "typedef int24_t int_least24_t;\n" |
48971 | "typedef uint24_t uint_least24_t;\n" |
48972 | "typedef int24_t int_fast24_t;\n" |
48973 | "typedef uint24_t uint_fast24_t;\n" |
48974 | "# define __int_least16_t int24_t\n" |
48975 | "# define __uint_least16_t uint24_t\n" |
48976 | "# define __int_least8_t int24_t\n" |
48977 | "# define __uint_least8_t uint24_t\n" |
48978 | "#endif /* __INT24_TYPE__ */\n" |
48979 | "\n" |
48980 | "#ifdef __INT16_TYPE__\n" |
48981 | "#ifndef __int8_t_defined /* glibc sys/types.h also defines int16_t*/\n" |
48982 | "typedef __INT16_TYPE__ int16_t;\n" |
48983 | "#endif /* __int8_t_defined */\n" |
48984 | "typedef __UINT16_TYPE__ uint16_t;\n" |
48985 | "# define __int_least16_t int16_t\n" |
48986 | "# define __uint_least16_t uint16_t\n" |
48987 | "# define __int_least8_t int16_t\n" |
48988 | "# define __uint_least8_t uint16_t\n" |
48989 | "#endif /* __INT16_TYPE__ */\n" |
48990 | "\n" |
48991 | "#ifdef __int_least16_t\n" |
48992 | "typedef __int_least16_t int_least16_t;\n" |
48993 | "typedef __uint_least16_t uint_least16_t;\n" |
48994 | "typedef __int_least16_t int_fast16_t;\n" |
48995 | "typedef __uint_least16_t uint_fast16_t;\n" |
48996 | "#endif /* __int_least16_t */\n" |
48997 | "\n" |
48998 | "\n" |
48999 | "#ifdef __INT8_TYPE__\n" |
49000 | "#ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/\n" |
49001 | "typedef __INT8_TYPE__ int8_t;\n" |
49002 | "#endif /* __int8_t_defined */\n" |
49003 | "typedef __UINT8_TYPE__ uint8_t;\n" |
49004 | "# define __int_least8_t int8_t\n" |
49005 | "# define __uint_least8_t uint8_t\n" |
49006 | "#endif /* __INT8_TYPE__ */\n" |
49007 | "\n" |
49008 | "#ifdef __int_least8_t\n" |
49009 | "typedef __int_least8_t int_least8_t;\n" |
49010 | "typedef __uint_least8_t uint_least8_t;\n" |
49011 | "typedef __int_least8_t int_fast8_t;\n" |
49012 | "typedef __uint_least8_t uint_fast8_t;\n" |
49013 | "#endif /* __int_least8_t */\n" |
49014 | "\n" |
49015 | "/* prevent glibc sys/types.h from defining conflicting types */\n" |
49016 | "#ifndef __int8_t_defined\n" |
49017 | "# define __int8_t_defined\n" |
49018 | "#endif /* __int8_t_defined */\n" |
49019 | "\n" |
49020 | "/* C99 7.18.1.4 Integer types capable of holding object pointers.\n" |
49021 | " */\n" |
49022 | "#define __stdint_join3(a,b,c) a ## b ## c\n" |
49023 | "\n" |
49024 | "#ifndef _INTPTR_T\n" |
49025 | "#ifndef __intptr_t_defined\n" |
49026 | "typedef __INTPTR_TYPE__ intptr_t;\n" |
49027 | "#define __intptr_t_defined\n" |
49028 | "#define _INTPTR_T\n" |
49029 | "#endif\n" |
49030 | "#endif\n" |
49031 | "\n" |
49032 | "#ifndef _UINTPTR_T\n" |
49033 | "typedef __UINTPTR_TYPE__ uintptr_t;\n" |
49034 | "#define _UINTPTR_T\n" |
49035 | "#endif\n" |
49036 | "\n" |
49037 | "/* C99 7.18.1.5 Greatest-width integer types.\n" |
49038 | " */\n" |
49039 | "typedef __INTMAX_TYPE__ intmax_t;\n" |
49040 | "typedef __UINTMAX_TYPE__ uintmax_t;\n" |
49041 | "\n" |
49042 | "/* C99 7.18.4 Macros for minimum-width integer constants.\n" |
49043 | " *\n" |
49044 | " * The standard requires that integer constant macros be defined for all the\n" |
49045 | " * minimum-width types defined above. As 8-, 16-, 32-, and 64-bit minimum-width\n" |
49046 | " * types are required, the corresponding integer constant macros are defined\n" |
49047 | " * here. This implementation also defines minimum-width types for every other\n" |
49048 | " * integer width that the target implements, so corresponding macros are\n" |
49049 | " * defined below, too.\n" |
49050 | " *\n" |
49051 | " * These macros are defined using the same successive-shrinking approach as\n" |
49052 | " * the type definitions above. It is likewise important that macros are defined\n" |
49053 | " * in order of decending width.\n" |
49054 | " *\n" |
49055 | " * Note that C++ should not check __STDC_CONSTANT_MACROS here, contrary to the\n" |
49056 | " * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).\n" |
49057 | " */\n" |
49058 | "\n" |
49059 | "#define __int_c_join(a, b) a ## b\n" |
49060 | "#define __int_c(v, suffix) __int_c_join(v, suffix)\n" |
49061 | "#define __uint_c(v, suffix) __int_c_join(v##U, suffix)\n" |
49062 | "\n" |
49063 | "\n" |
49064 | "#ifdef __INT64_TYPE__\n" |
49065 | "# ifdef __INT64_C_SUFFIX__\n" |
49066 | "# define __int64_c_suffix __INT64_C_SUFFIX__\n" |
49067 | "# define __int32_c_suffix __INT64_C_SUFFIX__\n" |
49068 | "# define __int16_c_suffix __INT64_C_SUFFIX__\n" |
49069 | "# define __int8_c_suffix __INT64_C_SUFFIX__\n" |
49070 | "# else\n" |
49071 | "# undef __int64_c_suffix\n" |
49072 | "# undef __int32_c_suffix\n" |
49073 | "# undef __int16_c_suffix\n" |
49074 | "# undef __int8_c_suffix\n" |
49075 | "# endif /* __INT64_C_SUFFIX__ */\n" |
49076 | "#endif /* __INT64_TYPE__ */\n" |
49077 | "\n" |
49078 | "#ifdef __int_least64_t\n" |
49079 | "# ifdef __int64_c_suffix\n" |
49080 | "# define INT64_C(v) __int_c(v, __int64_c_suffix)\n" |
49081 | "# define UINT64_C(v) __uint_c(v, __int64_c_suffix)\n" |
49082 | "# else\n" |
49083 | "# define INT64_C(v) v\n" |
49084 | "# define UINT64_C(v) v ## U\n" |
49085 | "# endif /* __int64_c_suffix */\n" |
49086 | "#endif /* __int_least64_t */\n" |
49087 | "\n" |
49088 | "\n" |
49089 | "#ifdef __INT56_TYPE__\n" |
49090 | "# ifdef __INT56_C_SUFFIX__\n" |
49091 | "# define INT56_C(v) __int_c(v, __INT56_C_SUFFIX__)\n" |
49092 | "# define UINT56_C(v) __uint_c(v, __INT56_C_SUFFIX__)\n" |
49093 | "# define __int32_c_suffix __INT56_C_SUFFIX__\n" |
49094 | "# define __int16_c_suffix __INT56_C_SUFFIX__\n" |
49095 | "# define __int8_c_suffix __INT56_C_SUFFIX__\n" |
49096 | "# else\n" |
49097 | "# define INT56_C(v) v\n" |
49098 | "# define UINT56_C(v) v ## U\n" |
49099 | "# undef __int32_c_suffix\n" |
49100 | "# undef __int16_c_suffix\n" |
49101 | "# undef __int8_c_suffix\n" |
49102 | "# endif /* __INT56_C_SUFFIX__ */\n" |
49103 | "#endif /* __INT56_TYPE__ */\n" |
49104 | "\n" |
49105 | "\n" |
49106 | "#ifdef __INT48_TYPE__\n" |
49107 | "# ifdef __INT48_C_SUFFIX__\n" |
49108 | "# define INT48_C(v) __int_c(v, __INT48_C_SUFFIX__)\n" |
49109 | "# define UINT48_C(v) __uint_c(v, __INT48_C_SUFFIX__)\n" |
49110 | "# define __int32_c_suffix __INT48_C_SUFFIX__\n" |
49111 | "# define __int16_c_suffix __INT48_C_SUFFIX__\n" |
49112 | "# define __int8_c_suffix __INT48_C_SUFFIX__\n" |
49113 | "# else\n" |
49114 | "# define INT48_C(v) v\n" |
49115 | "# define UINT48_C(v) v ## U\n" |
49116 | "# undef __int32_c_suffix\n" |
49117 | "# undef __int16_c_suffix\n" |
49118 | "# undef __int8_c_suffix\n" |
49119 | "# endif /* __INT48_C_SUFFIX__ */\n" |
49120 | "#endif /* __INT48_TYPE__ */\n" |
49121 | "\n" |
49122 | "\n" |
49123 | "#ifdef __INT40_TYPE__\n" |
49124 | "# ifdef __INT40_C_SUFFIX__\n" |
49125 | "# define INT40_C(v) __int_c(v, __INT40_C_SUFFIX__)\n" |
49126 | "# define UINT40_C(v) __uint_c(v, __INT40_C_SUFFIX__)\n" |
49127 | "# define __int32_c_suffix __INT40_C_SUFFIX__\n" |
49128 | "# define __int16_c_suffix __INT40_C_SUFFIX__\n" |
49129 | "# define __int8_c_suffix __INT40_C_SUFFIX__\n" |
49130 | "# else\n" |
49131 | "# define INT40_C(v) v\n" |
49132 | "# define UINT40_C(v) v ## U\n" |
49133 | "# undef __int32_c_suffix\n" |
49134 | "# undef __int16_c_suffix\n" |
49135 | "# undef __int8_c_suffix\n" |
49136 | "# endif /* __INT40_C_SUFFIX__ */\n" |
49137 | "#endif /* __INT40_TYPE__ */\n" |
49138 | "\n" |
49139 | "\n" |
49140 | "#ifdef __INT32_TYPE__\n" |
49141 | "# ifdef __INT32_C_SUFFIX__\n" |
49142 | "# define __int32_c_suffix __INT32_C_SUFFIX__\n" |
49143 | "# define __int16_c_suffix __INT32_C_SUFFIX__\n" |
49144 | "# define __int8_c_suffix __INT32_C_SUFFIX__\n" |
49145 | "#else\n" |
49146 | "# undef __int32_c_suffix\n" |
49147 | "# undef __int16_c_suffix\n" |
49148 | "# undef __int8_c_suffix\n" |
49149 | "# endif /* __INT32_C_SUFFIX__ */\n" |
49150 | "#endif /* __INT32_TYPE__ */\n" |
49151 | "\n" |
49152 | "#ifdef __int_least32_t\n" |
49153 | "# ifdef __int32_c_suffix\n" |
49154 | "# define INT32_C(v) __int_c(v, __int32_c_suffix)\n" |
49155 | "# define UINT32_C(v) __uint_c(v, __int32_c_suffix)\n" |
49156 | "# else\n" |
49157 | "# define INT32_C(v) v\n" |
49158 | "# define UINT32_C(v) v ## U\n" |
49159 | "# endif /* __int32_c_suffix */\n" |
49160 | "#endif /* __int_least32_t */\n" |
49161 | "\n" |
49162 | "\n" |
49163 | "#ifdef __INT24_TYPE__\n" |
49164 | "# ifdef __INT24_C_SUFFIX__\n" |
49165 | "# define INT24_C(v) __int_c(v, __INT24_C_SUFFIX__)\n" |
49166 | "# define UINT24_C(v) __uint_c(v, __INT24_C_SUFFIX__)\n" |
49167 | "# define __int16_c_suffix __INT24_C_SUFFIX__\n" |
49168 | "# define __int8_c_suffix __INT24_C_SUFFIX__\n" |
49169 | "# else\n" |
49170 | "# define INT24_C(v) v\n" |
49171 | "# define UINT24_C(v) v ## U\n" |
49172 | "# undef __int16_c_suffix\n" |
49173 | "# undef __int8_c_suffix\n" |
49174 | "# endif /* __INT24_C_SUFFIX__ */\n" |
49175 | "#endif /* __INT24_TYPE__ */\n" |
49176 | "\n" |
49177 | "\n" |
49178 | "#ifdef __INT16_TYPE__\n" |
49179 | "# ifdef __INT16_C_SUFFIX__\n" |
49180 | "# define __int16_c_suffix __INT16_C_SUFFIX__\n" |
49181 | "# define __int8_c_suffix __INT16_C_SUFFIX__\n" |
49182 | "#else\n" |
49183 | "# undef __int16_c_suffix\n" |
49184 | "# undef __int8_c_suffix\n" |
49185 | "# endif /* __INT16_C_SUFFIX__ */\n" |
49186 | "#endif /* __INT16_TYPE__ */\n" |
49187 | "\n" |
49188 | "#ifdef __int_least16_t\n" |
49189 | "# ifdef __int16_c_suffix\n" |
49190 | "# define INT16_C(v) __int_c(v, __int16_c_suffix)\n" |
49191 | "# define UINT16_C(v) __uint_c(v, __int16_c_suffix)\n" |
49192 | "# else\n" |
49193 | "# define INT16_C(v) v\n" |
49194 | "# define UINT16_C(v) v ## U\n" |
49195 | "# endif /* __int16_c_suffix */\n" |
49196 | "#endif /* __int_least16_t */\n" |
49197 | "\n" |
49198 | "\n" |
49199 | "#ifdef __INT8_TYPE__\n" |
49200 | "# ifdef __INT8_C_SUFFIX__\n" |
49201 | "# define __int8_c_suffix __INT8_C_SUFFIX__\n" |
49202 | "#else\n" |
49203 | "# undef __int8_c_suffix\n" |
49204 | "# endif /* __INT8_C_SUFFIX__ */\n" |
49205 | "#endif /* __INT8_TYPE__ */\n" |
49206 | "\n" |
49207 | "#ifdef __int_least8_t\n" |
49208 | "# ifdef __int8_c_suffix\n" |
49209 | "# define INT8_C(v) __int_c(v, __int8_c_suffix)\n" |
49210 | "# define UINT8_C(v) __uint_c(v, __int8_c_suffix)\n" |
49211 | "# else\n" |
49212 | "# define INT8_C(v) v\n" |
49213 | "# define UINT8_C(v) v ## U\n" |
49214 | "# endif /* __int8_c_suffix */\n" |
49215 | "#endif /* __int_least8_t */\n" |
49216 | "\n" |
49217 | "\n" |
49218 | "/* C99 7.18.2.1 Limits of exact-width integer types.\n" |
49219 | " * C99 7.18.2.2 Limits of minimum-width integer types.\n" |
49220 | " * C99 7.18.2.3 Limits of fastest minimum-width integer types.\n" |
49221 | " *\n" |
49222 | " * The presence of limit macros are completely optional in C99. This\n" |
49223 | " * implementation defines limits for all of the types (exact- and\n" |
49224 | " * minimum-width) that it defines above, using the limits of the minimum-width\n" |
49225 | " * type for any types that do not have exact-width representations.\n" |
49226 | " *\n" |
49227 | " * As in the type definitions, this section takes an approach of\n" |
49228 | " * successive-shrinking to determine which limits to use for the standard (8,\n" |
49229 | " * 16, 32, 64) bit widths when they don't have exact representations. It is\n" |
49230 | " * therefore important that the definitions be kept in order of decending\n" |
49231 | " * widths.\n" |
49232 | " *\n" |
49233 | " * Note that C++ should not check __STDC_LIMIT_MACROS here, contrary to the\n" |
49234 | " * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).\n" |
49235 | " */\n" |
49236 | "\n" |
49237 | "#ifdef __INT64_TYPE__\n" |
49238 | "# define INT64_MAX INT64_C( 9223372036854775807)\n" |
49239 | "# define INT64_MIN (-INT64_C( 9223372036854775807)-1)\n" |
49240 | "# define UINT64_MAX UINT64_C(18446744073709551615)\n" |
49241 | "# define __INT_LEAST64_MIN INT64_MIN\n" |
49242 | "# define __INT_LEAST64_MAX INT64_MAX\n" |
49243 | "# define __UINT_LEAST64_MAX UINT64_MAX\n" |
49244 | "# define __INT_LEAST32_MIN INT64_MIN\n" |
49245 | "# define __INT_LEAST32_MAX INT64_MAX\n" |
49246 | "# define __UINT_LEAST32_MAX UINT64_MAX\n" |
49247 | "# define __INT_LEAST16_MIN INT64_MIN\n" |
49248 | "# define __INT_LEAST16_MAX INT64_MAX\n" |
49249 | "# define __UINT_LEAST16_MAX UINT64_MAX\n" |
49250 | "# define __INT_LEAST8_MIN INT64_MIN\n" |
49251 | "# define __INT_LEAST8_MAX INT64_MAX\n" |
49252 | "# define __UINT_LEAST8_MAX UINT64_MAX\n" |
49253 | "#endif /* __INT64_TYPE__ */\n" |
49254 | "\n" |
49255 | "#ifdef __INT_LEAST64_MIN\n" |
49256 | "# define INT_LEAST64_MIN __INT_LEAST64_MIN\n" |
49257 | "# define INT_LEAST64_MAX __INT_LEAST64_MAX\n" |
49258 | "# define UINT_LEAST64_MAX __UINT_LEAST64_MAX\n" |
49259 | "# define INT_FAST64_MIN __INT_LEAST64_MIN\n" |
49260 | "# define INT_FAST64_MAX __INT_LEAST64_MAX\n" |
49261 | "# define UINT_FAST64_MAX __UINT_LEAST64_MAX\n" |
49262 | "#endif /* __INT_LEAST64_MIN */\n" |
49263 | "\n" |
49264 | "\n" |
49265 | "#ifdef __INT56_TYPE__\n" |
49266 | "# define INT56_MAX INT56_C(36028797018963967)\n" |
49267 | "# define INT56_MIN (-INT56_C(36028797018963967)-1)\n" |
49268 | "# define UINT56_MAX UINT56_C(72057594037927935)\n" |
49269 | "# define INT_LEAST56_MIN INT56_MIN\n" |
49270 | "# define INT_LEAST56_MAX INT56_MAX\n" |
49271 | "# define UINT_LEAST56_MAX UINT56_MAX\n" |
49272 | "# define INT_FAST56_MIN INT56_MIN\n" |
49273 | "# define INT_FAST56_MAX INT56_MAX\n" |
49274 | "# define UINT_FAST56_MAX UINT56_MAX\n" |
49275 | "# define __INT_LEAST32_MIN INT56_MIN\n" |
49276 | "# define __INT_LEAST32_MAX INT56_MAX\n" |
49277 | "# define __UINT_LEAST32_MAX UINT56_MAX\n" |
49278 | "# define __INT_LEAST16_MIN INT56_MIN\n" |
49279 | "# define __INT_LEAST16_MAX INT56_MAX\n" |
49280 | "# define __UINT_LEAST16_MAX UINT56_MAX\n" |
49281 | "# define __INT_LEAST8_MIN INT56_MIN\n" |
49282 | "# define __INT_LEAST8_MAX INT56_MAX\n" |
49283 | "# define __UINT_LEAST8_MAX UINT56_MAX\n" |
49284 | "#endif /* __INT56_TYPE__ */\n" |
49285 | "\n" |
49286 | "\n" |
49287 | "#ifdef __INT48_TYPE__\n" |
49288 | "# define INT48_MAX INT48_C(140737488355327)\n" |
49289 | "# define INT48_MIN (-INT48_C(140737488355327)-1)\n" |
49290 | "# define UINT48_MAX UINT48_C(281474976710655)\n" |
49291 | "# define INT_LEAST48_MIN INT48_MIN\n" |
49292 | "# define INT_LEAST48_MAX INT48_MAX\n" |
49293 | "# define UINT_LEAST48_MAX UINT48_MAX\n" |
49294 | "# define INT_FAST48_MIN INT48_MIN\n" |
49295 | "# define INT_FAST48_MAX INT48_MAX\n" |
49296 | "# define UINT_FAST48_MAX UINT48_MAX\n" |
49297 | "# define __INT_LEAST32_MIN INT48_MIN\n" |
49298 | "# define __INT_LEAST32_MAX INT48_MAX\n" |
49299 | "# define __UINT_LEAST32_MAX UINT48_MAX\n" |
49300 | "# define __INT_LEAST16_MIN INT48_MIN\n" |
49301 | "# define __INT_LEAST16_MAX INT48_MAX\n" |
49302 | "# define __UINT_LEAST16_MAX UINT48_MAX\n" |
49303 | "# define __INT_LEAST8_MIN INT48_MIN\n" |
49304 | "# define __INT_LEAST8_MAX INT48_MAX\n" |
49305 | "# define __UINT_LEAST8_MAX UINT48_MAX\n" |
49306 | "#endif /* __INT48_TYPE__ */\n" |
49307 | "\n" |
49308 | "\n" |
49309 | "#ifdef __INT40_TYPE__\n" |
49310 | "# define INT40_MAX INT40_C(549755813887)\n" |
49311 | "# define INT40_MIN (-INT40_C(549755813887)-1)\n" |
49312 | "# define UINT40_MAX UINT40_C(1099511627775)\n" |
49313 | "# define INT_LEAST40_MIN INT40_MIN\n" |
49314 | "# define INT_LEAST40_MAX INT40_MAX\n" |
49315 | "# define UINT_LEAST40_MAX UINT40_MAX\n" |
49316 | "# define INT_FAST40_MIN INT40_MIN\n" |
49317 | "# define INT_FAST40_MAX INT40_MAX\n" |
49318 | "# define UINT_FAST40_MAX UINT40_MAX\n" |
49319 | "# define __INT_LEAST32_MIN INT40_MIN\n" |
49320 | "# define __INT_LEAST32_MAX INT40_MAX\n" |
49321 | "# define __UINT_LEAST32_MAX UINT40_MAX\n" |
49322 | "# define __INT_LEAST16_MIN INT40_MIN\n" |
49323 | "# define __INT_LEAST16_MAX INT40_MAX\n" |
49324 | "# define __UINT_LEAST16_MAX UINT40_MAX\n" |
49325 | "# define __INT_LEAST8_MIN INT40_MIN\n" |
49326 | "# define __INT_LEAST8_MAX INT40_MAX\n" |
49327 | "# define __UINT_LEAST8_MAX UINT40_MAX\n" |
49328 | "#endif /* __INT40_TYPE__ */\n" |
49329 | "\n" |
49330 | "\n" |
49331 | "#ifdef __INT32_TYPE__\n" |
49332 | "# define INT32_MAX INT32_C(2147483647)\n" |
49333 | "# define INT32_MIN (-INT32_C(2147483647)-1)\n" |
49334 | "# define UINT32_MAX UINT32_C(4294967295)\n" |
49335 | "# define __INT_LEAST32_MIN INT32_MIN\n" |
49336 | "# define __INT_LEAST32_MAX INT32_MAX\n" |
49337 | "# define __UINT_LEAST32_MAX UINT32_MAX\n" |
49338 | "# define __INT_LEAST16_MIN INT32_MIN\n" |
49339 | "# define __INT_LEAST16_MAX INT32_MAX\n" |
49340 | "# define __UINT_LEAST16_MAX UINT32_MAX\n" |
49341 | "# define __INT_LEAST8_MIN INT32_MIN\n" |
49342 | "# define __INT_LEAST8_MAX INT32_MAX\n" |
49343 | "# define __UINT_LEAST8_MAX UINT32_MAX\n" |
49344 | "#endif /* __INT32_TYPE__ */\n" |
49345 | "\n" |
49346 | "#ifdef __INT_LEAST32_MIN\n" |
49347 | "# define INT_LEAST32_MIN __INT_LEAST32_MIN\n" |
49348 | "# define INT_LEAST32_MAX __INT_LEAST32_MAX\n" |
49349 | "# define UINT_LEAST32_MAX __UINT_LEAST32_MAX\n" |
49350 | "# define INT_FAST32_MIN __INT_LEAST32_MIN\n" |
49351 | "# define INT_FAST32_MAX __INT_LEAST32_MAX\n" |
49352 | "# define UINT_FAST32_MAX __UINT_LEAST32_MAX\n" |
49353 | "#endif /* __INT_LEAST32_MIN */\n" |
49354 | "\n" |
49355 | "\n" |
49356 | "#ifdef __INT24_TYPE__\n" |
49357 | "# define INT24_MAX INT24_C(8388607)\n" |
49358 | "# define INT24_MIN (-INT24_C(8388607)-1)\n" |
49359 | "# define UINT24_MAX UINT24_C(16777215)\n" |
49360 | "# define INT_LEAST24_MIN INT24_MIN\n" |
49361 | "# define INT_LEAST24_MAX INT24_MAX\n" |
49362 | "# define UINT_LEAST24_MAX UINT24_MAX\n" |
49363 | "# define INT_FAST24_MIN INT24_MIN\n" |
49364 | "# define INT_FAST24_MAX INT24_MAX\n" |
49365 | "# define UINT_FAST24_MAX UINT24_MAX\n" |
49366 | "# define __INT_LEAST16_MIN INT24_MIN\n" |
49367 | "# define __INT_LEAST16_MAX INT24_MAX\n" |
49368 | "# define __UINT_LEAST16_MAX UINT24_MAX\n" |
49369 | "# define __INT_LEAST8_MIN INT24_MIN\n" |
49370 | "# define __INT_LEAST8_MAX INT24_MAX\n" |
49371 | "# define __UINT_LEAST8_MAX UINT24_MAX\n" |
49372 | "#endif /* __INT24_TYPE__ */\n" |
49373 | "\n" |
49374 | "\n" |
49375 | "#ifdef __INT16_TYPE__\n" |
49376 | "#define INT16_MAX INT16_C(32767)\n" |
49377 | "#define INT16_MIN (-INT16_C(32767)-1)\n" |
49378 | "#define UINT16_MAX UINT16_C(65535)\n" |
49379 | "# define __INT_LEAST16_MIN INT16_MIN\n" |
49380 | "# define __INT_LEAST16_MAX INT16_MAX\n" |
49381 | "# define __UINT_LEAST16_MAX UINT16_MAX\n" |
49382 | "# define __INT_LEAST8_MIN INT16_MIN\n" |
49383 | "# define __INT_LEAST8_MAX INT16_MAX\n" |
49384 | "# define __UINT_LEAST8_MAX UINT16_MAX\n" |
49385 | "#endif /* __INT16_TYPE__ */\n" |
49386 | "\n" |
49387 | "#ifdef __INT_LEAST16_MIN\n" |
49388 | "# define INT_LEAST16_MIN __INT_LEAST16_MIN\n" |
49389 | "# define INT_LEAST16_MAX __INT_LEAST16_MAX\n" |
49390 | "# define UINT_LEAST16_MAX __UINT_LEAST16_MAX\n" |
49391 | "# define INT_FAST16_MIN __INT_LEAST16_MIN\n" |
49392 | "# define INT_FAST16_MAX __INT_LEAST16_MAX\n" |
49393 | "# define UINT_FAST16_MAX __UINT_LEAST16_MAX\n" |
49394 | "#endif /* __INT_LEAST16_MIN */\n" |
49395 | "\n" |
49396 | "\n" |
49397 | "#ifdef __INT8_TYPE__\n" |
49398 | "# define INT8_MAX INT8_C(127)\n" |
49399 | "# define INT8_MIN (-INT8_C(127)-1)\n" |
49400 | "# define UINT8_MAX UINT8_C(255)\n" |
49401 | "# define __INT_LEAST8_MIN INT8_MIN\n" |
49402 | "# define __INT_LEAST8_MAX INT8_MAX\n" |
49403 | "# define __UINT_LEAST8_MAX UINT8_MAX\n" |
49404 | "#endif /* __INT8_TYPE__ */\n" |
49405 | "\n" |
49406 | "#ifdef __INT_LEAST8_MIN\n" |
49407 | "# define INT_LEAST8_MIN __INT_LEAST8_MIN\n" |
49408 | "# define INT_LEAST8_MAX __INT_LEAST8_MAX\n" |
49409 | "# define UINT_LEAST8_MAX __UINT_LEAST8_MAX\n" |
49410 | "# define INT_FAST8_MIN __INT_LEAST8_MIN\n" |
49411 | "# define INT_FAST8_MAX __INT_LEAST8_MAX\n" |
49412 | "# define UINT_FAST8_MAX __UINT_LEAST8_MAX\n" |
49413 | "#endif /* __INT_LEAST8_MIN */\n" |
49414 | "\n" |
49415 | "/* Some utility macros */\n" |
49416 | "#define __INTN_MIN(n) __stdint_join3( INT, n, _MIN)\n" |
49417 | "#define __INTN_MAX(n) __stdint_join3( INT, n, _MAX)\n" |
49418 | "#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX)\n" |
49419 | "#define __INTN_C(n, v) __stdint_join3( INT, n, _C(v))\n" |
49420 | "#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v))\n" |
49421 | "\n" |
49422 | "/* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */\n" |
49423 | "/* C99 7.18.3 Limits of other integer types. */\n" |
49424 | "\n" |
49425 | "#define INTPTR_MIN (-__INTPTR_MAX__-1)\n" |
49426 | "#define INTPTR_MAX __INTPTR_MAX__\n" |
49427 | "#define UINTPTR_MAX __UINTPTR_MAX__\n" |
49428 | "#define PTRDIFF_MIN (-__PTRDIFF_MAX__-1)\n" |
49429 | "#define PTRDIFF_MAX __PTRDIFF_MAX__\n" |
49430 | "#define SIZE_MAX __SIZE_MAX__\n" |
49431 | "\n" |
49432 | "/* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__\n" |
49433 | " * is enabled. */\n" |
49434 | "#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1\n" |
49435 | "#define RSIZE_MAX (SIZE_MAX >> 1)\n" |
49436 | "#endif\n" |
49437 | "\n" |
49438 | "/* C99 7.18.2.5 Limits of greatest-width integer types. */\n" |
49439 | "#define INTMAX_MIN (-__INTMAX_MAX__-1)\n" |
49440 | "#define INTMAX_MAX __INTMAX_MAX__\n" |
49441 | "#define UINTMAX_MAX __UINTMAX_MAX__\n" |
49442 | "\n" |
49443 | "/* C99 7.18.3 Limits of other integer types. */\n" |
49444 | "#define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__)\n" |
49445 | "#define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__)\n" |
49446 | "#ifdef __WINT_UNSIGNED__\n" |
49447 | "# define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0)\n" |
49448 | "# define WINT_MAX __UINTN_MAX(__WINT_WIDTH__)\n" |
49449 | "#else\n" |
49450 | "# define WINT_MIN __INTN_MIN(__WINT_WIDTH__)\n" |
49451 | "# define WINT_MAX __INTN_MAX(__WINT_WIDTH__)\n" |
49452 | "#endif\n" |
49453 | "\n" |
49454 | "#ifndef WCHAR_MAX\n" |
49455 | "# define WCHAR_MAX __WCHAR_MAX__\n" |
49456 | "#endif\n" |
49457 | "#ifndef WCHAR_MIN\n" |
49458 | "# if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__)\n" |
49459 | "# define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__)\n" |
49460 | "# else\n" |
49461 | "# define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0)\n" |
49462 | "# endif\n" |
49463 | "#endif\n" |
49464 | "\n" |
49465 | "/* 7.18.4.2 Macros for greatest-width integer constants. */\n" |
49466 | "#define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__)\n" |
49467 | "#define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__)\n" |
49468 | "\n" |
49469 | "#endif /* __STDC_HOSTED__ */\n" |
49470 | "#endif /* __CLANG_STDINT_H2 */\n" |
49471 | "" } , |
49472 | { "/builtins/stdnoreturn.h" , "/*===---- stdnoreturn.h - Standard header for noreturn macro ---------------===\n" |
49473 | " *\n" |
49474 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
49475 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
49476 | " * in the Software without restriction, including without limitation the rights\n" |
49477 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
49478 | " * copies of the Software, and to permit persons to whom the Software is\n" |
49479 | " * furnished to do so, subject to the following conditions:\n" |
49480 | " *\n" |
49481 | " * The above copyright notice and this permission notice shall be included in\n" |
49482 | " * all copies or substantial portions of the Software.\n" |
49483 | " *\n" |
49484 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
49485 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
49486 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
49487 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
49488 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
49489 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
49490 | " * THE SOFTWARE.\n" |
49491 | " *\n" |
49492 | " *===-----------------------------------------------------------------------===\n" |
49493 | " */\n" |
49494 | "\n" |
49495 | "#ifndef __STDNORETURN_H\n" |
49496 | "#define __STDNORETURN_H\n" |
49497 | "\n" |
49498 | "#define noreturn _Noreturn\n" |
49499 | "#define __noreturn_is_defined 1\n" |
49500 | "\n" |
49501 | "#endif /* __STDNORETURN_H */\n" |
49502 | "" } , |
49503 | { "/builtins/tbmintrin.h" , "/*===---- tbmintrin.h - TBM intrinsics -------------------------------------===\n" |
49504 | " *\n" |
49505 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
49506 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
49507 | " * in the Software without restriction, including without limitation the rights\n" |
49508 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
49509 | " * copies of the Software, and to permit persons to whom the Software is\n" |
49510 | " * furnished to do so, subject to the following conditions:\n" |
49511 | " *\n" |
49512 | " * The above copyright notice and this permission notice shall be included in\n" |
49513 | " * all copies or substantial portions of the Software.\n" |
49514 | " *\n" |
49515 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
49516 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
49517 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
49518 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
49519 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
49520 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
49521 | " * THE SOFTWARE.\n" |
49522 | " *\n" |
49523 | " *===-----------------------------------------------------------------------===\n" |
49524 | " */\n" |
49525 | "\n" |
49526 | "#ifndef __X86INTRIN_H\n" |
49527 | "#error \"Never use <tbmintrin.h> directly; include <x86intrin.h> instead.\"\n" |
49528 | "#endif\n" |
49529 | "\n" |
49530 | "#ifndef __TBMINTRIN_H\n" |
49531 | "#define __TBMINTRIN_H\n" |
49532 | "\n" |
49533 | "/* Define the default attributes for the functions in this file. */\n" |
49534 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"tbm\")))\n" |
49535 | "\n" |
49536 | "#define __bextri_u32(a, b) \\\n" |
49537 | " ((unsigned int)__builtin_ia32_bextri_u32((unsigned int)(a), \\\n" |
49538 | " (unsigned int)(b)))\n" |
49539 | "\n" |
49540 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
49541 | "__blcfill_u32(unsigned int __a)\n" |
49542 | "{\n" |
49543 | " return __a & (__a + 1);\n" |
49544 | "}\n" |
49545 | "\n" |
49546 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
49547 | "__blci_u32(unsigned int __a)\n" |
49548 | "{\n" |
49549 | " return __a | ~(__a + 1);\n" |
49550 | "}\n" |
49551 | "\n" |
49552 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
49553 | "__blcic_u32(unsigned int __a)\n" |
49554 | "{\n" |
49555 | " return ~__a & (__a + 1);\n" |
49556 | "}\n" |
49557 | "\n" |
49558 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
49559 | "__blcmsk_u32(unsigned int __a)\n" |
49560 | "{\n" |
49561 | " return __a ^ (__a + 1);\n" |
49562 | "}\n" |
49563 | "\n" |
49564 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
49565 | "__blcs_u32(unsigned int __a)\n" |
49566 | "{\n" |
49567 | " return __a | (__a + 1);\n" |
49568 | "}\n" |
49569 | "\n" |
49570 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
49571 | "__blsfill_u32(unsigned int __a)\n" |
49572 | "{\n" |
49573 | " return __a | (__a - 1);\n" |
49574 | "}\n" |
49575 | "\n" |
49576 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
49577 | "__blsic_u32(unsigned int __a)\n" |
49578 | "{\n" |
49579 | " return ~__a | (__a - 1);\n" |
49580 | "}\n" |
49581 | "\n" |
49582 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
49583 | "__t1mskc_u32(unsigned int __a)\n" |
49584 | "{\n" |
49585 | " return ~__a | (__a + 1);\n" |
49586 | "}\n" |
49587 | "\n" |
49588 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
49589 | "__tzmsk_u32(unsigned int __a)\n" |
49590 | "{\n" |
49591 | " return ~__a & (__a - 1);\n" |
49592 | "}\n" |
49593 | "\n" |
49594 | "#ifdef __x86_64__\n" |
49595 | "#define __bextri_u64(a, b) \\\n" |
49596 | " ((unsigned long long)__builtin_ia32_bextri_u64((unsigned long long)(a), \\\n" |
49597 | " (unsigned long long)(b)))\n" |
49598 | "\n" |
49599 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
49600 | "__blcfill_u64(unsigned long long __a)\n" |
49601 | "{\n" |
49602 | " return __a & (__a + 1);\n" |
49603 | "}\n" |
49604 | "\n" |
49605 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
49606 | "__blci_u64(unsigned long long __a)\n" |
49607 | "{\n" |
49608 | " return __a | ~(__a + 1);\n" |
49609 | "}\n" |
49610 | "\n" |
49611 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
49612 | "__blcic_u64(unsigned long long __a)\n" |
49613 | "{\n" |
49614 | " return ~__a & (__a + 1);\n" |
49615 | "}\n" |
49616 | "\n" |
49617 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
49618 | "__blcmsk_u64(unsigned long long __a)\n" |
49619 | "{\n" |
49620 | " return __a ^ (__a + 1);\n" |
49621 | "}\n" |
49622 | "\n" |
49623 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
49624 | "__blcs_u64(unsigned long long __a)\n" |
49625 | "{\n" |
49626 | " return __a | (__a + 1);\n" |
49627 | "}\n" |
49628 | "\n" |
49629 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
49630 | "__blsfill_u64(unsigned long long __a)\n" |
49631 | "{\n" |
49632 | " return __a | (__a - 1);\n" |
49633 | "}\n" |
49634 | "\n" |
49635 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
49636 | "__blsic_u64(unsigned long long __a)\n" |
49637 | "{\n" |
49638 | " return ~__a | (__a - 1);\n" |
49639 | "}\n" |
49640 | "\n" |
49641 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
49642 | "__t1mskc_u64(unsigned long long __a)\n" |
49643 | "{\n" |
49644 | " return ~__a | (__a + 1);\n" |
49645 | "}\n" |
49646 | "\n" |
49647 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
49648 | "__tzmsk_u64(unsigned long long __a)\n" |
49649 | "{\n" |
49650 | " return ~__a & (__a - 1);\n" |
49651 | "}\n" |
49652 | "#endif\n" |
49653 | "\n" |
49654 | "#undef __DEFAULT_FN_ATTRS\n" |
49655 | "\n" |
49656 | "#endif /* __TBMINTRIN_H */\n" |
49657 | "" } , |
49658 | { "/builtins/tgmath.h" , "/*===---- tgmath.h - Standard header for type generic math ----------------===*\\\n" |
49659 | " *\n" |
49660 | " * Copyright (c) 2009 Howard Hinnant\n" |
49661 | " *\n" |
49662 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
49663 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
49664 | " * in the Software without restriction, including without limitation the rights\n" |
49665 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
49666 | " * copies of the Software, and to permit persons to whom the Software is\n" |
49667 | " * furnished to do so, subject to the following conditions:\n" |
49668 | " *\n" |
49669 | " * The above copyright notice and this permission notice shall be included in\n" |
49670 | " * all copies or substantial portions of the Software.\n" |
49671 | " *\n" |
49672 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
49673 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
49674 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
49675 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
49676 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
49677 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
49678 | " * THE SOFTWARE.\n" |
49679 | " *\n" |
49680 | "\\*===----------------------------------------------------------------------===*/\n" |
49681 | "\n" |
49682 | "#ifndef __CLANG_TGMATH_H\n" |
49683 | "#define __CLANG_TGMATH_H\n" |
49684 | "\n" |
49685 | "/* C99 7.22 Type-generic math <tgmath.h>. */\n" |
49686 | "#include <math.h>\n" |
49687 | "\n" |
49688 | "/*\n" |
49689 | " * Allow additional definitions and implementation-defined values on Apple\n" |
49690 | " * platforms. This is done after #include <math.h> to avoid depcycle conflicts\n" |
49691 | " * between libcxx and darwin in C++ modules builds.\n" |
49692 | " */\n" |
49693 | "#if defined(__APPLE__) && __STDC_HOSTED__ && __has_include_next(<tgmath.h>)\n" |
49694 | "# include_next <tgmath.h>\n" |
49695 | "#else\n" |
49696 | "\n" |
49697 | "/* C++ handles type genericity with overloading in math.h. */\n" |
49698 | "#ifndef __cplusplus\n" |
49699 | "#include <complex.h>\n" |
49700 | "\n" |
49701 | "#define _TG_ATTRSp __attribute__((__overloadable__))\n" |
49702 | "#define _TG_ATTRS __attribute__((__overloadable__, __always_inline__))\n" |
49703 | "\n" |
49704 | "// promotion\n" |
49705 | "\n" |
49706 | "typedef void _Argument_type_is_not_arithmetic;\n" |
49707 | "static _Argument_type_is_not_arithmetic __tg_promote(...)\n" |
49708 | " __attribute__((__unavailable__,__overloadable__));\n" |
49709 | "static double _TG_ATTRSp __tg_promote(int);\n" |
49710 | "static double _TG_ATTRSp __tg_promote(unsigned int);\n" |
49711 | "static double _TG_ATTRSp __tg_promote(long);\n" |
49712 | "static double _TG_ATTRSp __tg_promote(unsigned long);\n" |
49713 | "static double _TG_ATTRSp __tg_promote(long long);\n" |
49714 | "static double _TG_ATTRSp __tg_promote(unsigned long long);\n" |
49715 | "static float _TG_ATTRSp __tg_promote(float);\n" |
49716 | "static double _TG_ATTRSp __tg_promote(double);\n" |
49717 | "static long double _TG_ATTRSp __tg_promote(long double);\n" |
49718 | "static float _Complex _TG_ATTRSp __tg_promote(float _Complex);\n" |
49719 | "static double _Complex _TG_ATTRSp __tg_promote(double _Complex);\n" |
49720 | "static long double _Complex _TG_ATTRSp __tg_promote(long double _Complex);\n" |
49721 | "\n" |
49722 | "#define __tg_promote1(__x) (__typeof__(__tg_promote(__x)))\n" |
49723 | "#define __tg_promote2(__x, __y) (__typeof__(__tg_promote(__x) + \\\n" |
49724 | " __tg_promote(__y)))\n" |
49725 | "#define __tg_promote3(__x, __y, __z) (__typeof__(__tg_promote(__x) + \\\n" |
49726 | " __tg_promote(__y) + \\\n" |
49727 | " __tg_promote(__z)))\n" |
49728 | "\n" |
49729 | "// acos\n" |
49730 | "\n" |
49731 | "static float\n" |
49732 | " _TG_ATTRS\n" |
49733 | " __tg_acos(float __x) {return acosf(__x);}\n" |
49734 | "\n" |
49735 | "static double\n" |
49736 | " _TG_ATTRS\n" |
49737 | " __tg_acos(double __x) {return acos(__x);}\n" |
49738 | "\n" |
49739 | "static long double\n" |
49740 | " _TG_ATTRS\n" |
49741 | " __tg_acos(long double __x) {return acosl(__x);}\n" |
49742 | "\n" |
49743 | "static float _Complex\n" |
49744 | " _TG_ATTRS\n" |
49745 | " __tg_acos(float _Complex __x) {return cacosf(__x);}\n" |
49746 | "\n" |
49747 | "static double _Complex\n" |
49748 | " _TG_ATTRS\n" |
49749 | " __tg_acos(double _Complex __x) {return cacos(__x);}\n" |
49750 | "\n" |
49751 | "static long double _Complex\n" |
49752 | " _TG_ATTRS\n" |
49753 | " __tg_acos(long double _Complex __x) {return cacosl(__x);}\n" |
49754 | "\n" |
49755 | "#undef acos\n" |
49756 | "#define acos(__x) __tg_acos(__tg_promote1((__x))(__x))\n" |
49757 | "\n" |
49758 | "// asin\n" |
49759 | "\n" |
49760 | "static float\n" |
49761 | " _TG_ATTRS\n" |
49762 | " __tg_asin(float __x) {return asinf(__x);}\n" |
49763 | "\n" |
49764 | "static double\n" |
49765 | " _TG_ATTRS\n" |
49766 | " __tg_asin(double __x) {return asin(__x);}\n" |
49767 | "\n" |
49768 | "static long double\n" |
49769 | " _TG_ATTRS\n" |
49770 | " __tg_asin(long double __x) {return asinl(__x);}\n" |
49771 | "\n" |
49772 | "static float _Complex\n" |
49773 | " _TG_ATTRS\n" |
49774 | " __tg_asin(float _Complex __x) {return casinf(__x);}\n" |
49775 | "\n" |
49776 | "static double _Complex\n" |
49777 | " _TG_ATTRS\n" |
49778 | " __tg_asin(double _Complex __x) {return casin(__x);}\n" |
49779 | "\n" |
49780 | "static long double _Complex\n" |
49781 | " _TG_ATTRS\n" |
49782 | " __tg_asin(long double _Complex __x) {return casinl(__x);}\n" |
49783 | "\n" |
49784 | "#undef asin\n" |
49785 | "#define asin(__x) __tg_asin(__tg_promote1((__x))(__x))\n" |
49786 | "\n" |
49787 | "// atan\n" |
49788 | "\n" |
49789 | "static float\n" |
49790 | " _TG_ATTRS\n" |
49791 | " __tg_atan(float __x) {return atanf(__x);}\n" |
49792 | "\n" |
49793 | "static double\n" |
49794 | " _TG_ATTRS\n" |
49795 | " __tg_atan(double __x) {return atan(__x);}\n" |
49796 | "\n" |
49797 | "static long double\n" |
49798 | " _TG_ATTRS\n" |
49799 | " __tg_atan(long double __x) {return atanl(__x);}\n" |
49800 | "\n" |
49801 | "static float _Complex\n" |
49802 | " _TG_ATTRS\n" |
49803 | " __tg_atan(float _Complex __x) {return catanf(__x);}\n" |
49804 | "\n" |
49805 | "static double _Complex\n" |
49806 | " _TG_ATTRS\n" |
49807 | " __tg_atan(double _Complex __x) {return catan(__x);}\n" |
49808 | "\n" |
49809 | "static long double _Complex\n" |
49810 | " _TG_ATTRS\n" |
49811 | " __tg_atan(long double _Complex __x) {return catanl(__x);}\n" |
49812 | "\n" |
49813 | "#undef atan\n" |
49814 | "#define atan(__x) __tg_atan(__tg_promote1((__x))(__x))\n" |
49815 | "\n" |
49816 | "// acosh\n" |
49817 | "\n" |
49818 | "static float\n" |
49819 | " _TG_ATTRS\n" |
49820 | " __tg_acosh(float __x) {return acoshf(__x);}\n" |
49821 | "\n" |
49822 | "static double\n" |
49823 | " _TG_ATTRS\n" |
49824 | " __tg_acosh(double __x) {return acosh(__x);}\n" |
49825 | "\n" |
49826 | "static long double\n" |
49827 | " _TG_ATTRS\n" |
49828 | " __tg_acosh(long double __x) {return acoshl(__x);}\n" |
49829 | "\n" |
49830 | "static float _Complex\n" |
49831 | " _TG_ATTRS\n" |
49832 | " __tg_acosh(float _Complex __x) {return cacoshf(__x);}\n" |
49833 | "\n" |
49834 | "static double _Complex\n" |
49835 | " _TG_ATTRS\n" |
49836 | " __tg_acosh(double _Complex __x) {return cacosh(__x);}\n" |
49837 | "\n" |
49838 | "static long double _Complex\n" |
49839 | " _TG_ATTRS\n" |
49840 | " __tg_acosh(long double _Complex __x) {return cacoshl(__x);}\n" |
49841 | "\n" |
49842 | "#undef acosh\n" |
49843 | "#define acosh(__x) __tg_acosh(__tg_promote1((__x))(__x))\n" |
49844 | "\n" |
49845 | "// asinh\n" |
49846 | "\n" |
49847 | "static float\n" |
49848 | " _TG_ATTRS\n" |
49849 | " __tg_asinh(float __x) {return asinhf(__x);}\n" |
49850 | "\n" |
49851 | "static double\n" |
49852 | " _TG_ATTRS\n" |
49853 | " __tg_asinh(double __x) {return asinh(__x);}\n" |
49854 | "\n" |
49855 | "static long double\n" |
49856 | " _TG_ATTRS\n" |
49857 | " __tg_asinh(long double __x) {return asinhl(__x);}\n" |
49858 | "\n" |
49859 | "static float _Complex\n" |
49860 | " _TG_ATTRS\n" |
49861 | " __tg_asinh(float _Complex __x) {return casinhf(__x);}\n" |
49862 | "\n" |
49863 | "static double _Complex\n" |
49864 | " _TG_ATTRS\n" |
49865 | " __tg_asinh(double _Complex __x) {return casinh(__x);}\n" |
49866 | "\n" |
49867 | "static long double _Complex\n" |
49868 | " _TG_ATTRS\n" |
49869 | " __tg_asinh(long double _Complex __x) {return casinhl(__x);}\n" |
49870 | "\n" |
49871 | "#undef asinh\n" |
49872 | "#define asinh(__x) __tg_asinh(__tg_promote1((__x))(__x))\n" |
49873 | "\n" |
49874 | "// atanh\n" |
49875 | "\n" |
49876 | "static float\n" |
49877 | " _TG_ATTRS\n" |
49878 | " __tg_atanh(float __x) {return atanhf(__x);}\n" |
49879 | "\n" |
49880 | "static double\n" |
49881 | " _TG_ATTRS\n" |
49882 | " __tg_atanh(double __x) {return atanh(__x);}\n" |
49883 | "\n" |
49884 | "static long double\n" |
49885 | " _TG_ATTRS\n" |
49886 | " __tg_atanh(long double __x) {return atanhl(__x);}\n" |
49887 | "\n" |
49888 | "static float _Complex\n" |
49889 | " _TG_ATTRS\n" |
49890 | " __tg_atanh(float _Complex __x) {return catanhf(__x);}\n" |
49891 | "\n" |
49892 | "static double _Complex\n" |
49893 | " _TG_ATTRS\n" |
49894 | " __tg_atanh(double _Complex __x) {return catanh(__x);}\n" |
49895 | "\n" |
49896 | "static long double _Complex\n" |
49897 | " _TG_ATTRS\n" |
49898 | " __tg_atanh(long double _Complex __x) {return catanhl(__x);}\n" |
49899 | "\n" |
49900 | "#undef atanh\n" |
49901 | "#define atanh(__x) __tg_atanh(__tg_promote1((__x))(__x))\n" |
49902 | "\n" |
49903 | "// cos\n" |
49904 | "\n" |
49905 | "static float\n" |
49906 | " _TG_ATTRS\n" |
49907 | " __tg_cos(float __x) {return cosf(__x);}\n" |
49908 | "\n" |
49909 | "static double\n" |
49910 | " _TG_ATTRS\n" |
49911 | " __tg_cos(double __x) {return cos(__x);}\n" |
49912 | "\n" |
49913 | "static long double\n" |
49914 | " _TG_ATTRS\n" |
49915 | " __tg_cos(long double __x) {return cosl(__x);}\n" |
49916 | "\n" |
49917 | "static float _Complex\n" |
49918 | " _TG_ATTRS\n" |
49919 | " __tg_cos(float _Complex __x) {return ccosf(__x);}\n" |
49920 | "\n" |
49921 | "static double _Complex\n" |
49922 | " _TG_ATTRS\n" |
49923 | " __tg_cos(double _Complex __x) {return ccos(__x);}\n" |
49924 | "\n" |
49925 | "static long double _Complex\n" |
49926 | " _TG_ATTRS\n" |
49927 | " __tg_cos(long double _Complex __x) {return ccosl(__x);}\n" |
49928 | "\n" |
49929 | "#undef cos\n" |
49930 | "#define cos(__x) __tg_cos(__tg_promote1((__x))(__x))\n" |
49931 | "\n" |
49932 | "// sin\n" |
49933 | "\n" |
49934 | "static float\n" |
49935 | " _TG_ATTRS\n" |
49936 | " __tg_sin(float __x) {return sinf(__x);}\n" |
49937 | "\n" |
49938 | "static double\n" |
49939 | " _TG_ATTRS\n" |
49940 | " __tg_sin(double __x) {return sin(__x);}\n" |
49941 | "\n" |
49942 | "static long double\n" |
49943 | " _TG_ATTRS\n" |
49944 | " __tg_sin(long double __x) {return sinl(__x);}\n" |
49945 | "\n" |
49946 | "static float _Complex\n" |
49947 | " _TG_ATTRS\n" |
49948 | " __tg_sin(float _Complex __x) {return csinf(__x);}\n" |
49949 | "\n" |
49950 | "static double _Complex\n" |
49951 | " _TG_ATTRS\n" |
49952 | " __tg_sin(double _Complex __x) {return csin(__x);}\n" |
49953 | "\n" |
49954 | "static long double _Complex\n" |
49955 | " _TG_ATTRS\n" |
49956 | " __tg_sin(long double _Complex __x) {return csinl(__x);}\n" |
49957 | "\n" |
49958 | "#undef sin\n" |
49959 | "#define sin(__x) __tg_sin(__tg_promote1((__x))(__x))\n" |
49960 | "\n" |
49961 | "// tan\n" |
49962 | "\n" |
49963 | "static float\n" |
49964 | " _TG_ATTRS\n" |
49965 | " __tg_tan(float __x) {return tanf(__x);}\n" |
49966 | "\n" |
49967 | "static double\n" |
49968 | " _TG_ATTRS\n" |
49969 | " __tg_tan(double __x) {return tan(__x);}\n" |
49970 | "\n" |
49971 | "static long double\n" |
49972 | " _TG_ATTRS\n" |
49973 | " __tg_tan(long double __x) {return tanl(__x);}\n" |
49974 | "\n" |
49975 | "static float _Complex\n" |
49976 | " _TG_ATTRS\n" |
49977 | " __tg_tan(float _Complex __x) {return ctanf(__x);}\n" |
49978 | "\n" |
49979 | "static double _Complex\n" |
49980 | " _TG_ATTRS\n" |
49981 | " __tg_tan(double _Complex __x) {return ctan(__x);}\n" |
49982 | "\n" |
49983 | "static long double _Complex\n" |
49984 | " _TG_ATTRS\n" |
49985 | " __tg_tan(long double _Complex __x) {return ctanl(__x);}\n" |
49986 | "\n" |
49987 | "#undef tan\n" |
49988 | "#define tan(__x) __tg_tan(__tg_promote1((__x))(__x))\n" |
49989 | "\n" |
49990 | "// cosh\n" |
49991 | "\n" |
49992 | "static float\n" |
49993 | " _TG_ATTRS\n" |
49994 | " __tg_cosh(float __x) {return coshf(__x);}\n" |
49995 | "\n" |
49996 | "static double\n" |
49997 | " _TG_ATTRS\n" |
49998 | " __tg_cosh(double __x) {return cosh(__x);}\n" |
49999 | "\n" |
50000 | "static long double\n" |
50001 | " _TG_ATTRS\n" |
50002 | " __tg_cosh(long double __x) {return coshl(__x);}\n" |
50003 | "\n" |
50004 | "static float _Complex\n" |
50005 | " _TG_ATTRS\n" |
50006 | " __tg_cosh(float _Complex __x) {return ccoshf(__x);}\n" |
50007 | "\n" |
50008 | "static double _Complex\n" |
50009 | " _TG_ATTRS\n" |
50010 | " __tg_cosh(double _Complex __x) {return ccosh(__x);}\n" |
50011 | "\n" |
50012 | "static long double _Complex\n" |
50013 | " _TG_ATTRS\n" |
50014 | " __tg_cosh(long double _Complex __x) {return ccoshl(__x);}\n" |
50015 | "\n" |
50016 | "#undef cosh\n" |
50017 | "#define cosh(__x) __tg_cosh(__tg_promote1((__x))(__x))\n" |
50018 | "\n" |
50019 | "// sinh\n" |
50020 | "\n" |
50021 | "static float\n" |
50022 | " _TG_ATTRS\n" |
50023 | " __tg_sinh(float __x) {return sinhf(__x);}\n" |
50024 | "\n" |
50025 | "static double\n" |
50026 | " _TG_ATTRS\n" |
50027 | " __tg_sinh(double __x) {return sinh(__x);}\n" |
50028 | "\n" |
50029 | "static long double\n" |
50030 | " _TG_ATTRS\n" |
50031 | " __tg_sinh(long double __x) {return sinhl(__x);}\n" |
50032 | "\n" |
50033 | "static float _Complex\n" |
50034 | " _TG_ATTRS\n" |
50035 | " __tg_sinh(float _Complex __x) {return csinhf(__x);}\n" |
50036 | "\n" |
50037 | "static double _Complex\n" |
50038 | " _TG_ATTRS\n" |
50039 | " __tg_sinh(double _Complex __x) {return csinh(__x);}\n" |
50040 | "\n" |
50041 | "static long double _Complex\n" |
50042 | " _TG_ATTRS\n" |
50043 | " __tg_sinh(long double _Complex __x) {return csinhl(__x);}\n" |
50044 | "\n" |
50045 | "#undef sinh\n" |
50046 | "#define sinh(__x) __tg_sinh(__tg_promote1((__x))(__x))\n" |
50047 | "\n" |
50048 | "// tanh\n" |
50049 | "\n" |
50050 | "static float\n" |
50051 | " _TG_ATTRS\n" |
50052 | " __tg_tanh(float __x) {return tanhf(__x);}\n" |
50053 | "\n" |
50054 | "static double\n" |
50055 | " _TG_ATTRS\n" |
50056 | " __tg_tanh(double __x) {return tanh(__x);}\n" |
50057 | "\n" |
50058 | "static long double\n" |
50059 | " _TG_ATTRS\n" |
50060 | " __tg_tanh(long double __x) {return tanhl(__x);}\n" |
50061 | "\n" |
50062 | "static float _Complex\n" |
50063 | " _TG_ATTRS\n" |
50064 | " __tg_tanh(float _Complex __x) {return ctanhf(__x);}\n" |
50065 | "\n" |
50066 | "static double _Complex\n" |
50067 | " _TG_ATTRS\n" |
50068 | " __tg_tanh(double _Complex __x) {return ctanh(__x);}\n" |
50069 | "\n" |
50070 | "static long double _Complex\n" |
50071 | " _TG_ATTRS\n" |
50072 | " __tg_tanh(long double _Complex __x) {return ctanhl(__x);}\n" |
50073 | "\n" |
50074 | "#undef tanh\n" |
50075 | "#define tanh(__x) __tg_tanh(__tg_promote1((__x))(__x))\n" |
50076 | "\n" |
50077 | "// exp\n" |
50078 | "\n" |
50079 | "static float\n" |
50080 | " _TG_ATTRS\n" |
50081 | " __tg_exp(float __x) {return expf(__x);}\n" |
50082 | "\n" |
50083 | "static double\n" |
50084 | " _TG_ATTRS\n" |
50085 | " __tg_exp(double __x) {return exp(__x);}\n" |
50086 | "\n" |
50087 | "static long double\n" |
50088 | " _TG_ATTRS\n" |
50089 | " __tg_exp(long double __x) {return expl(__x);}\n" |
50090 | "\n" |
50091 | "static float _Complex\n" |
50092 | " _TG_ATTRS\n" |
50093 | " __tg_exp(float _Complex __x) {return cexpf(__x);}\n" |
50094 | "\n" |
50095 | "static double _Complex\n" |
50096 | " _TG_ATTRS\n" |
50097 | " __tg_exp(double _Complex __x) {return cexp(__x);}\n" |
50098 | "\n" |
50099 | "static long double _Complex\n" |
50100 | " _TG_ATTRS\n" |
50101 | " __tg_exp(long double _Complex __x) {return cexpl(__x);}\n" |
50102 | "\n" |
50103 | "#undef exp\n" |
50104 | "#define exp(__x) __tg_exp(__tg_promote1((__x))(__x))\n" |
50105 | "\n" |
50106 | "// log\n" |
50107 | "\n" |
50108 | "static float\n" |
50109 | " _TG_ATTRS\n" |
50110 | " __tg_log(float __x) {return logf(__x);}\n" |
50111 | "\n" |
50112 | "static double\n" |
50113 | " _TG_ATTRS\n" |
50114 | " __tg_log(double __x) {return log(__x);}\n" |
50115 | "\n" |
50116 | "static long double\n" |
50117 | " _TG_ATTRS\n" |
50118 | " __tg_log(long double __x) {return logl(__x);}\n" |
50119 | "\n" |
50120 | "static float _Complex\n" |
50121 | " _TG_ATTRS\n" |
50122 | " __tg_log(float _Complex __x) {return clogf(__x);}\n" |
50123 | "\n" |
50124 | "static double _Complex\n" |
50125 | " _TG_ATTRS\n" |
50126 | " __tg_log(double _Complex __x) {return clog(__x);}\n" |
50127 | "\n" |
50128 | "static long double _Complex\n" |
50129 | " _TG_ATTRS\n" |
50130 | " __tg_log(long double _Complex __x) {return clogl(__x);}\n" |
50131 | "\n" |
50132 | "#undef log\n" |
50133 | "#define log(__x) __tg_log(__tg_promote1((__x))(__x))\n" |
50134 | "\n" |
50135 | "// pow\n" |
50136 | "\n" |
50137 | "static float\n" |
50138 | " _TG_ATTRS\n" |
50139 | " __tg_pow(float __x, float __y) {return powf(__x, __y);}\n" |
50140 | "\n" |
50141 | "static double\n" |
50142 | " _TG_ATTRS\n" |
50143 | " __tg_pow(double __x, double __y) {return pow(__x, __y);}\n" |
50144 | "\n" |
50145 | "static long double\n" |
50146 | " _TG_ATTRS\n" |
50147 | " __tg_pow(long double __x, long double __y) {return powl(__x, __y);}\n" |
50148 | "\n" |
50149 | "static float _Complex\n" |
50150 | " _TG_ATTRS\n" |
50151 | " __tg_pow(float _Complex __x, float _Complex __y) {return cpowf(__x, __y);}\n" |
50152 | "\n" |
50153 | "static double _Complex\n" |
50154 | " _TG_ATTRS\n" |
50155 | " __tg_pow(double _Complex __x, double _Complex __y) {return cpow(__x, __y);}\n" |
50156 | "\n" |
50157 | "static long double _Complex\n" |
50158 | " _TG_ATTRS\n" |
50159 | " __tg_pow(long double _Complex __x, long double _Complex __y)\n" |
50160 | " {return cpowl(__x, __y);}\n" |
50161 | "\n" |
50162 | "#undef pow\n" |
50163 | "#define pow(__x, __y) __tg_pow(__tg_promote2((__x), (__y))(__x), \\\n" |
50164 | " __tg_promote2((__x), (__y))(__y))\n" |
50165 | "\n" |
50166 | "// sqrt\n" |
50167 | "\n" |
50168 | "static float\n" |
50169 | " _TG_ATTRS\n" |
50170 | " __tg_sqrt(float __x) {return sqrtf(__x);}\n" |
50171 | "\n" |
50172 | "static double\n" |
50173 | " _TG_ATTRS\n" |
50174 | " __tg_sqrt(double __x) {return sqrt(__x);}\n" |
50175 | "\n" |
50176 | "static long double\n" |
50177 | " _TG_ATTRS\n" |
50178 | " __tg_sqrt(long double __x) {return sqrtl(__x);}\n" |
50179 | "\n" |
50180 | "static float _Complex\n" |
50181 | " _TG_ATTRS\n" |
50182 | " __tg_sqrt(float _Complex __x) {return csqrtf(__x);}\n" |
50183 | "\n" |
50184 | "static double _Complex\n" |
50185 | " _TG_ATTRS\n" |
50186 | " __tg_sqrt(double _Complex __x) {return csqrt(__x);}\n" |
50187 | "\n" |
50188 | "static long double _Complex\n" |
50189 | " _TG_ATTRS\n" |
50190 | " __tg_sqrt(long double _Complex __x) {return csqrtl(__x);}\n" |
50191 | "\n" |
50192 | "#undef sqrt\n" |
50193 | "#define sqrt(__x) __tg_sqrt(__tg_promote1((__x))(__x))\n" |
50194 | "\n" |
50195 | "// fabs\n" |
50196 | "\n" |
50197 | "static float\n" |
50198 | " _TG_ATTRS\n" |
50199 | " __tg_fabs(float __x) {return fabsf(__x);}\n" |
50200 | "\n" |
50201 | "static double\n" |
50202 | " _TG_ATTRS\n" |
50203 | " __tg_fabs(double __x) {return fabs(__x);}\n" |
50204 | "\n" |
50205 | "static long double\n" |
50206 | " _TG_ATTRS\n" |
50207 | " __tg_fabs(long double __x) {return fabsl(__x);}\n" |
50208 | "\n" |
50209 | "static float\n" |
50210 | " _TG_ATTRS\n" |
50211 | " __tg_fabs(float _Complex __x) {return cabsf(__x);}\n" |
50212 | "\n" |
50213 | "static double\n" |
50214 | " _TG_ATTRS\n" |
50215 | " __tg_fabs(double _Complex __x) {return cabs(__x);}\n" |
50216 | "\n" |
50217 | "static long double\n" |
50218 | " _TG_ATTRS\n" |
50219 | " __tg_fabs(long double _Complex __x) {return cabsl(__x);}\n" |
50220 | "\n" |
50221 | "#undef fabs\n" |
50222 | "#define fabs(__x) __tg_fabs(__tg_promote1((__x))(__x))\n" |
50223 | "\n" |
50224 | "// atan2\n" |
50225 | "\n" |
50226 | "static float\n" |
50227 | " _TG_ATTRS\n" |
50228 | " __tg_atan2(float __x, float __y) {return atan2f(__x, __y);}\n" |
50229 | "\n" |
50230 | "static double\n" |
50231 | " _TG_ATTRS\n" |
50232 | " __tg_atan2(double __x, double __y) {return atan2(__x, __y);}\n" |
50233 | "\n" |
50234 | "static long double\n" |
50235 | " _TG_ATTRS\n" |
50236 | " __tg_atan2(long double __x, long double __y) {return atan2l(__x, __y);}\n" |
50237 | "\n" |
50238 | "#undef atan2\n" |
50239 | "#define atan2(__x, __y) __tg_atan2(__tg_promote2((__x), (__y))(__x), \\\n" |
50240 | " __tg_promote2((__x), (__y))(__y))\n" |
50241 | "\n" |
50242 | "// cbrt\n" |
50243 | "\n" |
50244 | "static float\n" |
50245 | " _TG_ATTRS\n" |
50246 | " __tg_cbrt(float __x) {return cbrtf(__x);}\n" |
50247 | "\n" |
50248 | "static double\n" |
50249 | " _TG_ATTRS\n" |
50250 | " __tg_cbrt(double __x) {return cbrt(__x);}\n" |
50251 | "\n" |
50252 | "static long double\n" |
50253 | " _TG_ATTRS\n" |
50254 | " __tg_cbrt(long double __x) {return cbrtl(__x);}\n" |
50255 | "\n" |
50256 | "#undef cbrt\n" |
50257 | "#define cbrt(__x) __tg_cbrt(__tg_promote1((__x))(__x))\n" |
50258 | "\n" |
50259 | "// ceil\n" |
50260 | "\n" |
50261 | "static float\n" |
50262 | " _TG_ATTRS\n" |
50263 | " __tg_ceil(float __x) {return ceilf(__x);}\n" |
50264 | "\n" |
50265 | "static double\n" |
50266 | " _TG_ATTRS\n" |
50267 | " __tg_ceil(double __x) {return ceil(__x);}\n" |
50268 | "\n" |
50269 | "static long double\n" |
50270 | " _TG_ATTRS\n" |
50271 | " __tg_ceil(long double __x) {return ceill(__x);}\n" |
50272 | "\n" |
50273 | "#undef ceil\n" |
50274 | "#define ceil(__x) __tg_ceil(__tg_promote1((__x))(__x))\n" |
50275 | "\n" |
50276 | "// copysign\n" |
50277 | "\n" |
50278 | "static float\n" |
50279 | " _TG_ATTRS\n" |
50280 | " __tg_copysign(float __x, float __y) {return copysignf(__x, __y);}\n" |
50281 | "\n" |
50282 | "static double\n" |
50283 | " _TG_ATTRS\n" |
50284 | " __tg_copysign(double __x, double __y) {return copysign(__x, __y);}\n" |
50285 | "\n" |
50286 | "static long double\n" |
50287 | " _TG_ATTRS\n" |
50288 | " __tg_copysign(long double __x, long double __y) {return copysignl(__x, __y);}\n" |
50289 | "\n" |
50290 | "#undef copysign\n" |
50291 | "#define copysign(__x, __y) __tg_copysign(__tg_promote2((__x), (__y))(__x), \\\n" |
50292 | " __tg_promote2((__x), (__y))(__y))\n" |
50293 | "\n" |
50294 | "// erf\n" |
50295 | "\n" |
50296 | "static float\n" |
50297 | " _TG_ATTRS\n" |
50298 | " __tg_erf(float __x) {return erff(__x);}\n" |
50299 | "\n" |
50300 | "static double\n" |
50301 | " _TG_ATTRS\n" |
50302 | " __tg_erf(double __x) {return erf(__x);}\n" |
50303 | "\n" |
50304 | "static long double\n" |
50305 | " _TG_ATTRS\n" |
50306 | " __tg_erf(long double __x) {return erfl(__x);}\n" |
50307 | "\n" |
50308 | "#undef erf\n" |
50309 | "#define erf(__x) __tg_erf(__tg_promote1((__x))(__x))\n" |
50310 | "\n" |
50311 | "// erfc\n" |
50312 | "\n" |
50313 | "static float\n" |
50314 | " _TG_ATTRS\n" |
50315 | " __tg_erfc(float __x) {return erfcf(__x);}\n" |
50316 | "\n" |
50317 | "static double\n" |
50318 | " _TG_ATTRS\n" |
50319 | " __tg_erfc(double __x) {return erfc(__x);}\n" |
50320 | "\n" |
50321 | "static long double\n" |
50322 | " _TG_ATTRS\n" |
50323 | " __tg_erfc(long double __x) {return erfcl(__x);}\n" |
50324 | "\n" |
50325 | "#undef erfc\n" |
50326 | "#define erfc(__x) __tg_erfc(__tg_promote1((__x))(__x))\n" |
50327 | "\n" |
50328 | "// exp2\n" |
50329 | "\n" |
50330 | "static float\n" |
50331 | " _TG_ATTRS\n" |
50332 | " __tg_exp2(float __x) {return exp2f(__x);}\n" |
50333 | "\n" |
50334 | "static double\n" |
50335 | " _TG_ATTRS\n" |
50336 | " __tg_exp2(double __x) {return exp2(__x);}\n" |
50337 | "\n" |
50338 | "static long double\n" |
50339 | " _TG_ATTRS\n" |
50340 | " __tg_exp2(long double __x) {return exp2l(__x);}\n" |
50341 | "\n" |
50342 | "#undef exp2\n" |
50343 | "#define exp2(__x) __tg_exp2(__tg_promote1((__x))(__x))\n" |
50344 | "\n" |
50345 | "// expm1\n" |
50346 | "\n" |
50347 | "static float\n" |
50348 | " _TG_ATTRS\n" |
50349 | " __tg_expm1(float __x) {return expm1f(__x);}\n" |
50350 | "\n" |
50351 | "static double\n" |
50352 | " _TG_ATTRS\n" |
50353 | " __tg_expm1(double __x) {return expm1(__x);}\n" |
50354 | "\n" |
50355 | "static long double\n" |
50356 | " _TG_ATTRS\n" |
50357 | " __tg_expm1(long double __x) {return expm1l(__x);}\n" |
50358 | "\n" |
50359 | "#undef expm1\n" |
50360 | "#define expm1(__x) __tg_expm1(__tg_promote1((__x))(__x))\n" |
50361 | "\n" |
50362 | "// fdim\n" |
50363 | "\n" |
50364 | "static float\n" |
50365 | " _TG_ATTRS\n" |
50366 | " __tg_fdim(float __x, float __y) {return fdimf(__x, __y);}\n" |
50367 | "\n" |
50368 | "static double\n" |
50369 | " _TG_ATTRS\n" |
50370 | " __tg_fdim(double __x, double __y) {return fdim(__x, __y);}\n" |
50371 | "\n" |
50372 | "static long double\n" |
50373 | " _TG_ATTRS\n" |
50374 | " __tg_fdim(long double __x, long double __y) {return fdiml(__x, __y);}\n" |
50375 | "\n" |
50376 | "#undef fdim\n" |
50377 | "#define fdim(__x, __y) __tg_fdim(__tg_promote2((__x), (__y))(__x), \\\n" |
50378 | " __tg_promote2((__x), (__y))(__y))\n" |
50379 | "\n" |
50380 | "// floor\n" |
50381 | "\n" |
50382 | "static float\n" |
50383 | " _TG_ATTRS\n" |
50384 | " __tg_floor(float __x) {return floorf(__x);}\n" |
50385 | "\n" |
50386 | "static double\n" |
50387 | " _TG_ATTRS\n" |
50388 | " __tg_floor(double __x) {return floor(__x);}\n" |
50389 | "\n" |
50390 | "static long double\n" |
50391 | " _TG_ATTRS\n" |
50392 | " __tg_floor(long double __x) {return floorl(__x);}\n" |
50393 | "\n" |
50394 | "#undef floor\n" |
50395 | "#define floor(__x) __tg_floor(__tg_promote1((__x))(__x))\n" |
50396 | "\n" |
50397 | "// fma\n" |
50398 | "\n" |
50399 | "static float\n" |
50400 | " _TG_ATTRS\n" |
50401 | " __tg_fma(float __x, float __y, float __z)\n" |
50402 | " {return fmaf(__x, __y, __z);}\n" |
50403 | "\n" |
50404 | "static double\n" |
50405 | " _TG_ATTRS\n" |
50406 | " __tg_fma(double __x, double __y, double __z)\n" |
50407 | " {return fma(__x, __y, __z);}\n" |
50408 | "\n" |
50409 | "static long double\n" |
50410 | " _TG_ATTRS\n" |
50411 | " __tg_fma(long double __x,long double __y, long double __z)\n" |
50412 | " {return fmal(__x, __y, __z);}\n" |
50413 | "\n" |
50414 | "#undef fma\n" |
50415 | "#define fma(__x, __y, __z) \\\n" |
50416 | " __tg_fma(__tg_promote3((__x), (__y), (__z))(__x), \\\n" |
50417 | " __tg_promote3((__x), (__y), (__z))(__y), \\\n" |
50418 | " __tg_promote3((__x), (__y), (__z))(__z))\n" |
50419 | "\n" |
50420 | "// fmax\n" |
50421 | "\n" |
50422 | "static float\n" |
50423 | " _TG_ATTRS\n" |
50424 | " __tg_fmax(float __x, float __y) {return fmaxf(__x, __y);}\n" |
50425 | "\n" |
50426 | "static double\n" |
50427 | " _TG_ATTRS\n" |
50428 | " __tg_fmax(double __x, double __y) {return fmax(__x, __y);}\n" |
50429 | "\n" |
50430 | "static long double\n" |
50431 | " _TG_ATTRS\n" |
50432 | " __tg_fmax(long double __x, long double __y) {return fmaxl(__x, __y);}\n" |
50433 | "\n" |
50434 | "#undef fmax\n" |
50435 | "#define fmax(__x, __y) __tg_fmax(__tg_promote2((__x), (__y))(__x), \\\n" |
50436 | " __tg_promote2((__x), (__y))(__y))\n" |
50437 | "\n" |
50438 | "// fmin\n" |
50439 | "\n" |
50440 | "static float\n" |
50441 | " _TG_ATTRS\n" |
50442 | " __tg_fmin(float __x, float __y) {return fminf(__x, __y);}\n" |
50443 | "\n" |
50444 | "static double\n" |
50445 | " _TG_ATTRS\n" |
50446 | " __tg_fmin(double __x, double __y) {return fmin(__x, __y);}\n" |
50447 | "\n" |
50448 | "static long double\n" |
50449 | " _TG_ATTRS\n" |
50450 | " __tg_fmin(long double __x, long double __y) {return fminl(__x, __y);}\n" |
50451 | "\n" |
50452 | "#undef fmin\n" |
50453 | "#define fmin(__x, __y) __tg_fmin(__tg_promote2((__x), (__y))(__x), \\\n" |
50454 | " __tg_promote2((__x), (__y))(__y))\n" |
50455 | "\n" |
50456 | "// fmod\n" |
50457 | "\n" |
50458 | "static float\n" |
50459 | " _TG_ATTRS\n" |
50460 | " __tg_fmod(float __x, float __y) {return fmodf(__x, __y);}\n" |
50461 | "\n" |
50462 | "static double\n" |
50463 | " _TG_ATTRS\n" |
50464 | " __tg_fmod(double __x, double __y) {return fmod(__x, __y);}\n" |
50465 | "\n" |
50466 | "static long double\n" |
50467 | " _TG_ATTRS\n" |
50468 | " __tg_fmod(long double __x, long double __y) {return fmodl(__x, __y);}\n" |
50469 | "\n" |
50470 | "#undef fmod\n" |
50471 | "#define fmod(__x, __y) __tg_fmod(__tg_promote2((__x), (__y))(__x), \\\n" |
50472 | " __tg_promote2((__x), (__y))(__y))\n" |
50473 | "\n" |
50474 | "// frexp\n" |
50475 | "\n" |
50476 | "static float\n" |
50477 | " _TG_ATTRS\n" |
50478 | " __tg_frexp(float __x, int* __y) {return frexpf(__x, __y);}\n" |
50479 | "\n" |
50480 | "static double\n" |
50481 | " _TG_ATTRS\n" |
50482 | " __tg_frexp(double __x, int* __y) {return frexp(__x, __y);}\n" |
50483 | "\n" |
50484 | "static long double\n" |
50485 | " _TG_ATTRS\n" |
50486 | " __tg_frexp(long double __x, int* __y) {return frexpl(__x, __y);}\n" |
50487 | "\n" |
50488 | "#undef frexp\n" |
50489 | "#define frexp(__x, __y) __tg_frexp(__tg_promote1((__x))(__x), __y)\n" |
50490 | "\n" |
50491 | "// hypot\n" |
50492 | "\n" |
50493 | "static float\n" |
50494 | " _TG_ATTRS\n" |
50495 | " __tg_hypot(float __x, float __y) {return hypotf(__x, __y);}\n" |
50496 | "\n" |
50497 | "static double\n" |
50498 | " _TG_ATTRS\n" |
50499 | " __tg_hypot(double __x, double __y) {return hypot(__x, __y);}\n" |
50500 | "\n" |
50501 | "static long double\n" |
50502 | " _TG_ATTRS\n" |
50503 | " __tg_hypot(long double __x, long double __y) {return hypotl(__x, __y);}\n" |
50504 | "\n" |
50505 | "#undef hypot\n" |
50506 | "#define hypot(__x, __y) __tg_hypot(__tg_promote2((__x), (__y))(__x), \\\n" |
50507 | " __tg_promote2((__x), (__y))(__y))\n" |
50508 | "\n" |
50509 | "// ilogb\n" |
50510 | "\n" |
50511 | "static int\n" |
50512 | " _TG_ATTRS\n" |
50513 | " __tg_ilogb(float __x) {return ilogbf(__x);}\n" |
50514 | "\n" |
50515 | "static int\n" |
50516 | " _TG_ATTRS\n" |
50517 | " __tg_ilogb(double __x) {return ilogb(__x);}\n" |
50518 | "\n" |
50519 | "static int\n" |
50520 | " _TG_ATTRS\n" |
50521 | " __tg_ilogb(long double __x) {return ilogbl(__x);}\n" |
50522 | "\n" |
50523 | "#undef ilogb\n" |
50524 | "#define ilogb(__x) __tg_ilogb(__tg_promote1((__x))(__x))\n" |
50525 | "\n" |
50526 | "// ldexp\n" |
50527 | "\n" |
50528 | "static float\n" |
50529 | " _TG_ATTRS\n" |
50530 | " __tg_ldexp(float __x, int __y) {return ldexpf(__x, __y);}\n" |
50531 | "\n" |
50532 | "static double\n" |
50533 | " _TG_ATTRS\n" |
50534 | " __tg_ldexp(double __x, int __y) {return ldexp(__x, __y);}\n" |
50535 | "\n" |
50536 | "static long double\n" |
50537 | " _TG_ATTRS\n" |
50538 | " __tg_ldexp(long double __x, int __y) {return ldexpl(__x, __y);}\n" |
50539 | "\n" |
50540 | "#undef ldexp\n" |
50541 | "#define ldexp(__x, __y) __tg_ldexp(__tg_promote1((__x))(__x), __y)\n" |
50542 | "\n" |
50543 | "// lgamma\n" |
50544 | "\n" |
50545 | "static float\n" |
50546 | " _TG_ATTRS\n" |
50547 | " __tg_lgamma(float __x) {return lgammaf(__x);}\n" |
50548 | "\n" |
50549 | "static double\n" |
50550 | " _TG_ATTRS\n" |
50551 | " __tg_lgamma(double __x) {return lgamma(__x);}\n" |
50552 | "\n" |
50553 | "static long double\n" |
50554 | " _TG_ATTRS\n" |
50555 | " __tg_lgamma(long double __x) {return lgammal(__x);}\n" |
50556 | "\n" |
50557 | "#undef lgamma\n" |
50558 | "#define lgamma(__x) __tg_lgamma(__tg_promote1((__x))(__x))\n" |
50559 | "\n" |
50560 | "// llrint\n" |
50561 | "\n" |
50562 | "static long long\n" |
50563 | " _TG_ATTRS\n" |
50564 | " __tg_llrint(float __x) {return llrintf(__x);}\n" |
50565 | "\n" |
50566 | "static long long\n" |
50567 | " _TG_ATTRS\n" |
50568 | " __tg_llrint(double __x) {return llrint(__x);}\n" |
50569 | "\n" |
50570 | "static long long\n" |
50571 | " _TG_ATTRS\n" |
50572 | " __tg_llrint(long double __x) {return llrintl(__x);}\n" |
50573 | "\n" |
50574 | "#undef llrint\n" |
50575 | "#define llrint(__x) __tg_llrint(__tg_promote1((__x))(__x))\n" |
50576 | "\n" |
50577 | "// llround\n" |
50578 | "\n" |
50579 | "static long long\n" |
50580 | " _TG_ATTRS\n" |
50581 | " __tg_llround(float __x) {return llroundf(__x);}\n" |
50582 | "\n" |
50583 | "static long long\n" |
50584 | " _TG_ATTRS\n" |
50585 | " __tg_llround(double __x) {return llround(__x);}\n" |
50586 | "\n" |
50587 | "static long long\n" |
50588 | " _TG_ATTRS\n" |
50589 | " __tg_llround(long double __x) {return llroundl(__x);}\n" |
50590 | "\n" |
50591 | "#undef llround\n" |
50592 | "#define llround(__x) __tg_llround(__tg_promote1((__x))(__x))\n" |
50593 | "\n" |
50594 | "// log10\n" |
50595 | "\n" |
50596 | "static float\n" |
50597 | " _TG_ATTRS\n" |
50598 | " __tg_log10(float __x) {return log10f(__x);}\n" |
50599 | "\n" |
50600 | "static double\n" |
50601 | " _TG_ATTRS\n" |
50602 | " __tg_log10(double __x) {return log10(__x);}\n" |
50603 | "\n" |
50604 | "static long double\n" |
50605 | " _TG_ATTRS\n" |
50606 | " __tg_log10(long double __x) {return log10l(__x);}\n" |
50607 | "\n" |
50608 | "#undef log10\n" |
50609 | "#define log10(__x) __tg_log10(__tg_promote1((__x))(__x))\n" |
50610 | "\n" |
50611 | "// log1p\n" |
50612 | "\n" |
50613 | "static float\n" |
50614 | " _TG_ATTRS\n" |
50615 | " __tg_log1p(float __x) {return log1pf(__x);}\n" |
50616 | "\n" |
50617 | "static double\n" |
50618 | " _TG_ATTRS\n" |
50619 | " __tg_log1p(double __x) {return log1p(__x);}\n" |
50620 | "\n" |
50621 | "static long double\n" |
50622 | " _TG_ATTRS\n" |
50623 | " __tg_log1p(long double __x) {return log1pl(__x);}\n" |
50624 | "\n" |
50625 | "#undef log1p\n" |
50626 | "#define log1p(__x) __tg_log1p(__tg_promote1((__x))(__x))\n" |
50627 | "\n" |
50628 | "// log2\n" |
50629 | "\n" |
50630 | "static float\n" |
50631 | " _TG_ATTRS\n" |
50632 | " __tg_log2(float __x) {return log2f(__x);}\n" |
50633 | "\n" |
50634 | "static double\n" |
50635 | " _TG_ATTRS\n" |
50636 | " __tg_log2(double __x) {return log2(__x);}\n" |
50637 | "\n" |
50638 | "static long double\n" |
50639 | " _TG_ATTRS\n" |
50640 | " __tg_log2(long double __x) {return log2l(__x);}\n" |
50641 | "\n" |
50642 | "#undef log2\n" |
50643 | "#define log2(__x) __tg_log2(__tg_promote1((__x))(__x))\n" |
50644 | "\n" |
50645 | "// logb\n" |
50646 | "\n" |
50647 | "static float\n" |
50648 | " _TG_ATTRS\n" |
50649 | " __tg_logb(float __x) {return logbf(__x);}\n" |
50650 | "\n" |
50651 | "static double\n" |
50652 | " _TG_ATTRS\n" |
50653 | " __tg_logb(double __x) {return logb(__x);}\n" |
50654 | "\n" |
50655 | "static long double\n" |
50656 | " _TG_ATTRS\n" |
50657 | " __tg_logb(long double __x) {return logbl(__x);}\n" |
50658 | "\n" |
50659 | "#undef logb\n" |
50660 | "#define logb(__x) __tg_logb(__tg_promote1((__x))(__x))\n" |
50661 | "\n" |
50662 | "// lrint\n" |
50663 | "\n" |
50664 | "static long\n" |
50665 | " _TG_ATTRS\n" |
50666 | " __tg_lrint(float __x) {return lrintf(__x);}\n" |
50667 | "\n" |
50668 | "static long\n" |
50669 | " _TG_ATTRS\n" |
50670 | " __tg_lrint(double __x) {return lrint(__x);}\n" |
50671 | "\n" |
50672 | "static long\n" |
50673 | " _TG_ATTRS\n" |
50674 | " __tg_lrint(long double __x) {return lrintl(__x);}\n" |
50675 | "\n" |
50676 | "#undef lrint\n" |
50677 | "#define lrint(__x) __tg_lrint(__tg_promote1((__x))(__x))\n" |
50678 | "\n" |
50679 | "// lround\n" |
50680 | "\n" |
50681 | "static long\n" |
50682 | " _TG_ATTRS\n" |
50683 | " __tg_lround(float __x) {return lroundf(__x);}\n" |
50684 | "\n" |
50685 | "static long\n" |
50686 | " _TG_ATTRS\n" |
50687 | " __tg_lround(double __x) {return lround(__x);}\n" |
50688 | "\n" |
50689 | "static long\n" |
50690 | " _TG_ATTRS\n" |
50691 | " __tg_lround(long double __x) {return lroundl(__x);}\n" |
50692 | "\n" |
50693 | "#undef lround\n" |
50694 | "#define lround(__x) __tg_lround(__tg_promote1((__x))(__x))\n" |
50695 | "\n" |
50696 | "// nearbyint\n" |
50697 | "\n" |
50698 | "static float\n" |
50699 | " _TG_ATTRS\n" |
50700 | " __tg_nearbyint(float __x) {return nearbyintf(__x);}\n" |
50701 | "\n" |
50702 | "static double\n" |
50703 | " _TG_ATTRS\n" |
50704 | " __tg_nearbyint(double __x) {return nearbyint(__x);}\n" |
50705 | "\n" |
50706 | "static long double\n" |
50707 | " _TG_ATTRS\n" |
50708 | " __tg_nearbyint(long double __x) {return nearbyintl(__x);}\n" |
50709 | "\n" |
50710 | "#undef nearbyint\n" |
50711 | "#define nearbyint(__x) __tg_nearbyint(__tg_promote1((__x))(__x))\n" |
50712 | "\n" |
50713 | "// nextafter\n" |
50714 | "\n" |
50715 | "static float\n" |
50716 | " _TG_ATTRS\n" |
50717 | " __tg_nextafter(float __x, float __y) {return nextafterf(__x, __y);}\n" |
50718 | "\n" |
50719 | "static double\n" |
50720 | " _TG_ATTRS\n" |
50721 | " __tg_nextafter(double __x, double __y) {return nextafter(__x, __y);}\n" |
50722 | "\n" |
50723 | "static long double\n" |
50724 | " _TG_ATTRS\n" |
50725 | " __tg_nextafter(long double __x, long double __y) {return nextafterl(__x, __y);}\n" |
50726 | "\n" |
50727 | "#undef nextafter\n" |
50728 | "#define nextafter(__x, __y) __tg_nextafter(__tg_promote2((__x), (__y))(__x), \\\n" |
50729 | " __tg_promote2((__x), (__y))(__y))\n" |
50730 | "\n" |
50731 | "// nexttoward\n" |
50732 | "\n" |
50733 | "static float\n" |
50734 | " _TG_ATTRS\n" |
50735 | " __tg_nexttoward(float __x, long double __y) {return nexttowardf(__x, __y);}\n" |
50736 | "\n" |
50737 | "static double\n" |
50738 | " _TG_ATTRS\n" |
50739 | " __tg_nexttoward(double __x, long double __y) {return nexttoward(__x, __y);}\n" |
50740 | "\n" |
50741 | "static long double\n" |
50742 | " _TG_ATTRS\n" |
50743 | " __tg_nexttoward(long double __x, long double __y) {return nexttowardl(__x, __y);}\n" |
50744 | "\n" |
50745 | "#undef nexttoward\n" |
50746 | "#define nexttoward(__x, __y) __tg_nexttoward(__tg_promote1((__x))(__x), (__y))\n" |
50747 | "\n" |
50748 | "// remainder\n" |
50749 | "\n" |
50750 | "static float\n" |
50751 | " _TG_ATTRS\n" |
50752 | " __tg_remainder(float __x, float __y) {return remainderf(__x, __y);}\n" |
50753 | "\n" |
50754 | "static double\n" |
50755 | " _TG_ATTRS\n" |
50756 | " __tg_remainder(double __x, double __y) {return remainder(__x, __y);}\n" |
50757 | "\n" |
50758 | "static long double\n" |
50759 | " _TG_ATTRS\n" |
50760 | " __tg_remainder(long double __x, long double __y) {return remainderl(__x, __y);}\n" |
50761 | "\n" |
50762 | "#undef remainder\n" |
50763 | "#define remainder(__x, __y) __tg_remainder(__tg_promote2((__x), (__y))(__x), \\\n" |
50764 | " __tg_promote2((__x), (__y))(__y))\n" |
50765 | "\n" |
50766 | "// remquo\n" |
50767 | "\n" |
50768 | "static float\n" |
50769 | " _TG_ATTRS\n" |
50770 | " __tg_remquo(float __x, float __y, int* __z)\n" |
50771 | " {return remquof(__x, __y, __z);}\n" |
50772 | "\n" |
50773 | "static double\n" |
50774 | " _TG_ATTRS\n" |
50775 | " __tg_remquo(double __x, double __y, int* __z)\n" |
50776 | " {return remquo(__x, __y, __z);}\n" |
50777 | "\n" |
50778 | "static long double\n" |
50779 | " _TG_ATTRS\n" |
50780 | " __tg_remquo(long double __x,long double __y, int* __z)\n" |
50781 | " {return remquol(__x, __y, __z);}\n" |
50782 | "\n" |
50783 | "#undef remquo\n" |
50784 | "#define remquo(__x, __y, __z) \\\n" |
50785 | " __tg_remquo(__tg_promote2((__x), (__y))(__x), \\\n" |
50786 | " __tg_promote2((__x), (__y))(__y), \\\n" |
50787 | " (__z))\n" |
50788 | "\n" |
50789 | "// rint\n" |
50790 | "\n" |
50791 | "static float\n" |
50792 | " _TG_ATTRS\n" |
50793 | " __tg_rint(float __x) {return rintf(__x);}\n" |
50794 | "\n" |
50795 | "static double\n" |
50796 | " _TG_ATTRS\n" |
50797 | " __tg_rint(double __x) {return rint(__x);}\n" |
50798 | "\n" |
50799 | "static long double\n" |
50800 | " _TG_ATTRS\n" |
50801 | " __tg_rint(long double __x) {return rintl(__x);}\n" |
50802 | "\n" |
50803 | "#undef rint\n" |
50804 | "#define rint(__x) __tg_rint(__tg_promote1((__x))(__x))\n" |
50805 | "\n" |
50806 | "// round\n" |
50807 | "\n" |
50808 | "static float\n" |
50809 | " _TG_ATTRS\n" |
50810 | " __tg_round(float __x) {return roundf(__x);}\n" |
50811 | "\n" |
50812 | "static double\n" |
50813 | " _TG_ATTRS\n" |
50814 | " __tg_round(double __x) {return round(__x);}\n" |
50815 | "\n" |
50816 | "static long double\n" |
50817 | " _TG_ATTRS\n" |
50818 | " __tg_round(long double __x) {return roundl(__x);}\n" |
50819 | "\n" |
50820 | "#undef round\n" |
50821 | "#define round(__x) __tg_round(__tg_promote1((__x))(__x))\n" |
50822 | "\n" |
50823 | "// scalbn\n" |
50824 | "\n" |
50825 | "static float\n" |
50826 | " _TG_ATTRS\n" |
50827 | " __tg_scalbn(float __x, int __y) {return scalbnf(__x, __y);}\n" |
50828 | "\n" |
50829 | "static double\n" |
50830 | " _TG_ATTRS\n" |
50831 | " __tg_scalbn(double __x, int __y) {return scalbn(__x, __y);}\n" |
50832 | "\n" |
50833 | "static long double\n" |
50834 | " _TG_ATTRS\n" |
50835 | " __tg_scalbn(long double __x, int __y) {return scalbnl(__x, __y);}\n" |
50836 | "\n" |
50837 | "#undef scalbn\n" |
50838 | "#define scalbn(__x, __y) __tg_scalbn(__tg_promote1((__x))(__x), __y)\n" |
50839 | "\n" |
50840 | "// scalbln\n" |
50841 | "\n" |
50842 | "static float\n" |
50843 | " _TG_ATTRS\n" |
50844 | " __tg_scalbln(float __x, long __y) {return scalblnf(__x, __y);}\n" |
50845 | "\n" |
50846 | "static double\n" |
50847 | " _TG_ATTRS\n" |
50848 | " __tg_scalbln(double __x, long __y) {return scalbln(__x, __y);}\n" |
50849 | "\n" |
50850 | "static long double\n" |
50851 | " _TG_ATTRS\n" |
50852 | " __tg_scalbln(long double __x, long __y) {return scalblnl(__x, __y);}\n" |
50853 | "\n" |
50854 | "#undef scalbln\n" |
50855 | "#define scalbln(__x, __y) __tg_scalbln(__tg_promote1((__x))(__x), __y)\n" |
50856 | "\n" |
50857 | "// tgamma\n" |
50858 | "\n" |
50859 | "static float\n" |
50860 | " _TG_ATTRS\n" |
50861 | " __tg_tgamma(float __x) {return tgammaf(__x);}\n" |
50862 | "\n" |
50863 | "static double\n" |
50864 | " _TG_ATTRS\n" |
50865 | " __tg_tgamma(double __x) {return tgamma(__x);}\n" |
50866 | "\n" |
50867 | "static long double\n" |
50868 | " _TG_ATTRS\n" |
50869 | " __tg_tgamma(long double __x) {return tgammal(__x);}\n" |
50870 | "\n" |
50871 | "#undef tgamma\n" |
50872 | "#define tgamma(__x) __tg_tgamma(__tg_promote1((__x))(__x))\n" |
50873 | "\n" |
50874 | "// trunc\n" |
50875 | "\n" |
50876 | "static float\n" |
50877 | " _TG_ATTRS\n" |
50878 | " __tg_trunc(float __x) {return truncf(__x);}\n" |
50879 | "\n" |
50880 | "static double\n" |
50881 | " _TG_ATTRS\n" |
50882 | " __tg_trunc(double __x) {return trunc(__x);}\n" |
50883 | "\n" |
50884 | "static long double\n" |
50885 | " _TG_ATTRS\n" |
50886 | " __tg_trunc(long double __x) {return truncl(__x);}\n" |
50887 | "\n" |
50888 | "#undef trunc\n" |
50889 | "#define trunc(__x) __tg_trunc(__tg_promote1((__x))(__x))\n" |
50890 | "\n" |
50891 | "// carg\n" |
50892 | "\n" |
50893 | "static float\n" |
50894 | " _TG_ATTRS\n" |
50895 | " __tg_carg(float __x) {return atan2f(0.F, __x);}\n" |
50896 | "\n" |
50897 | "static double\n" |
50898 | " _TG_ATTRS\n" |
50899 | " __tg_carg(double __x) {return atan2(0., __x);}\n" |
50900 | "\n" |
50901 | "static long double\n" |
50902 | " _TG_ATTRS\n" |
50903 | " __tg_carg(long double __x) {return atan2l(0.L, __x);}\n" |
50904 | "\n" |
50905 | "static float\n" |
50906 | " _TG_ATTRS\n" |
50907 | " __tg_carg(float _Complex __x) {return cargf(__x);}\n" |
50908 | "\n" |
50909 | "static double\n" |
50910 | " _TG_ATTRS\n" |
50911 | " __tg_carg(double _Complex __x) {return carg(__x);}\n" |
50912 | "\n" |
50913 | "static long double\n" |
50914 | " _TG_ATTRS\n" |
50915 | " __tg_carg(long double _Complex __x) {return cargl(__x);}\n" |
50916 | "\n" |
50917 | "#undef carg\n" |
50918 | "#define carg(__x) __tg_carg(__tg_promote1((__x))(__x))\n" |
50919 | "\n" |
50920 | "// cimag\n" |
50921 | "\n" |
50922 | "static float\n" |
50923 | " _TG_ATTRS\n" |
50924 | " __tg_cimag(float __x) {return 0;}\n" |
50925 | "\n" |
50926 | "static double\n" |
50927 | " _TG_ATTRS\n" |
50928 | " __tg_cimag(double __x) {return 0;}\n" |
50929 | "\n" |
50930 | "static long double\n" |
50931 | " _TG_ATTRS\n" |
50932 | " __tg_cimag(long double __x) {return 0;}\n" |
50933 | "\n" |
50934 | "static float\n" |
50935 | " _TG_ATTRS\n" |
50936 | " __tg_cimag(float _Complex __x) {return cimagf(__x);}\n" |
50937 | "\n" |
50938 | "static double\n" |
50939 | " _TG_ATTRS\n" |
50940 | " __tg_cimag(double _Complex __x) {return cimag(__x);}\n" |
50941 | "\n" |
50942 | "static long double\n" |
50943 | " _TG_ATTRS\n" |
50944 | " __tg_cimag(long double _Complex __x) {return cimagl(__x);}\n" |
50945 | "\n" |
50946 | "#undef cimag\n" |
50947 | "#define cimag(__x) __tg_cimag(__tg_promote1((__x))(__x))\n" |
50948 | "\n" |
50949 | "// conj\n" |
50950 | "\n" |
50951 | "static float _Complex\n" |
50952 | " _TG_ATTRS\n" |
50953 | " __tg_conj(float __x) {return __x;}\n" |
50954 | "\n" |
50955 | "static double _Complex\n" |
50956 | " _TG_ATTRS\n" |
50957 | " __tg_conj(double __x) {return __x;}\n" |
50958 | "\n" |
50959 | "static long double _Complex\n" |
50960 | " _TG_ATTRS\n" |
50961 | " __tg_conj(long double __x) {return __x;}\n" |
50962 | "\n" |
50963 | "static float _Complex\n" |
50964 | " _TG_ATTRS\n" |
50965 | " __tg_conj(float _Complex __x) {return conjf(__x);}\n" |
50966 | "\n" |
50967 | "static double _Complex\n" |
50968 | " _TG_ATTRS\n" |
50969 | " __tg_conj(double _Complex __x) {return conj(__x);}\n" |
50970 | "\n" |
50971 | "static long double _Complex\n" |
50972 | " _TG_ATTRS\n" |
50973 | " __tg_conj(long double _Complex __x) {return conjl(__x);}\n" |
50974 | "\n" |
50975 | "#undef conj\n" |
50976 | "#define conj(__x) __tg_conj(__tg_promote1((__x))(__x))\n" |
50977 | "\n" |
50978 | "// cproj\n" |
50979 | "\n" |
50980 | "static float _Complex\n" |
50981 | " _TG_ATTRS\n" |
50982 | " __tg_cproj(float __x) {return cprojf(__x);}\n" |
50983 | "\n" |
50984 | "static double _Complex\n" |
50985 | " _TG_ATTRS\n" |
50986 | " __tg_cproj(double __x) {return cproj(__x);}\n" |
50987 | "\n" |
50988 | "static long double _Complex\n" |
50989 | " _TG_ATTRS\n" |
50990 | " __tg_cproj(long double __x) {return cprojl(__x);}\n" |
50991 | "\n" |
50992 | "static float _Complex\n" |
50993 | " _TG_ATTRS\n" |
50994 | " __tg_cproj(float _Complex __x) {return cprojf(__x);}\n" |
50995 | "\n" |
50996 | "static double _Complex\n" |
50997 | " _TG_ATTRS\n" |
50998 | " __tg_cproj(double _Complex __x) {return cproj(__x);}\n" |
50999 | "\n" |
51000 | "static long double _Complex\n" |
51001 | " _TG_ATTRS\n" |
51002 | " __tg_cproj(long double _Complex __x) {return cprojl(__x);}\n" |
51003 | "\n" |
51004 | "#undef cproj\n" |
51005 | "#define cproj(__x) __tg_cproj(__tg_promote1((__x))(__x))\n" |
51006 | "\n" |
51007 | "// creal\n" |
51008 | "\n" |
51009 | "static float\n" |
51010 | " _TG_ATTRS\n" |
51011 | " __tg_creal(float __x) {return __x;}\n" |
51012 | "\n" |
51013 | "static double\n" |
51014 | " _TG_ATTRS\n" |
51015 | " __tg_creal(double __x) {return __x;}\n" |
51016 | "\n" |
51017 | "static long double\n" |
51018 | " _TG_ATTRS\n" |
51019 | " __tg_creal(long double __x) {return __x;}\n" |
51020 | "\n" |
51021 | "static float\n" |
51022 | " _TG_ATTRS\n" |
51023 | " __tg_creal(float _Complex __x) {return crealf(__x);}\n" |
51024 | "\n" |
51025 | "static double\n" |
51026 | " _TG_ATTRS\n" |
51027 | " __tg_creal(double _Complex __x) {return creal(__x);}\n" |
51028 | "\n" |
51029 | "static long double\n" |
51030 | " _TG_ATTRS\n" |
51031 | " __tg_creal(long double _Complex __x) {return creall(__x);}\n" |
51032 | "\n" |
51033 | "#undef creal\n" |
51034 | "#define creal(__x) __tg_creal(__tg_promote1((__x))(__x))\n" |
51035 | "\n" |
51036 | "#undef _TG_ATTRSp\n" |
51037 | "#undef _TG_ATTRS\n" |
51038 | "\n" |
51039 | "#endif /* __cplusplus */\n" |
51040 | "#endif /* __has_include_next */\n" |
51041 | "#endif /* __CLANG_TGMATH_H */\n" |
51042 | "" } , |
51043 | { "/builtins/tmmintrin.h" , "/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===\n" |
51044 | " *\n" |
51045 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
51046 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
51047 | " * in the Software without restriction, including without limitation the rights\n" |
51048 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
51049 | " * copies of the Software, and to permit persons to whom the Software is\n" |
51050 | " * furnished to do so, subject to the following conditions:\n" |
51051 | " *\n" |
51052 | " * The above copyright notice and this permission notice shall be included in\n" |
51053 | " * all copies or substantial portions of the Software.\n" |
51054 | " *\n" |
51055 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
51056 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
51057 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
51058 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
51059 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
51060 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
51061 | " * THE SOFTWARE.\n" |
51062 | " *\n" |
51063 | " *===-----------------------------------------------------------------------===\n" |
51064 | " */\n" |
51065 | "\n" |
51066 | "#ifndef __TMMINTRIN_H\n" |
51067 | "#define __TMMINTRIN_H\n" |
51068 | "\n" |
51069 | "#include <pmmintrin.h>\n" |
51070 | "\n" |
51071 | "/* Define the default attributes for the functions in this file. */\n" |
51072 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"ssse3\"), __min_vector_width__(64)))\n" |
51073 | "#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__(\"mmx,ssse3\"), __min_vector_width__(64)))\n" |
51074 | "\n" |
51075 | "/// Computes the absolute value of each of the packed 8-bit signed\n" |
51076 | "/// integers in the source operand and stores the 8-bit unsigned integer\n" |
51077 | "/// results in the destination.\n" |
51078 | "///\n" |
51079 | "/// \\headerfile <x86intrin.h>\n" |
51080 | "///\n" |
51081 | "/// This intrinsic corresponds to the \\c PABSB instruction.\n" |
51082 | "///\n" |
51083 | "/// \\param __a\n" |
51084 | "/// A 64-bit vector of [8 x i8].\n" |
51085 | "/// \\returns A 64-bit integer vector containing the absolute values of the\n" |
51086 | "/// elements in the operand.\n" |
51087 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
51088 | "_mm_abs_pi8(__m64 __a)\n" |
51089 | "{\n" |
51090 | " return (__m64)__builtin_ia32_pabsb((__v8qi)__a);\n" |
51091 | "}\n" |
51092 | "\n" |
51093 | "/// Computes the absolute value of each of the packed 8-bit signed\n" |
51094 | "/// integers in the source operand and stores the 8-bit unsigned integer\n" |
51095 | "/// results in the destination.\n" |
51096 | "///\n" |
51097 | "/// \\headerfile <x86intrin.h>\n" |
51098 | "///\n" |
51099 | "/// This intrinsic corresponds to the \\c VPABSB instruction.\n" |
51100 | "///\n" |
51101 | "/// \\param __a\n" |
51102 | "/// A 128-bit vector of [16 x i8].\n" |
51103 | "/// \\returns A 128-bit integer vector containing the absolute values of the\n" |
51104 | "/// elements in the operand.\n" |
51105 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
51106 | "_mm_abs_epi8(__m128i __a)\n" |
51107 | "{\n" |
51108 | " return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);\n" |
51109 | "}\n" |
51110 | "\n" |
51111 | "/// Computes the absolute value of each of the packed 16-bit signed\n" |
51112 | "/// integers in the source operand and stores the 16-bit unsigned integer\n" |
51113 | "/// results in the destination.\n" |
51114 | "///\n" |
51115 | "/// \\headerfile <x86intrin.h>\n" |
51116 | "///\n" |
51117 | "/// This intrinsic corresponds to the \\c PABSW instruction.\n" |
51118 | "///\n" |
51119 | "/// \\param __a\n" |
51120 | "/// A 64-bit vector of [4 x i16].\n" |
51121 | "/// \\returns A 64-bit integer vector containing the absolute values of the\n" |
51122 | "/// elements in the operand.\n" |
51123 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
51124 | "_mm_abs_pi16(__m64 __a)\n" |
51125 | "{\n" |
51126 | " return (__m64)__builtin_ia32_pabsw((__v4hi)__a);\n" |
51127 | "}\n" |
51128 | "\n" |
51129 | "/// Computes the absolute value of each of the packed 16-bit signed\n" |
51130 | "/// integers in the source operand and stores the 16-bit unsigned integer\n" |
51131 | "/// results in the destination.\n" |
51132 | "///\n" |
51133 | "/// \\headerfile <x86intrin.h>\n" |
51134 | "///\n" |
51135 | "/// This intrinsic corresponds to the \\c VPABSW instruction.\n" |
51136 | "///\n" |
51137 | "/// \\param __a\n" |
51138 | "/// A 128-bit vector of [8 x i16].\n" |
51139 | "/// \\returns A 128-bit integer vector containing the absolute values of the\n" |
51140 | "/// elements in the operand.\n" |
51141 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
51142 | "_mm_abs_epi16(__m128i __a)\n" |
51143 | "{\n" |
51144 | " return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);\n" |
51145 | "}\n" |
51146 | "\n" |
51147 | "/// Computes the absolute value of each of the packed 32-bit signed\n" |
51148 | "/// integers in the source operand and stores the 32-bit unsigned integer\n" |
51149 | "/// results in the destination.\n" |
51150 | "///\n" |
51151 | "/// \\headerfile <x86intrin.h>\n" |
51152 | "///\n" |
51153 | "/// This intrinsic corresponds to the \\c PABSD instruction.\n" |
51154 | "///\n" |
51155 | "/// \\param __a\n" |
51156 | "/// A 64-bit vector of [2 x i32].\n" |
51157 | "/// \\returns A 64-bit integer vector containing the absolute values of the\n" |
51158 | "/// elements in the operand.\n" |
51159 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
51160 | "_mm_abs_pi32(__m64 __a)\n" |
51161 | "{\n" |
51162 | " return (__m64)__builtin_ia32_pabsd((__v2si)__a);\n" |
51163 | "}\n" |
51164 | "\n" |
51165 | "/// Computes the absolute value of each of the packed 32-bit signed\n" |
51166 | "/// integers in the source operand and stores the 32-bit unsigned integer\n" |
51167 | "/// results in the destination.\n" |
51168 | "///\n" |
51169 | "/// \\headerfile <x86intrin.h>\n" |
51170 | "///\n" |
51171 | "/// This intrinsic corresponds to the \\c VPABSD instruction.\n" |
51172 | "///\n" |
51173 | "/// \\param __a\n" |
51174 | "/// A 128-bit vector of [4 x i32].\n" |
51175 | "/// \\returns A 128-bit integer vector containing the absolute values of the\n" |
51176 | "/// elements in the operand.\n" |
51177 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
51178 | "_mm_abs_epi32(__m128i __a)\n" |
51179 | "{\n" |
51180 | " return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);\n" |
51181 | "}\n" |
51182 | "\n" |
51183 | "/// Concatenates the two 128-bit integer vector operands, and\n" |
51184 | "/// right-shifts the result by the number of bytes specified in the immediate\n" |
51185 | "/// operand.\n" |
51186 | "///\n" |
51187 | "/// \\headerfile <x86intrin.h>\n" |
51188 | "///\n" |
51189 | "/// \\code\n" |
51190 | "/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);\n" |
51191 | "/// \\endcode\n" |
51192 | "///\n" |
51193 | "/// This intrinsic corresponds to the \\c PALIGNR instruction.\n" |
51194 | "///\n" |
51195 | "/// \\param a\n" |
51196 | "/// A 128-bit vector of [16 x i8] containing one of the source operands.\n" |
51197 | "/// \\param b\n" |
51198 | "/// A 128-bit vector of [16 x i8] containing one of the source operands.\n" |
51199 | "/// \\param n\n" |
51200 | "/// An immediate operand specifying how many bytes to right-shift the result.\n" |
51201 | "/// \\returns A 128-bit integer vector containing the concatenated right-shifted\n" |
51202 | "/// value.\n" |
51203 | "#define _mm_alignr_epi8(a, b, n) \\\n" |
51204 | " (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \\\n" |
51205 | " (__v16qi)(__m128i)(b), (n))\n" |
51206 | "\n" |
51207 | "/// Concatenates the two 64-bit integer vector operands, and right-shifts\n" |
51208 | "/// the result by the number of bytes specified in the immediate operand.\n" |
51209 | "///\n" |
51210 | "/// \\headerfile <x86intrin.h>\n" |
51211 | "///\n" |
51212 | "/// \\code\n" |
51213 | "/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);\n" |
51214 | "/// \\endcode\n" |
51215 | "///\n" |
51216 | "/// This intrinsic corresponds to the \\c PALIGNR instruction.\n" |
51217 | "///\n" |
51218 | "/// \\param a\n" |
51219 | "/// A 64-bit vector of [8 x i8] containing one of the source operands.\n" |
51220 | "/// \\param b\n" |
51221 | "/// A 64-bit vector of [8 x i8] containing one of the source operands.\n" |
51222 | "/// \\param n\n" |
51223 | "/// An immediate operand specifying how many bytes to right-shift the result.\n" |
51224 | "/// \\returns A 64-bit integer vector containing the concatenated right-shifted\n" |
51225 | "/// value.\n" |
51226 | "#define _mm_alignr_pi8(a, b, n) \\\n" |
51227 | " (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))\n" |
51228 | "\n" |
51229 | "/// Horizontally adds the adjacent pairs of values contained in 2 packed\n" |
51230 | "/// 128-bit vectors of [8 x i16].\n" |
51231 | "///\n" |
51232 | "/// \\headerfile <x86intrin.h>\n" |
51233 | "///\n" |
51234 | "/// This intrinsic corresponds to the \\c VPHADDW instruction.\n" |
51235 | "///\n" |
51236 | "/// \\param __a\n" |
51237 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
51238 | "/// horizontal sums of the values are stored in the lower bits of the\n" |
51239 | "/// destination.\n" |
51240 | "/// \\param __b\n" |
51241 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
51242 | "/// horizontal sums of the values are stored in the upper bits of the\n" |
51243 | "/// destination.\n" |
51244 | "/// \\returns A 128-bit vector of [8 x i16] containing the horizontal sums of\n" |
51245 | "/// both operands.\n" |
51246 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
51247 | "_mm_hadd_epi16(__m128i __a, __m128i __b)\n" |
51248 | "{\n" |
51249 | " return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);\n" |
51250 | "}\n" |
51251 | "\n" |
51252 | "/// Horizontally adds the adjacent pairs of values contained in 2 packed\n" |
51253 | "/// 128-bit vectors of [4 x i32].\n" |
51254 | "///\n" |
51255 | "/// \\headerfile <x86intrin.h>\n" |
51256 | "///\n" |
51257 | "/// This intrinsic corresponds to the \\c VPHADDD instruction.\n" |
51258 | "///\n" |
51259 | "/// \\param __a\n" |
51260 | "/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n" |
51261 | "/// horizontal sums of the values are stored in the lower bits of the\n" |
51262 | "/// destination.\n" |
51263 | "/// \\param __b\n" |
51264 | "/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n" |
51265 | "/// horizontal sums of the values are stored in the upper bits of the\n" |
51266 | "/// destination.\n" |
51267 | "/// \\returns A 128-bit vector of [4 x i32] containing the horizontal sums of\n" |
51268 | "/// both operands.\n" |
51269 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
51270 | "_mm_hadd_epi32(__m128i __a, __m128i __b)\n" |
51271 | "{\n" |
51272 | " return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);\n" |
51273 | "}\n" |
51274 | "\n" |
51275 | "/// Horizontally adds the adjacent pairs of values contained in 2 packed\n" |
51276 | "/// 64-bit vectors of [4 x i16].\n" |
51277 | "///\n" |
51278 | "/// \\headerfile <x86intrin.h>\n" |
51279 | "///\n" |
51280 | "/// This intrinsic corresponds to the \\c PHADDW instruction.\n" |
51281 | "///\n" |
51282 | "/// \\param __a\n" |
51283 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
51284 | "/// horizontal sums of the values are stored in the lower bits of the\n" |
51285 | "/// destination.\n" |
51286 | "/// \\param __b\n" |
51287 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
51288 | "/// horizontal sums of the values are stored in the upper bits of the\n" |
51289 | "/// destination.\n" |
51290 | "/// \\returns A 64-bit vector of [4 x i16] containing the horizontal sums of both\n" |
51291 | "/// operands.\n" |
51292 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
51293 | "_mm_hadd_pi16(__m64 __a, __m64 __b)\n" |
51294 | "{\n" |
51295 | " return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);\n" |
51296 | "}\n" |
51297 | "\n" |
51298 | "/// Horizontally adds the adjacent pairs of values contained in 2 packed\n" |
51299 | "/// 64-bit vectors of [2 x i32].\n" |
51300 | "///\n" |
51301 | "/// \\headerfile <x86intrin.h>\n" |
51302 | "///\n" |
51303 | "/// This intrinsic corresponds to the \\c PHADDD instruction.\n" |
51304 | "///\n" |
51305 | "/// \\param __a\n" |
51306 | "/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n" |
51307 | "/// horizontal sums of the values are stored in the lower bits of the\n" |
51308 | "/// destination.\n" |
51309 | "/// \\param __b\n" |
51310 | "/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n" |
51311 | "/// horizontal sums of the values are stored in the upper bits of the\n" |
51312 | "/// destination.\n" |
51313 | "/// \\returns A 64-bit vector of [2 x i32] containing the horizontal sums of both\n" |
51314 | "/// operands.\n" |
51315 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
51316 | "_mm_hadd_pi32(__m64 __a, __m64 __b)\n" |
51317 | "{\n" |
51318 | " return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);\n" |
51319 | "}\n" |
51320 | "\n" |
51321 | "/// Horizontally adds the adjacent pairs of values contained in 2 packed\n" |
51322 | "/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are\n" |
51323 | "/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to\n" |
51324 | "/// 0x8000.\n" |
51325 | "///\n" |
51326 | "/// \\headerfile <x86intrin.h>\n" |
51327 | "///\n" |
51328 | "/// This intrinsic corresponds to the \\c VPHADDSW instruction.\n" |
51329 | "///\n" |
51330 | "/// \\param __a\n" |
51331 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
51332 | "/// horizontal sums of the values are stored in the lower bits of the\n" |
51333 | "/// destination.\n" |
51334 | "/// \\param __b\n" |
51335 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
51336 | "/// horizontal sums of the values are stored in the upper bits of the\n" |
51337 | "/// destination.\n" |
51338 | "/// \\returns A 128-bit vector of [8 x i16] containing the horizontal saturated\n" |
51339 | "/// sums of both operands.\n" |
51340 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
51341 | "_mm_hadds_epi16(__m128i __a, __m128i __b)\n" |
51342 | "{\n" |
51343 | " return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);\n" |
51344 | "}\n" |
51345 | "\n" |
51346 | "/// Horizontally adds the adjacent pairs of values contained in 2 packed\n" |
51347 | "/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are\n" |
51348 | "/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to\n" |
51349 | "/// 0x8000.\n" |
51350 | "///\n" |
51351 | "/// \\headerfile <x86intrin.h>\n" |
51352 | "///\n" |
51353 | "/// This intrinsic corresponds to the \\c PHADDSW instruction.\n" |
51354 | "///\n" |
51355 | "/// \\param __a\n" |
51356 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
51357 | "/// horizontal sums of the values are stored in the lower bits of the\n" |
51358 | "/// destination.\n" |
51359 | "/// \\param __b\n" |
51360 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
51361 | "/// horizontal sums of the values are stored in the upper bits of the\n" |
51362 | "/// destination.\n" |
51363 | "/// \\returns A 64-bit vector of [4 x i16] containing the horizontal saturated\n" |
51364 | "/// sums of both operands.\n" |
51365 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
51366 | "_mm_hadds_pi16(__m64 __a, __m64 __b)\n" |
51367 | "{\n" |
51368 | " return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);\n" |
51369 | "}\n" |
51370 | "\n" |
51371 | "/// Horizontally subtracts the adjacent pairs of values contained in 2\n" |
51372 | "/// packed 128-bit vectors of [8 x i16].\n" |
51373 | "///\n" |
51374 | "/// \\headerfile <x86intrin.h>\n" |
51375 | "///\n" |
51376 | "/// This intrinsic corresponds to the \\c VPHSUBW instruction.\n" |
51377 | "///\n" |
51378 | "/// \\param __a\n" |
51379 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
51380 | "/// horizontal differences between the values are stored in the lower bits of\n" |
51381 | "/// the destination.\n" |
51382 | "/// \\param __b\n" |
51383 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
51384 | "/// horizontal differences between the values are stored in the upper bits of\n" |
51385 | "/// the destination.\n" |
51386 | "/// \\returns A 128-bit vector of [8 x i16] containing the horizontal differences\n" |
51387 | "/// of both operands.\n" |
51388 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
51389 | "_mm_hsub_epi16(__m128i __a, __m128i __b)\n" |
51390 | "{\n" |
51391 | " return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);\n" |
51392 | "}\n" |
51393 | "\n" |
51394 | "/// Horizontally subtracts the adjacent pairs of values contained in 2\n" |
51395 | "/// packed 128-bit vectors of [4 x i32].\n" |
51396 | "///\n" |
51397 | "/// \\headerfile <x86intrin.h>\n" |
51398 | "///\n" |
51399 | "/// This intrinsic corresponds to the \\c VPHSUBD instruction.\n" |
51400 | "///\n" |
51401 | "/// \\param __a\n" |
51402 | "/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n" |
51403 | "/// horizontal differences between the values are stored in the lower bits of\n" |
51404 | "/// the destination.\n" |
51405 | "/// \\param __b\n" |
51406 | "/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n" |
51407 | "/// horizontal differences between the values are stored in the upper bits of\n" |
51408 | "/// the destination.\n" |
51409 | "/// \\returns A 128-bit vector of [4 x i32] containing the horizontal differences\n" |
51410 | "/// of both operands.\n" |
51411 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
51412 | "_mm_hsub_epi32(__m128i __a, __m128i __b)\n" |
51413 | "{\n" |
51414 | " return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);\n" |
51415 | "}\n" |
51416 | "\n" |
51417 | "/// Horizontally subtracts the adjacent pairs of values contained in 2\n" |
51418 | "/// packed 64-bit vectors of [4 x i16].\n" |
51419 | "///\n" |
51420 | "/// \\headerfile <x86intrin.h>\n" |
51421 | "///\n" |
51422 | "/// This intrinsic corresponds to the \\c PHSUBW instruction.\n" |
51423 | "///\n" |
51424 | "/// \\param __a\n" |
51425 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
51426 | "/// horizontal differences between the values are stored in the lower bits of\n" |
51427 | "/// the destination.\n" |
51428 | "/// \\param __b\n" |
51429 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
51430 | "/// horizontal differences between the values are stored in the upper bits of\n" |
51431 | "/// the destination.\n" |
51432 | "/// \\returns A 64-bit vector of [4 x i16] containing the horizontal differences\n" |
51433 | "/// of both operands.\n" |
51434 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
51435 | "_mm_hsub_pi16(__m64 __a, __m64 __b)\n" |
51436 | "{\n" |
51437 | " return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);\n" |
51438 | "}\n" |
51439 | "\n" |
51440 | "/// Horizontally subtracts the adjacent pairs of values contained in 2\n" |
51441 | "/// packed 64-bit vectors of [2 x i32].\n" |
51442 | "///\n" |
51443 | "/// \\headerfile <x86intrin.h>\n" |
51444 | "///\n" |
51445 | "/// This intrinsic corresponds to the \\c PHSUBD instruction.\n" |
51446 | "///\n" |
51447 | "/// \\param __a\n" |
51448 | "/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n" |
51449 | "/// horizontal differences between the values are stored in the lower bits of\n" |
51450 | "/// the destination.\n" |
51451 | "/// \\param __b\n" |
51452 | "/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n" |
51453 | "/// horizontal differences between the values are stored in the upper bits of\n" |
51454 | "/// the destination.\n" |
51455 | "/// \\returns A 64-bit vector of [2 x i32] containing the horizontal differences\n" |
51456 | "/// of both operands.\n" |
51457 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
51458 | "_mm_hsub_pi32(__m64 __a, __m64 __b)\n" |
51459 | "{\n" |
51460 | " return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);\n" |
51461 | "}\n" |
51462 | "\n" |
51463 | "/// Horizontally subtracts the adjacent pairs of values contained in 2\n" |
51464 | "/// packed 128-bit vectors of [8 x i16]. Positive differences greater than\n" |
51465 | "/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are\n" |
51466 | "/// saturated to 0x8000.\n" |
51467 | "///\n" |
51468 | "/// \\headerfile <x86intrin.h>\n" |
51469 | "///\n" |
51470 | "/// This intrinsic corresponds to the \\c VPHSUBSW instruction.\n" |
51471 | "///\n" |
51472 | "/// \\param __a\n" |
51473 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
51474 | "/// horizontal differences between the values are stored in the lower bits of\n" |
51475 | "/// the destination.\n" |
51476 | "/// \\param __b\n" |
51477 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
51478 | "/// horizontal differences between the values are stored in the upper bits of\n" |
51479 | "/// the destination.\n" |
51480 | "/// \\returns A 128-bit vector of [8 x i16] containing the horizontal saturated\n" |
51481 | "/// differences of both operands.\n" |
51482 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
51483 | "_mm_hsubs_epi16(__m128i __a, __m128i __b)\n" |
51484 | "{\n" |
51485 | " return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);\n" |
51486 | "}\n" |
51487 | "\n" |
51488 | "/// Horizontally subtracts the adjacent pairs of values contained in 2\n" |
51489 | "/// packed 64-bit vectors of [4 x i16]. Positive differences greater than\n" |
51490 | "/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are\n" |
51491 | "/// saturated to 0x8000.\n" |
51492 | "///\n" |
51493 | "/// \\headerfile <x86intrin.h>\n" |
51494 | "///\n" |
51495 | "/// This intrinsic corresponds to the \\c PHSUBSW instruction.\n" |
51496 | "///\n" |
51497 | "/// \\param __a\n" |
51498 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
51499 | "/// horizontal differences between the values are stored in the lower bits of\n" |
51500 | "/// the destination.\n" |
51501 | "/// \\param __b\n" |
51502 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
51503 | "/// horizontal differences between the values are stored in the upper bits of\n" |
51504 | "/// the destination.\n" |
51505 | "/// \\returns A 64-bit vector of [4 x i16] containing the horizontal saturated\n" |
51506 | "/// differences of both operands.\n" |
51507 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
51508 | "_mm_hsubs_pi16(__m64 __a, __m64 __b)\n" |
51509 | "{\n" |
51510 | " return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);\n" |
51511 | "}\n" |
51512 | "\n" |
51513 | "/// Multiplies corresponding pairs of packed 8-bit unsigned integer\n" |
51514 | "/// values contained in the first source operand and packed 8-bit signed\n" |
51515 | "/// integer values contained in the second source operand, adds pairs of\n" |
51516 | "/// contiguous products with signed saturation, and writes the 16-bit sums to\n" |
51517 | "/// the corresponding bits in the destination.\n" |
51518 | "///\n" |
51519 | "/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of\n" |
51520 | "/// both operands are multiplied, and the sum of both results is written to\n" |
51521 | "/// bits [15:0] of the destination.\n" |
51522 | "///\n" |
51523 | "/// \\headerfile <x86intrin.h>\n" |
51524 | "///\n" |
51525 | "/// This intrinsic corresponds to the \\c VPMADDUBSW instruction.\n" |
51526 | "///\n" |
51527 | "/// \\param __a\n" |
51528 | "/// A 128-bit integer vector containing the first source operand.\n" |
51529 | "/// \\param __b\n" |
51530 | "/// A 128-bit integer vector containing the second source operand.\n" |
51531 | "/// \\returns A 128-bit integer vector containing the sums of products of both\n" |
51532 | "/// operands: \\n\n" |
51533 | "/// \\a R0 := (\\a __a0 * \\a __b0) + (\\a __a1 * \\a __b1) \\n\n" |
51534 | "/// \\a R1 := (\\a __a2 * \\a __b2) + (\\a __a3 * \\a __b3) \\n\n" |
51535 | "/// \\a R2 := (\\a __a4 * \\a __b4) + (\\a __a5 * \\a __b5) \\n\n" |
51536 | "/// \\a R3 := (\\a __a6 * \\a __b6) + (\\a __a7 * \\a __b7) \\n\n" |
51537 | "/// \\a R4 := (\\a __a8 * \\a __b8) + (\\a __a9 * \\a __b9) \\n\n" |
51538 | "/// \\a R5 := (\\a __a10 * \\a __b10) + (\\a __a11 * \\a __b11) \\n\n" |
51539 | "/// \\a R6 := (\\a __a12 * \\a __b12) + (\\a __a13 * \\a __b13) \\n\n" |
51540 | "/// \\a R7 := (\\a __a14 * \\a __b14) + (\\a __a15 * \\a __b15)\n" |
51541 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
51542 | "_mm_maddubs_epi16(__m128i __a, __m128i __b)\n" |
51543 | "{\n" |
51544 | " return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);\n" |
51545 | "}\n" |
51546 | "\n" |
51547 | "/// Multiplies corresponding pairs of packed 8-bit unsigned integer\n" |
51548 | "/// values contained in the first source operand and packed 8-bit signed\n" |
51549 | "/// integer values contained in the second source operand, adds pairs of\n" |
51550 | "/// contiguous products with signed saturation, and writes the 16-bit sums to\n" |
51551 | "/// the corresponding bits in the destination.\n" |
51552 | "///\n" |
51553 | "/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of\n" |
51554 | "/// both operands are multiplied, and the sum of both results is written to\n" |
51555 | "/// bits [15:0] of the destination.\n" |
51556 | "///\n" |
51557 | "/// \\headerfile <x86intrin.h>\n" |
51558 | "///\n" |
51559 | "/// This intrinsic corresponds to the \\c PMADDUBSW instruction.\n" |
51560 | "///\n" |
51561 | "/// \\param __a\n" |
51562 | "/// A 64-bit integer vector containing the first source operand.\n" |
51563 | "/// \\param __b\n" |
51564 | "/// A 64-bit integer vector containing the second source operand.\n" |
51565 | "/// \\returns A 64-bit integer vector containing the sums of products of both\n" |
51566 | "/// operands: \\n\n" |
51567 | "/// \\a R0 := (\\a __a0 * \\a __b0) + (\\a __a1 * \\a __b1) \\n\n" |
51568 | "/// \\a R1 := (\\a __a2 * \\a __b2) + (\\a __a3 * \\a __b3) \\n\n" |
51569 | "/// \\a R2 := (\\a __a4 * \\a __b4) + (\\a __a5 * \\a __b5) \\n\n" |
51570 | "/// \\a R3 := (\\a __a6 * \\a __b6) + (\\a __a7 * \\a __b7)\n" |
51571 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
51572 | "_mm_maddubs_pi16(__m64 __a, __m64 __b)\n" |
51573 | "{\n" |
51574 | " return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);\n" |
51575 | "}\n" |
51576 | "\n" |
51577 | "/// Multiplies packed 16-bit signed integer values, truncates the 32-bit\n" |
51578 | "/// products to the 18 most significant bits by right-shifting, rounds the\n" |
51579 | "/// truncated value by adding 1, and writes bits [16:1] to the destination.\n" |
51580 | "///\n" |
51581 | "/// \\headerfile <x86intrin.h>\n" |
51582 | "///\n" |
51583 | "/// This intrinsic corresponds to the \\c VPMULHRSW instruction.\n" |
51584 | "///\n" |
51585 | "/// \\param __a\n" |
51586 | "/// A 128-bit vector of [8 x i16] containing one of the source operands.\n" |
51587 | "/// \\param __b\n" |
51588 | "/// A 128-bit vector of [8 x i16] containing one of the source operands.\n" |
51589 | "/// \\returns A 128-bit vector of [8 x i16] containing the rounded and scaled\n" |
51590 | "/// products of both operands.\n" |
51591 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
51592 | "_mm_mulhrs_epi16(__m128i __a, __m128i __b)\n" |
51593 | "{\n" |
51594 | " return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);\n" |
51595 | "}\n" |
51596 | "\n" |
51597 | "/// Multiplies packed 16-bit signed integer values, truncates the 32-bit\n" |
51598 | "/// products to the 18 most significant bits by right-shifting, rounds the\n" |
51599 | "/// truncated value by adding 1, and writes bits [16:1] to the destination.\n" |
51600 | "///\n" |
51601 | "/// \\headerfile <x86intrin.h>\n" |
51602 | "///\n" |
51603 | "/// This intrinsic corresponds to the \\c PMULHRSW instruction.\n" |
51604 | "///\n" |
51605 | "/// \\param __a\n" |
51606 | "/// A 64-bit vector of [4 x i16] containing one of the source operands.\n" |
51607 | "/// \\param __b\n" |
51608 | "/// A 64-bit vector of [4 x i16] containing one of the source operands.\n" |
51609 | "/// \\returns A 64-bit vector of [4 x i16] containing the rounded and scaled\n" |
51610 | "/// products of both operands.\n" |
51611 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
51612 | "_mm_mulhrs_pi16(__m64 __a, __m64 __b)\n" |
51613 | "{\n" |
51614 | " return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);\n" |
51615 | "}\n" |
51616 | "\n" |
51617 | "/// Copies the 8-bit integers from a 128-bit integer vector to the\n" |
51618 | "/// destination or clears 8-bit values in the destination, as specified by\n" |
51619 | "/// the second source operand.\n" |
51620 | "///\n" |
51621 | "/// \\headerfile <x86intrin.h>\n" |
51622 | "///\n" |
51623 | "/// This intrinsic corresponds to the \\c VPSHUFB instruction.\n" |
51624 | "///\n" |
51625 | "/// \\param __a\n" |
51626 | "/// A 128-bit integer vector containing the values to be copied.\n" |
51627 | "/// \\param __b\n" |
51628 | "/// A 128-bit integer vector containing control bytes corresponding to\n" |
51629 | "/// positions in the destination:\n" |
51630 | "/// Bit 7: \\n\n" |
51631 | "/// 1: Clear the corresponding byte in the destination. \\n\n" |
51632 | "/// 0: Copy the selected source byte to the corresponding byte in the\n" |
51633 | "/// destination. \\n\n" |
51634 | "/// Bits [6:4] Reserved. \\n\n" |
51635 | "/// Bits [3:0] select the source byte to be copied.\n" |
51636 | "/// \\returns A 128-bit integer vector containing the copied or cleared values.\n" |
51637 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
51638 | "_mm_shuffle_epi8(__m128i __a, __m128i __b)\n" |
51639 | "{\n" |
51640 | " return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);\n" |
51641 | "}\n" |
51642 | "\n" |
51643 | "/// Copies the 8-bit integers from a 64-bit integer vector to the\n" |
51644 | "/// destination or clears 8-bit values in the destination, as specified by\n" |
51645 | "/// the second source operand.\n" |
51646 | "///\n" |
51647 | "/// \\headerfile <x86intrin.h>\n" |
51648 | "///\n" |
51649 | "/// This intrinsic corresponds to the \\c PSHUFB instruction.\n" |
51650 | "///\n" |
51651 | "/// \\param __a\n" |
51652 | "/// A 64-bit integer vector containing the values to be copied.\n" |
51653 | "/// \\param __b\n" |
51654 | "/// A 64-bit integer vector containing control bytes corresponding to\n" |
51655 | "/// positions in the destination:\n" |
51656 | "/// Bit 7: \\n\n" |
51657 | "/// 1: Clear the corresponding byte in the destination. \\n\n" |
51658 | "/// 0: Copy the selected source byte to the corresponding byte in the\n" |
51659 | "/// destination. \\n\n" |
51660 | "/// Bits [3:0] select the source byte to be copied.\n" |
51661 | "/// \\returns A 64-bit integer vector containing the copied or cleared values.\n" |
51662 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
51663 | "_mm_shuffle_pi8(__m64 __a, __m64 __b)\n" |
51664 | "{\n" |
51665 | " return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);\n" |
51666 | "}\n" |
51667 | "\n" |
51668 | "/// For each 8-bit integer in the first source operand, perform one of\n" |
51669 | "/// the following actions as specified by the second source operand.\n" |
51670 | "///\n" |
51671 | "/// If the byte in the second source is negative, calculate the two's\n" |
51672 | "/// complement of the corresponding byte in the first source, and write that\n" |
51673 | "/// value to the destination. If the byte in the second source is positive,\n" |
51674 | "/// copy the corresponding byte from the first source to the destination. If\n" |
51675 | "/// the byte in the second source is zero, clear the corresponding byte in\n" |
51676 | "/// the destination.\n" |
51677 | "///\n" |
51678 | "/// \\headerfile <x86intrin.h>\n" |
51679 | "///\n" |
51680 | "/// This intrinsic corresponds to the \\c VPSIGNB instruction.\n" |
51681 | "///\n" |
51682 | "/// \\param __a\n" |
51683 | "/// A 128-bit integer vector containing the values to be copied.\n" |
51684 | "/// \\param __b\n" |
51685 | "/// A 128-bit integer vector containing control bytes corresponding to\n" |
51686 | "/// positions in the destination.\n" |
51687 | "/// \\returns A 128-bit integer vector containing the resultant values.\n" |
51688 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
51689 | "_mm_sign_epi8(__m128i __a, __m128i __b)\n" |
51690 | "{\n" |
51691 | " return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);\n" |
51692 | "}\n" |
51693 | "\n" |
51694 | "/// For each 16-bit integer in the first source operand, perform one of\n" |
51695 | "/// the following actions as specified by the second source operand.\n" |
51696 | "///\n" |
51697 | "/// If the word in the second source is negative, calculate the two's\n" |
51698 | "/// complement of the corresponding word in the first source, and write that\n" |
51699 | "/// value to the destination. If the word in the second source is positive,\n" |
51700 | "/// copy the corresponding word from the first source to the destination. If\n" |
51701 | "/// the word in the second source is zero, clear the corresponding word in\n" |
51702 | "/// the destination.\n" |
51703 | "///\n" |
51704 | "/// \\headerfile <x86intrin.h>\n" |
51705 | "///\n" |
51706 | "/// This intrinsic corresponds to the \\c VPSIGNW instruction.\n" |
51707 | "///\n" |
51708 | "/// \\param __a\n" |
51709 | "/// A 128-bit integer vector containing the values to be copied.\n" |
51710 | "/// \\param __b\n" |
51711 | "/// A 128-bit integer vector containing control words corresponding to\n" |
51712 | "/// positions in the destination.\n" |
51713 | "/// \\returns A 128-bit integer vector containing the resultant values.\n" |
51714 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
51715 | "_mm_sign_epi16(__m128i __a, __m128i __b)\n" |
51716 | "{\n" |
51717 | " return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);\n" |
51718 | "}\n" |
51719 | "\n" |
51720 | "/// For each 32-bit integer in the first source operand, perform one of\n" |
51721 | "/// the following actions as specified by the second source operand.\n" |
51722 | "///\n" |
51723 | "/// If the doubleword in the second source is negative, calculate the two's\n" |
51724 | "/// complement of the corresponding word in the first source, and write that\n" |
51725 | "/// value to the destination. If the doubleword in the second source is\n" |
51726 | "/// positive, copy the corresponding word from the first source to the\n" |
51727 | "/// destination. If the doubleword in the second source is zero, clear the\n" |
51728 | "/// corresponding word in the destination.\n" |
51729 | "///\n" |
51730 | "/// \\headerfile <x86intrin.h>\n" |
51731 | "///\n" |
51732 | "/// This intrinsic corresponds to the \\c VPSIGND instruction.\n" |
51733 | "///\n" |
51734 | "/// \\param __a\n" |
51735 | "/// A 128-bit integer vector containing the values to be copied.\n" |
51736 | "/// \\param __b\n" |
51737 | "/// A 128-bit integer vector containing control doublewords corresponding to\n" |
51738 | "/// positions in the destination.\n" |
51739 | "/// \\returns A 128-bit integer vector containing the resultant values.\n" |
51740 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
51741 | "_mm_sign_epi32(__m128i __a, __m128i __b)\n" |
51742 | "{\n" |
51743 | " return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);\n" |
51744 | "}\n" |
51745 | "\n" |
51746 | "/// For each 8-bit integer in the first source operand, perform one of\n" |
51747 | "/// the following actions as specified by the second source operand.\n" |
51748 | "///\n" |
51749 | "/// If the byte in the second source is negative, calculate the two's\n" |
51750 | "/// complement of the corresponding byte in the first source, and write that\n" |
51751 | "/// value to the destination. If the byte in the second source is positive,\n" |
51752 | "/// copy the corresponding byte from the first source to the destination. If\n" |
51753 | "/// the byte in the second source is zero, clear the corresponding byte in\n" |
51754 | "/// the destination.\n" |
51755 | "///\n" |
51756 | "/// \\headerfile <x86intrin.h>\n" |
51757 | "///\n" |
51758 | "/// This intrinsic corresponds to the \\c PSIGNB instruction.\n" |
51759 | "///\n" |
51760 | "/// \\param __a\n" |
51761 | "/// A 64-bit integer vector containing the values to be copied.\n" |
51762 | "/// \\param __b\n" |
51763 | "/// A 64-bit integer vector containing control bytes corresponding to\n" |
51764 | "/// positions in the destination.\n" |
51765 | "/// \\returns A 64-bit integer vector containing the resultant values.\n" |
51766 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
51767 | "_mm_sign_pi8(__m64 __a, __m64 __b)\n" |
51768 | "{\n" |
51769 | " return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);\n" |
51770 | "}\n" |
51771 | "\n" |
51772 | "/// For each 16-bit integer in the first source operand, perform one of\n" |
51773 | "/// the following actions as specified by the second source operand.\n" |
51774 | "///\n" |
51775 | "/// If the word in the second source is negative, calculate the two's\n" |
51776 | "/// complement of the corresponding word in the first source, and write that\n" |
51777 | "/// value to the destination. If the word in the second source is positive,\n" |
51778 | "/// copy the corresponding word from the first source to the destination. If\n" |
51779 | "/// the word in the second source is zero, clear the corresponding word in\n" |
51780 | "/// the destination.\n" |
51781 | "///\n" |
51782 | "/// \\headerfile <x86intrin.h>\n" |
51783 | "///\n" |
51784 | "/// This intrinsic corresponds to the \\c PSIGNW instruction.\n" |
51785 | "///\n" |
51786 | "/// \\param __a\n" |
51787 | "/// A 64-bit integer vector containing the values to be copied.\n" |
51788 | "/// \\param __b\n" |
51789 | "/// A 64-bit integer vector containing control words corresponding to\n" |
51790 | "/// positions in the destination.\n" |
51791 | "/// \\returns A 64-bit integer vector containing the resultant values.\n" |
51792 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
51793 | "_mm_sign_pi16(__m64 __a, __m64 __b)\n" |
51794 | "{\n" |
51795 | " return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);\n" |
51796 | "}\n" |
51797 | "\n" |
51798 | "/// For each 32-bit integer in the first source operand, perform one of\n" |
51799 | "/// the following actions as specified by the second source operand.\n" |
51800 | "///\n" |
51801 | "/// If the doubleword in the second source is negative, calculate the two's\n" |
51802 | "/// complement of the corresponding doubleword in the first source, and\n" |
51803 | "/// write that value to the destination. If the doubleword in the second\n" |
51804 | "/// source is positive, copy the corresponding doubleword from the first\n" |
51805 | "/// source to the destination. If the doubleword in the second source is\n" |
51806 | "/// zero, clear the corresponding doubleword in the destination.\n" |
51807 | "///\n" |
51808 | "/// \\headerfile <x86intrin.h>\n" |
51809 | "///\n" |
51810 | "/// This intrinsic corresponds to the \\c PSIGND instruction.\n" |
51811 | "///\n" |
51812 | "/// \\param __a\n" |
51813 | "/// A 64-bit integer vector containing the values to be copied.\n" |
51814 | "/// \\param __b\n" |
51815 | "/// A 64-bit integer vector containing two control doublewords corresponding\n" |
51816 | "/// to positions in the destination.\n" |
51817 | "/// \\returns A 64-bit integer vector containing the resultant values.\n" |
51818 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
51819 | "_mm_sign_pi32(__m64 __a, __m64 __b)\n" |
51820 | "{\n" |
51821 | " return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);\n" |
51822 | "}\n" |
51823 | "\n" |
51824 | "#undef __DEFAULT_FN_ATTRS\n" |
51825 | "#undef __DEFAULT_FN_ATTRS_MMX\n" |
51826 | "\n" |
51827 | "#endif /* __TMMINTRIN_H */\n" |
51828 | "" } , |
51829 | { "/builtins/unwind.h" , "/*===---- unwind.h - Stack unwinding ----------------------------------------===\n" |
51830 | " *\n" |
51831 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
51832 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
51833 | " * in the Software without restriction, including without limitation the rights\n" |
51834 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
51835 | " * copies of the Software, and to permit persons to whom the Software is\n" |
51836 | " * furnished to do so, subject to the following conditions:\n" |
51837 | " *\n" |
51838 | " * The above copyright notice and this permission notice shall be included in\n" |
51839 | " * all copies or substantial portions of the Software.\n" |
51840 | " *\n" |
51841 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
51842 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
51843 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
51844 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
51845 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
51846 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
51847 | " * THE SOFTWARE.\n" |
51848 | " *\n" |
51849 | " *===-----------------------------------------------------------------------===\n" |
51850 | " */\n" |
51851 | "\n" |
51852 | "/* See \"Data Definitions for libgcc_s\" in the Linux Standard Base.*/\n" |
51853 | "\n" |
51854 | "#ifndef __CLANG_UNWIND_H\n" |
51855 | "#define __CLANG_UNWIND_H\n" |
51856 | "\n" |
51857 | "#if defined(__APPLE__) && __has_include_next(<unwind.h>)\n" |
51858 | "/* Darwin (from 11.x on) provide an unwind.h. If that's available,\n" |
51859 | " * use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,\n" |
51860 | " * so define that around the include.*/\n" |
51861 | "# ifndef _GNU_SOURCE\n" |
51862 | "# define _SHOULD_UNDEFINE_GNU_SOURCE\n" |
51863 | "# define _GNU_SOURCE\n" |
51864 | "# endif\n" |
51865 | "// libunwind's unwind.h reflects the current visibility. However, Mozilla\n" |
51866 | "// builds with -fvisibility=hidden and relies on gcc's unwind.h to reset the\n" |
51867 | "// visibility to default and export its contents. gcc also allows users to\n" |
51868 | "// override its override by #defining HIDE_EXPORTS (but note, this only obeys\n" |
51869 | "// the user's -fvisibility setting; it doesn't hide any exports on its own). We\n" |
51870 | "// imitate gcc's header here:\n" |
51871 | "# ifdef HIDE_EXPORTS\n" |
51872 | "# include_next <unwind.h>\n" |
51873 | "# else\n" |
51874 | "# pragma GCC visibility push(default)\n" |
51875 | "# include_next <unwind.h>\n" |
51876 | "# pragma GCC visibility pop\n" |
51877 | "# endif\n" |
51878 | "# ifdef _SHOULD_UNDEFINE_GNU_SOURCE\n" |
51879 | "# undef _GNU_SOURCE\n" |
51880 | "# undef _SHOULD_UNDEFINE_GNU_SOURCE\n" |
51881 | "# endif\n" |
51882 | "#else\n" |
51883 | "\n" |
51884 | "#include <stdint.h>\n" |
51885 | "\n" |
51886 | "#ifdef __cplusplus\n" |
51887 | "extern \"C\" {\n" |
51888 | "#endif\n" |
51889 | "\n" |
51890 | "/* It is a bit strange for a header to play with the visibility of the\n" |
51891 | " symbols it declares, but this matches gcc's behavior and some programs\n" |
51892 | " depend on it */\n" |
51893 | "#ifndef HIDE_EXPORTS\n" |
51894 | "#pragma GCC visibility push(default)\n" |
51895 | "#endif\n" |
51896 | "\n" |
51897 | "typedef uintptr_t _Unwind_Word;\n" |
51898 | "typedef intptr_t _Unwind_Sword;\n" |
51899 | "typedef uintptr_t _Unwind_Ptr;\n" |
51900 | "typedef uintptr_t _Unwind_Internal_Ptr;\n" |
51901 | "typedef uint64_t _Unwind_Exception_Class;\n" |
51902 | "\n" |
51903 | "typedef intptr_t _sleb128_t;\n" |
51904 | "typedef uintptr_t _uleb128_t;\n" |
51905 | "\n" |
51906 | "struct _Unwind_Context;\n" |
51907 | "#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))\n" |
51908 | "struct _Unwind_Control_Block;\n" |
51909 | "typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */\n" |
51910 | "#else\n" |
51911 | "struct _Unwind_Exception;\n" |
51912 | "typedef struct _Unwind_Exception _Unwind_Exception;\n" |
51913 | "#endif\n" |
51914 | "typedef enum {\n" |
51915 | " _URC_NO_REASON = 0,\n" |
51916 | "#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \\\n" |
51917 | " !defined(__ARM_DWARF_EH__)\n" |
51918 | " _URC_OK = 0, /* used by ARM EHABI */\n" |
51919 | "#endif\n" |
51920 | " _URC_FOREIGN_EXCEPTION_CAUGHT = 1,\n" |
51921 | "\n" |
51922 | " _URC_FATAL_PHASE2_ERROR = 2,\n" |
51923 | " _URC_FATAL_PHASE1_ERROR = 3,\n" |
51924 | " _URC_NORMAL_STOP = 4,\n" |
51925 | "\n" |
51926 | " _URC_END_OF_STACK = 5,\n" |
51927 | " _URC_HANDLER_FOUND = 6,\n" |
51928 | " _URC_INSTALL_CONTEXT = 7,\n" |
51929 | " _URC_CONTINUE_UNWIND = 8,\n" |
51930 | "#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \\\n" |
51931 | " !defined(__ARM_DWARF_EH__)\n" |
51932 | " _URC_FAILURE = 9 /* used by ARM EHABI */\n" |
51933 | "#endif\n" |
51934 | "} _Unwind_Reason_Code;\n" |
51935 | "\n" |
51936 | "typedef enum {\n" |
51937 | " _UA_SEARCH_PHASE = 1,\n" |
51938 | " _UA_CLEANUP_PHASE = 2,\n" |
51939 | "\n" |
51940 | " _UA_HANDLER_FRAME = 4,\n" |
51941 | " _UA_FORCE_UNWIND = 8,\n" |
51942 | " _UA_END_OF_STACK = 16 /* gcc extension to C++ ABI */\n" |
51943 | "} _Unwind_Action;\n" |
51944 | "\n" |
51945 | "typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,\n" |
51946 | " _Unwind_Exception *);\n" |
51947 | "\n" |
51948 | "#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))\n" |
51949 | "typedef struct _Unwind_Control_Block _Unwind_Control_Block;\n" |
51950 | "typedef uint32_t _Unwind_EHT_Header;\n" |
51951 | "\n" |
51952 | "struct _Unwind_Control_Block {\n" |
51953 | " uint64_t exception_class;\n" |
51954 | " void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *);\n" |
51955 | " /* unwinder cache (private fields for the unwinder's use) */\n" |
51956 | " struct {\n" |
51957 | " uint32_t reserved1; /* forced unwind stop function, 0 if not forced */\n" |
51958 | " uint32_t reserved2; /* personality routine */\n" |
51959 | " uint32_t reserved3; /* callsite */\n" |
51960 | " uint32_t reserved4; /* forced unwind stop argument */\n" |
51961 | " uint32_t reserved5;\n" |
51962 | " } unwinder_cache;\n" |
51963 | " /* propagation barrier cache (valid after phase 1) */\n" |
51964 | " struct {\n" |
51965 | " uint32_t sp;\n" |
51966 | " uint32_t bitpattern[5];\n" |
51967 | " } barrier_cache;\n" |
51968 | " /* cleanup cache (preserved over cleanup) */\n" |
51969 | " struct {\n" |
51970 | " uint32_t bitpattern[4];\n" |
51971 | " } cleanup_cache;\n" |
51972 | " /* personality cache (for personality's benefit) */\n" |
51973 | " struct {\n" |
51974 | " uint32_t fnstart; /* function start address */\n" |
51975 | " _Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */\n" |
51976 | " uint32_t additional; /* additional data */\n" |
51977 | " uint32_t reserved1;\n" |
51978 | " } pr_cache;\n" |
51979 | " long long int : 0; /* force alignment of next item to 8-byte boundary */\n" |
51980 | "} __attribute__((__aligned__(8)));\n" |
51981 | "#else\n" |
51982 | "struct _Unwind_Exception {\n" |
51983 | " _Unwind_Exception_Class exception_class;\n" |
51984 | " _Unwind_Exception_Cleanup_Fn exception_cleanup;\n" |
51985 | "#if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__)\n" |
51986 | " _Unwind_Word private_[6];\n" |
51987 | "#else\n" |
51988 | " _Unwind_Word private_1;\n" |
51989 | " _Unwind_Word private_2;\n" |
51990 | "#endif\n" |
51991 | " /* The Itanium ABI requires that _Unwind_Exception objects are \"double-word\n" |
51992 | " * aligned\". GCC has interpreted this to mean \"use the maximum useful\n" |
51993 | " * alignment for the target\"; so do we. */\n" |
51994 | "} __attribute__((__aligned__));\n" |
51995 | "#endif\n" |
51996 | "\n" |
51997 | "typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action,\n" |
51998 | " _Unwind_Exception_Class,\n" |
51999 | " _Unwind_Exception *,\n" |
52000 | " struct _Unwind_Context *,\n" |
52001 | " void *);\n" |
52002 | "\n" |
52003 | "typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(int, _Unwind_Action,\n" |
52004 | " _Unwind_Exception_Class,\n" |
52005 | " _Unwind_Exception *,\n" |
52006 | " struct _Unwind_Context *);\n" |
52007 | "typedef _Unwind_Personality_Fn __personality_routine;\n" |
52008 | "\n" |
52009 | "typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,\n" |
52010 | " void *);\n" |
52011 | "\n" |
52012 | "#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))\n" |
52013 | "typedef enum {\n" |
52014 | " _UVRSC_CORE = 0, /* integer register */\n" |
52015 | " _UVRSC_VFP = 1, /* vfp */\n" |
52016 | " _UVRSC_WMMXD = 3, /* Intel WMMX data register */\n" |
52017 | " _UVRSC_WMMXC = 4 /* Intel WMMX control register */\n" |
52018 | "} _Unwind_VRS_RegClass;\n" |
52019 | "\n" |
52020 | "typedef enum {\n" |
52021 | " _UVRSD_UINT32 = 0,\n" |
52022 | " _UVRSD_VFPX = 1,\n" |
52023 | " _UVRSD_UINT64 = 3,\n" |
52024 | " _UVRSD_FLOAT = 4,\n" |
52025 | " _UVRSD_DOUBLE = 5\n" |
52026 | "} _Unwind_VRS_DataRepresentation;\n" |
52027 | "\n" |
52028 | "typedef enum {\n" |
52029 | " _UVRSR_OK = 0,\n" |
52030 | " _UVRSR_NOT_IMPLEMENTED = 1,\n" |
52031 | " _UVRSR_FAILED = 2\n" |
52032 | "} _Unwind_VRS_Result;\n" |
52033 | "\n" |
52034 | "typedef uint32_t _Unwind_State;\n" |
52035 | "#define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0)\n" |
52036 | "#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1)\n" |
52037 | "#define _US_UNWIND_FRAME_RESUME ((_Unwind_State)2)\n" |
52038 | "#define _US_ACTION_MASK ((_Unwind_State)3)\n" |
52039 | "#define _US_FORCE_UNWIND ((_Unwind_State)8)\n" |
52040 | "\n" |
52041 | "_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,\n" |
52042 | " _Unwind_VRS_RegClass __regclass,\n" |
52043 | " uint32_t __regno,\n" |
52044 | " _Unwind_VRS_DataRepresentation __representation,\n" |
52045 | " void *__valuep);\n" |
52046 | "\n" |
52047 | "_Unwind_VRS_Result _Unwind_VRS_Set(struct _Unwind_Context *__context,\n" |
52048 | " _Unwind_VRS_RegClass __regclass,\n" |
52049 | " uint32_t __regno,\n" |
52050 | " _Unwind_VRS_DataRepresentation __representation,\n" |
52051 | " void *__valuep);\n" |
52052 | "\n" |
52053 | "static __inline__\n" |
52054 | "_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *__context, int __index) {\n" |
52055 | " _Unwind_Word __value;\n" |
52056 | " _Unwind_VRS_Get(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);\n" |
52057 | " return __value;\n" |
52058 | "}\n" |
52059 | "\n" |
52060 | "static __inline__\n" |
52061 | "void _Unwind_SetGR(struct _Unwind_Context *__context, int __index,\n" |
52062 | " _Unwind_Word __value) {\n" |
52063 | " _Unwind_VRS_Set(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);\n" |
52064 | "}\n" |
52065 | "\n" |
52066 | "static __inline__\n" |
52067 | "_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *__context) {\n" |
52068 | " _Unwind_Word __ip = _Unwind_GetGR(__context, 15);\n" |
52069 | " return __ip & ~(_Unwind_Word)(0x1); /* Remove thumb mode bit. */\n" |
52070 | "}\n" |
52071 | "\n" |
52072 | "static __inline__\n" |
52073 | "void _Unwind_SetIP(struct _Unwind_Context *__context, _Unwind_Word __value) {\n" |
52074 | " _Unwind_Word __thumb_mode_bit = _Unwind_GetGR(__context, 15) & 0x1;\n" |
52075 | " _Unwind_SetGR(__context, 15, __value | __thumb_mode_bit);\n" |
52076 | "}\n" |
52077 | "#else\n" |
52078 | "_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *, int);\n" |
52079 | "void _Unwind_SetGR(struct _Unwind_Context *, int, _Unwind_Word);\n" |
52080 | "\n" |
52081 | "_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *);\n" |
52082 | "void _Unwind_SetIP(struct _Unwind_Context *, _Unwind_Word);\n" |
52083 | "#endif\n" |
52084 | "\n" |
52085 | "\n" |
52086 | "_Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *);\n" |
52087 | "\n" |
52088 | "_Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *);\n" |
52089 | "\n" |
52090 | "_Unwind_Word _Unwind_GetBSP(struct _Unwind_Context *);\n" |
52091 | "\n" |
52092 | "void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *);\n" |
52093 | "\n" |
52094 | "_Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *);\n" |
52095 | "\n" |
52096 | "/* DWARF EH functions; currently not available on Darwin/ARM */\n" |
52097 | "#if !defined(__APPLE__) || !defined(__arm__)\n" |
52098 | "_Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *);\n" |
52099 | "_Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn,\n" |
52100 | " void *);\n" |
52101 | "void _Unwind_DeleteException(_Unwind_Exception *);\n" |
52102 | "void _Unwind_Resume(_Unwind_Exception *);\n" |
52103 | "_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(_Unwind_Exception *);\n" |
52104 | "\n" |
52105 | "#endif\n" |
52106 | "\n" |
52107 | "_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *);\n" |
52108 | "\n" |
52109 | "/* setjmp(3)/longjmp(3) stuff */\n" |
52110 | "typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t;\n" |
52111 | "\n" |
52112 | "void _Unwind_SjLj_Register(_Unwind_FunctionContext_t);\n" |
52113 | "void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t);\n" |
52114 | "_Unwind_Reason_Code _Unwind_SjLj_RaiseException(_Unwind_Exception *);\n" |
52115 | "_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *,\n" |
52116 | " _Unwind_Stop_Fn, void *);\n" |
52117 | "void _Unwind_SjLj_Resume(_Unwind_Exception *);\n" |
52118 | "_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *);\n" |
52119 | "\n" |
52120 | "void *_Unwind_FindEnclosingFunction(void *);\n" |
52121 | "\n" |
52122 | "#ifdef __APPLE__\n" |
52123 | "\n" |
52124 | "_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *)\n" |
52125 | " __attribute__((__unavailable__));\n" |
52126 | "_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *)\n" |
52127 | " __attribute__((__unavailable__));\n" |
52128 | "\n" |
52129 | "/* Darwin-specific functions */\n" |
52130 | "void __register_frame(const void *);\n" |
52131 | "void __deregister_frame(const void *);\n" |
52132 | "\n" |
52133 | "struct dwarf_eh_bases {\n" |
52134 | " uintptr_t tbase;\n" |
52135 | " uintptr_t dbase;\n" |
52136 | " uintptr_t func;\n" |
52137 | "};\n" |
52138 | "void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *);\n" |
52139 | "\n" |
52140 | "void __register_frame_info_bases(const void *, void *, void *, void *)\n" |
52141 | " __attribute__((__unavailable__));\n" |
52142 | "void __register_frame_info(const void *, void *) __attribute__((__unavailable__));\n" |
52143 | "void __register_frame_info_table_bases(const void *, void*, void *, void *)\n" |
52144 | " __attribute__((__unavailable__));\n" |
52145 | "void __register_frame_info_table(const void *, void *)\n" |
52146 | " __attribute__((__unavailable__));\n" |
52147 | "void __register_frame_table(const void *) __attribute__((__unavailable__));\n" |
52148 | "void __deregister_frame_info(const void *) __attribute__((__unavailable__));\n" |
52149 | "void __deregister_frame_info_bases(const void *)__attribute__((__unavailable__));\n" |
52150 | "\n" |
52151 | "#else\n" |
52152 | "\n" |
52153 | "_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *);\n" |
52154 | "_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *);\n" |
52155 | "\n" |
52156 | "#endif\n" |
52157 | "\n" |
52158 | "\n" |
52159 | "#ifndef HIDE_EXPORTS\n" |
52160 | "#pragma GCC visibility pop\n" |
52161 | "#endif\n" |
52162 | "\n" |
52163 | "#ifdef __cplusplus\n" |
52164 | "}\n" |
52165 | "#endif\n" |
52166 | "\n" |
52167 | "#endif\n" |
52168 | "\n" |
52169 | "#endif /* __CLANG_UNWIND_H */\n" |
52170 | "" } , |
52171 | { "/builtins/vadefs.h" , "/* ===-------- vadefs.h ---------------------------------------------------===\n" |
52172 | " *\n" |
52173 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
52174 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
52175 | " * in the Software without restriction, including without limitation the rights\n" |
52176 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
52177 | " * copies of the Software, and to permit persons to whom the Software is\n" |
52178 | " * furnished to do so, subject to the following conditions:\n" |
52179 | " *\n" |
52180 | " * The above copyright notice and this permission notice shall be included in\n" |
52181 | " * all copies or substantial portions of the Software.\n" |
52182 | " *\n" |
52183 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
52184 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
52185 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
52186 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
52187 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
52188 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
52189 | " * THE SOFTWARE.\n" |
52190 | " *\n" |
52191 | " *===-----------------------------------------------------------------------===\n" |
52192 | " */\n" |
52193 | "\n" |
52194 | "/* Only include this if we are aiming for MSVC compatibility. */\n" |
52195 | "#ifndef _MSC_VER\n" |
52196 | "#include_next <vadefs.h>\n" |
52197 | "#else\n" |
52198 | "\n" |
52199 | "#ifndef __clang_vadefs_h\n" |
52200 | "#define __clang_vadefs_h\n" |
52201 | "\n" |
52202 | "#include_next <vadefs.h>\n" |
52203 | "\n" |
52204 | "/* Override macros from vadefs.h with definitions that work with Clang. */\n" |
52205 | "#ifdef _crt_va_start\n" |
52206 | "#undef _crt_va_start\n" |
52207 | "#define _crt_va_start(ap, param) __builtin_va_start(ap, param)\n" |
52208 | "#endif\n" |
52209 | "#ifdef _crt_va_end\n" |
52210 | "#undef _crt_va_end\n" |
52211 | "#define _crt_va_end(ap) __builtin_va_end(ap)\n" |
52212 | "#endif\n" |
52213 | "#ifdef _crt_va_arg\n" |
52214 | "#undef _crt_va_arg\n" |
52215 | "#define _crt_va_arg(ap, type) __builtin_va_arg(ap, type)\n" |
52216 | "#endif\n" |
52217 | "\n" |
52218 | "/* VS 2015 switched to double underscore names, which is an improvement, but now\n" |
52219 | " * we have to intercept those names too.\n" |
52220 | " */\n" |
52221 | "#ifdef __crt_va_start\n" |
52222 | "#undef __crt_va_start\n" |
52223 | "#define __crt_va_start(ap, param) __builtin_va_start(ap, param)\n" |
52224 | "#endif\n" |
52225 | "#ifdef __crt_va_end\n" |
52226 | "#undef __crt_va_end\n" |
52227 | "#define __crt_va_end(ap) __builtin_va_end(ap)\n" |
52228 | "#endif\n" |
52229 | "#ifdef __crt_va_arg\n" |
52230 | "#undef __crt_va_arg\n" |
52231 | "#define __crt_va_arg(ap, type) __builtin_va_arg(ap, type)\n" |
52232 | "#endif\n" |
52233 | "\n" |
52234 | "#endif\n" |
52235 | "#endif\n" |
52236 | "" } , |
52237 | { "/builtins/vaesintrin.h" , "/*===------------------ vaesintrin.h - VAES intrinsics ---------------------===\n" |
52238 | " *\n" |
52239 | " *\n" |
52240 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
52241 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
52242 | " * in the Software without restriction, including without limitation the rights\n" |
52243 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
52244 | " * copies of the Software, and to permit persons to whom the Software is\n" |
52245 | " * furnished to do so, subject to the following conditions:\n" |
52246 | " *\n" |
52247 | " * The above copyright notice and this permission notice shall be included in\n" |
52248 | " * all copies or substantial portions of the Software.\n" |
52249 | " *\n" |
52250 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
52251 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
52252 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
52253 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
52254 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
52255 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
52256 | " * THE SOFTWARE.\n" |
52257 | " *\n" |
52258 | " *===-----------------------------------------------------------------------===\n" |
52259 | " */\n" |
52260 | "#ifndef __IMMINTRIN_H\n" |
52261 | "#error \"Never use <vaesintrin.h> directly; include <immintrin.h> instead.\"\n" |
52262 | "#endif\n" |
52263 | "\n" |
52264 | "#ifndef __VAESINTRIN_H\n" |
52265 | "#define __VAESINTRIN_H\n" |
52266 | "\n" |
52267 | "/* Default attributes for YMM forms. */\n" |
52268 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"vaes\"), __min_vector_width__(256)))\n" |
52269 | "\n" |
52270 | "/* Default attributes for ZMM forms. */\n" |
52271 | "#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__(\"avx512f,vaes\"), __min_vector_width__(512)))\n" |
52272 | "\n" |
52273 | "\n" |
52274 | "static __inline__ __m256i __DEFAULT_FN_ATTRS\n" |
52275 | " _mm256_aesenc_epi128(__m256i __A, __m256i __B)\n" |
52276 | "{\n" |
52277 | " return (__m256i) __builtin_ia32_aesenc256((__v4di) __A,\n" |
52278 | " (__v4di) __B);\n" |
52279 | "}\n" |
52280 | "\n" |
52281 | "static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n" |
52282 | " _mm512_aesenc_epi128(__m512i __A, __m512i __B)\n" |
52283 | "{\n" |
52284 | " return (__m512i) __builtin_ia32_aesenc512((__v8di) __A,\n" |
52285 | " (__v8di) __B);\n" |
52286 | "}\n" |
52287 | "\n" |
52288 | "static __inline__ __m256i __DEFAULT_FN_ATTRS\n" |
52289 | " _mm256_aesdec_epi128(__m256i __A, __m256i __B)\n" |
52290 | "{\n" |
52291 | " return (__m256i) __builtin_ia32_aesdec256((__v4di) __A,\n" |
52292 | " (__v4di) __B);\n" |
52293 | "}\n" |
52294 | "\n" |
52295 | "static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n" |
52296 | " _mm512_aesdec_epi128(__m512i __A, __m512i __B)\n" |
52297 | "{\n" |
52298 | " return (__m512i) __builtin_ia32_aesdec512((__v8di) __A,\n" |
52299 | " (__v8di) __B);\n" |
52300 | "}\n" |
52301 | "\n" |
52302 | "static __inline__ __m256i __DEFAULT_FN_ATTRS\n" |
52303 | " _mm256_aesenclast_epi128(__m256i __A, __m256i __B)\n" |
52304 | "{\n" |
52305 | " return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A,\n" |
52306 | " (__v4di) __B);\n" |
52307 | "}\n" |
52308 | "\n" |
52309 | "static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n" |
52310 | " _mm512_aesenclast_epi128(__m512i __A, __m512i __B)\n" |
52311 | "{\n" |
52312 | " return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A,\n" |
52313 | " (__v8di) __B);\n" |
52314 | "}\n" |
52315 | "\n" |
52316 | "static __inline__ __m256i __DEFAULT_FN_ATTRS\n" |
52317 | " _mm256_aesdeclast_epi128(__m256i __A, __m256i __B)\n" |
52318 | "{\n" |
52319 | " return (__m256i) __builtin_ia32_aesdeclast256((__v4di) __A,\n" |
52320 | " (__v4di) __B);\n" |
52321 | "}\n" |
52322 | "\n" |
52323 | "static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n" |
52324 | " _mm512_aesdeclast_epi128(__m512i __A, __m512i __B)\n" |
52325 | "{\n" |
52326 | " return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A,\n" |
52327 | " (__v8di) __B);\n" |
52328 | "}\n" |
52329 | "\n" |
52330 | "\n" |
52331 | "#undef __DEFAULT_FN_ATTRS\n" |
52332 | "#undef __DEFAULT_FN_ATTRS_F\n" |
52333 | "\n" |
52334 | "#endif\n" |
52335 | "" } , |
52336 | { "/builtins/varargs.h" , "/*===---- varargs.h - Variable argument handling -------------------------------------===\n" |
52337 | "*\n" |
52338 | "* Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
52339 | "* of this software and associated documentation files (the \"Software\"), to deal\n" |
52340 | "* in the Software without restriction, including without limitation the rights\n" |
52341 | "* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
52342 | "* copies of the Software, and to permit persons to whom the Software is\n" |
52343 | "* furnished to do so, subject to the following conditions:\n" |
52344 | "*\n" |
52345 | "* The above copyright notice and this permission notice shall be included in\n" |
52346 | "* all copies or substantial portions of the Software.\n" |
52347 | "*\n" |
52348 | "* THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
52349 | "* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
52350 | "* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
52351 | "* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
52352 | "* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
52353 | "* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
52354 | "* THE SOFTWARE.\n" |
52355 | "*\n" |
52356 | "*===-----------------------------------------------------------------------===\n" |
52357 | "*/\n" |
52358 | "#ifndef __VARARGS_H\n" |
52359 | "#define __VARARGS_H\n" |
52360 | " #error \"Please use <stdarg.h> instead of <varargs.h>\"\n" |
52361 | "#endif\n" |
52362 | "" } , |
52363 | { "/builtins/vpclmulqdqintrin.h" , "/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------===\n" |
52364 | " *\n" |
52365 | " *\n" |
52366 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
52367 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
52368 | " * in the Software without restriction, including without limitation the rights\n" |
52369 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
52370 | " * copies of the Software, and to permit persons to whom the Software is\n" |
52371 | " * furnished to do so, subject to the following conditions:\n" |
52372 | " *\n" |
52373 | " * The above copyright notice and this permission notice shall be included in\n" |
52374 | " * all copies or substantial portions of the Software.\n" |
52375 | " *\n" |
52376 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
52377 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
52378 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
52379 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
52380 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
52381 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
52382 | " * THE SOFTWARE.\n" |
52383 | " *\n" |
52384 | " *===-----------------------------------------------------------------------===\n" |
52385 | " */\n" |
52386 | "#ifndef __IMMINTRIN_H\n" |
52387 | "#error \"Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead.\"\n" |
52388 | "#endif\n" |
52389 | "\n" |
52390 | "#ifndef __VPCLMULQDQINTRIN_H\n" |
52391 | "#define __VPCLMULQDQINTRIN_H\n" |
52392 | "\n" |
52393 | "#define _mm256_clmulepi64_epi128(A, B, I) \\\n" |
52394 | " (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \\\n" |
52395 | " (__v4di)(__m256i)(B), \\\n" |
52396 | " (char)(I))\n" |
52397 | "\n" |
52398 | "#define _mm512_clmulepi64_epi128(A, B, I) \\\n" |
52399 | " (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \\\n" |
52400 | " (__v8di)(__m512i)(B), \\\n" |
52401 | " (char)(I))\n" |
52402 | "\n" |
52403 | "#endif /* __VPCLMULQDQINTRIN_H */\n" |
52404 | "\n" |
52405 | "" } , |
52406 | { "/builtins/waitpkgintrin.h" , "/*===----------------------- waitpkgintrin.h - WAITPKG --------------------===\n" |
52407 | " *\n" |
52408 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
52409 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
52410 | " * in the Software without restriction, including without limitation the rights\n" |
52411 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
52412 | " * copies of the Software, and to permit persons to whom the Software is\n" |
52413 | " * furnished to do so, subject to the following conditions:\n" |
52414 | " *\n" |
52415 | " * The above copyright notice and this permission notice shall be included in\n" |
52416 | " * all copies or substantial portions of the Software.\n" |
52417 | " *\n" |
52418 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
52419 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
52420 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
52421 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
52422 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
52423 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
52424 | " * THE SOFTWARE.\n" |
52425 | " *\n" |
52426 | " *===-----------------------------------------------------------------------===\n" |
52427 | " */\n" |
52428 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
52429 | "#error \"Never use <waitpkgintrin.h> directly; include <x86intrin.h> instead.\"\n" |
52430 | "#endif\n" |
52431 | "\n" |
52432 | "#ifndef __WAITPKGINTRIN_H\n" |
52433 | "#define __WAITPKGINTRIN_H\n" |
52434 | "\n" |
52435 | "/* Define the default attributes for the functions in this file. */\n" |
52436 | "#define __DEFAULT_FN_ATTRS \\\n" |
52437 | " __attribute__((__always_inline__, __nodebug__, __target__(\"waitpkg\")))\n" |
52438 | "\n" |
52439 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
52440 | "_umonitor (void * __address)\n" |
52441 | "{\n" |
52442 | " __builtin_ia32_umonitor (__address);\n" |
52443 | "}\n" |
52444 | "\n" |
52445 | "static __inline__ unsigned char __DEFAULT_FN_ATTRS\n" |
52446 | "_umwait (unsigned int __control, unsigned long long __counter)\n" |
52447 | "{\n" |
52448 | " return __builtin_ia32_umwait (__control,\n" |
52449 | " (unsigned int)(__counter >> 32), (unsigned int)__counter);\n" |
52450 | "}\n" |
52451 | "\n" |
52452 | "static __inline__ unsigned char __DEFAULT_FN_ATTRS\n" |
52453 | "_tpause (unsigned int __control, unsigned long long __counter)\n" |
52454 | "{\n" |
52455 | " return __builtin_ia32_tpause (__control,\n" |
52456 | " (unsigned int)(__counter >> 32), (unsigned int)__counter);\n" |
52457 | "}\n" |
52458 | "\n" |
52459 | "#undef __DEFAULT_FN_ATTRS\n" |
52460 | "\n" |
52461 | "#endif /* __WAITPKGINTRIN_H */\n" |
52462 | "" } , |
52463 | { "/builtins/wbnoinvdintrin.h" , "/*===-------------- wbnoinvdintrin.h - wbnoinvd intrinsic-------------------===\n" |
52464 | " *\n" |
52465 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
52466 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
52467 | " * in the Software without restriction, including without limitation the rights\n" |
52468 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
52469 | " * copies of the Software, and to permit persons to whom the Software is\n" |
52470 | " * furnished to do so, subject to the following conditions:\n" |
52471 | " *\n" |
52472 | " * The above copyright notice and this permission notice shall be included in\n" |
52473 | " * all copies or substantial portions of the Software.\n" |
52474 | " *\n" |
52475 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
52476 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
52477 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
52478 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
52479 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
52480 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
52481 | " * THE SOFTWARE.\n" |
52482 | " *\n" |
52483 | " *===-----------------------------------------------------------------------===\n" |
52484 | " */\n" |
52485 | "\n" |
52486 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
52487 | "#error \"Never use <wbnoinvdintrin.h> directly; include <x86intrin.h> instead.\"\n" |
52488 | "#endif\n" |
52489 | "\n" |
52490 | "#ifndef __WBNOINVDINTRIN_H\n" |
52491 | "#define __WBNOINVDINTRIN_H\n" |
52492 | "\n" |
52493 | "static __inline__ void\n" |
52494 | " __attribute__((__always_inline__, __nodebug__, __target__(\"wbnoinvd\")))\n" |
52495 | "_wbnoinvd (void)\n" |
52496 | "{\n" |
52497 | " __builtin_ia32_wbnoinvd ();\n" |
52498 | "}\n" |
52499 | "\n" |
52500 | "#endif /* __WBNOINVDINTRIN_H */\n" |
52501 | "" } , |
52502 | { "/builtins/wmmintrin.h" , "/*===---- wmmintrin.h - AES intrinsics ------------------------------------===\n" |
52503 | " *\n" |
52504 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
52505 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
52506 | " * in the Software without restriction, including without limitation the rights\n" |
52507 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
52508 | " * copies of the Software, and to permit persons to whom the Software is\n" |
52509 | " * furnished to do so, subject to the following conditions:\n" |
52510 | " *\n" |
52511 | " * The above copyright notice and this permission notice shall be included in\n" |
52512 | " * all copies or substantial portions of the Software.\n" |
52513 | " *\n" |
52514 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
52515 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
52516 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
52517 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
52518 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
52519 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
52520 | " * THE SOFTWARE.\n" |
52521 | " *\n" |
52522 | " *===-----------------------------------------------------------------------===\n" |
52523 | " */\n" |
52524 | "\n" |
52525 | "#ifndef __WMMINTRIN_H\n" |
52526 | "#define __WMMINTRIN_H\n" |
52527 | "\n" |
52528 | "#include <emmintrin.h>\n" |
52529 | "\n" |
52530 | "#include <__wmmintrin_aes.h>\n" |
52531 | "\n" |
52532 | "#include <__wmmintrin_pclmul.h>\n" |
52533 | "\n" |
52534 | "#endif /* __WMMINTRIN_H */\n" |
52535 | "" } , |
52536 | { "/builtins/x86intrin.h" , "/*===---- x86intrin.h - X86 intrinsics -------------------------------------===\n" |
52537 | " *\n" |
52538 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
52539 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
52540 | " * in the Software without restriction, including without limitation the rights\n" |
52541 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
52542 | " * copies of the Software, and to permit persons to whom the Software is\n" |
52543 | " * furnished to do so, subject to the following conditions:\n" |
52544 | " *\n" |
52545 | " * The above copyright notice and this permission notice shall be included in\n" |
52546 | " * all copies or substantial portions of the Software.\n" |
52547 | " *\n" |
52548 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
52549 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
52550 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
52551 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
52552 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
52553 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
52554 | " * THE SOFTWARE.\n" |
52555 | " *\n" |
52556 | " *===-----------------------------------------------------------------------===\n" |
52557 | " */\n" |
52558 | "\n" |
52559 | "#ifndef __X86INTRIN_H\n" |
52560 | "#define __X86INTRIN_H\n" |
52561 | "\n" |
52562 | "#include <ia32intrin.h>\n" |
52563 | "\n" |
52564 | "#include <immintrin.h>\n" |
52565 | "\n" |
52566 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__3dNOW__)\n" |
52567 | "#include <mm3dnow.h>\n" |
52568 | "#endif\n" |
52569 | "\n" |
52570 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PRFCHW__)\n" |
52571 | "#include <prfchwintrin.h>\n" |
52572 | "#endif\n" |
52573 | "\n" |
52574 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE4A__)\n" |
52575 | "#include <ammintrin.h>\n" |
52576 | "#endif\n" |
52577 | "\n" |
52578 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA4__)\n" |
52579 | "#include <fma4intrin.h>\n" |
52580 | "#endif\n" |
52581 | "\n" |
52582 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XOP__)\n" |
52583 | "#include <xopintrin.h>\n" |
52584 | "#endif\n" |
52585 | "\n" |
52586 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__TBM__)\n" |
52587 | "#include <tbmintrin.h>\n" |
52588 | "#endif\n" |
52589 | "\n" |
52590 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LWP__)\n" |
52591 | "#include <lwpintrin.h>\n" |
52592 | "#endif\n" |
52593 | "\n" |
52594 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MWAITX__)\n" |
52595 | "#include <mwaitxintrin.h>\n" |
52596 | "#endif\n" |
52597 | "\n" |
52598 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__)\n" |
52599 | "#include <clzerointrin.h>\n" |
52600 | "#endif\n" |
52601 | "\n" |
52602 | "\n" |
52603 | "#endif /* __X86INTRIN_H */\n" |
52604 | "" } , |
52605 | { "/builtins/xmmintrin.h" , "/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===\n" |
52606 | " *\n" |
52607 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
52608 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
52609 | " * in the Software without restriction, including without limitation the rights\n" |
52610 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
52611 | " * copies of the Software, and to permit persons to whom the Software is\n" |
52612 | " * furnished to do so, subject to the following conditions:\n" |
52613 | " *\n" |
52614 | " * The above copyright notice and this permission notice shall be included in\n" |
52615 | " * all copies or substantial portions of the Software.\n" |
52616 | " *\n" |
52617 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
52618 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
52619 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
52620 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
52621 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
52622 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
52623 | " * THE SOFTWARE.\n" |
52624 | " *\n" |
52625 | " *===-----------------------------------------------------------------------===\n" |
52626 | " */\n" |
52627 | "\n" |
52628 | "#ifndef __XMMINTRIN_H\n" |
52629 | "#define __XMMINTRIN_H\n" |
52630 | "\n" |
52631 | "#include <mmintrin.h>\n" |
52632 | "\n" |
52633 | "typedef int __v4si __attribute__((__vector_size__(16)));\n" |
52634 | "typedef float __v4sf __attribute__((__vector_size__(16)));\n" |
52635 | "typedef float __m128 __attribute__((__vector_size__(16)));\n" |
52636 | "\n" |
52637 | "/* Unsigned types */\n" |
52638 | "typedef unsigned int __v4su __attribute__((__vector_size__(16)));\n" |
52639 | "\n" |
52640 | "/* This header should only be included in a hosted environment as it depends on\n" |
52641 | " * a standard library to provide allocation routines. */\n" |
52642 | "#if __STDC_HOSTED__\n" |
52643 | "#include <mm_malloc.h>\n" |
52644 | "#endif\n" |
52645 | "\n" |
52646 | "/* Define the default attributes for the functions in this file. */\n" |
52647 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse\"), __min_vector_width__(128)))\n" |
52648 | "#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__(\"mmx,sse\"), __min_vector_width__(64)))\n" |
52649 | "\n" |
52650 | "/// Adds the 32-bit float values in the low-order bits of the operands.\n" |
52651 | "///\n" |
52652 | "/// \\headerfile <x86intrin.h>\n" |
52653 | "///\n" |
52654 | "/// This intrinsic corresponds to the <c> VADDSS / ADDSS </c> instructions.\n" |
52655 | "///\n" |
52656 | "/// \\param __a\n" |
52657 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
52658 | "/// The lower 32 bits of this operand are used in the calculation.\n" |
52659 | "/// \\param __b\n" |
52660 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
52661 | "/// The lower 32 bits of this operand are used in the calculation.\n" |
52662 | "/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the sum\n" |
52663 | "/// of the lower 32 bits of both operands. The upper 96 bits are copied from\n" |
52664 | "/// the upper 96 bits of the first source operand.\n" |
52665 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52666 | "_mm_add_ss(__m128 __a, __m128 __b)\n" |
52667 | "{\n" |
52668 | " __a[0] += __b[0];\n" |
52669 | " return __a;\n" |
52670 | "}\n" |
52671 | "\n" |
52672 | "/// Adds two 128-bit vectors of [4 x float], and returns the results of\n" |
52673 | "/// the addition.\n" |
52674 | "///\n" |
52675 | "/// \\headerfile <x86intrin.h>\n" |
52676 | "///\n" |
52677 | "/// This intrinsic corresponds to the <c> VADDPS / ADDPS </c> instructions.\n" |
52678 | "///\n" |
52679 | "/// \\param __a\n" |
52680 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
52681 | "/// \\param __b\n" |
52682 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
52683 | "/// \\returns A 128-bit vector of [4 x float] containing the sums of both\n" |
52684 | "/// operands.\n" |
52685 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52686 | "_mm_add_ps(__m128 __a, __m128 __b)\n" |
52687 | "{\n" |
52688 | " return (__m128)((__v4sf)__a + (__v4sf)__b);\n" |
52689 | "}\n" |
52690 | "\n" |
52691 | "/// Subtracts the 32-bit float value in the low-order bits of the second\n" |
52692 | "/// operand from the corresponding value in the first operand.\n" |
52693 | "///\n" |
52694 | "/// \\headerfile <x86intrin.h>\n" |
52695 | "///\n" |
52696 | "/// This intrinsic corresponds to the <c> VSUBSS / SUBSS </c> instructions.\n" |
52697 | "///\n" |
52698 | "/// \\param __a\n" |
52699 | "/// A 128-bit vector of [4 x float] containing the minuend. The lower 32 bits\n" |
52700 | "/// of this operand are used in the calculation.\n" |
52701 | "/// \\param __b\n" |
52702 | "/// A 128-bit vector of [4 x float] containing the subtrahend. The lower 32\n" |
52703 | "/// bits of this operand are used in the calculation.\n" |
52704 | "/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n" |
52705 | "/// difference of the lower 32 bits of both operands. The upper 96 bits are\n" |
52706 | "/// copied from the upper 96 bits of the first source operand.\n" |
52707 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52708 | "_mm_sub_ss(__m128 __a, __m128 __b)\n" |
52709 | "{\n" |
52710 | " __a[0] -= __b[0];\n" |
52711 | " return __a;\n" |
52712 | "}\n" |
52713 | "\n" |
52714 | "/// Subtracts each of the values of the second operand from the first\n" |
52715 | "/// operand, both of which are 128-bit vectors of [4 x float] and returns\n" |
52716 | "/// the results of the subtraction.\n" |
52717 | "///\n" |
52718 | "/// \\headerfile <x86intrin.h>\n" |
52719 | "///\n" |
52720 | "/// This intrinsic corresponds to the <c> VSUBPS / SUBPS </c> instructions.\n" |
52721 | "///\n" |
52722 | "/// \\param __a\n" |
52723 | "/// A 128-bit vector of [4 x float] containing the minuend.\n" |
52724 | "/// \\param __b\n" |
52725 | "/// A 128-bit vector of [4 x float] containing the subtrahend.\n" |
52726 | "/// \\returns A 128-bit vector of [4 x float] containing the differences between\n" |
52727 | "/// both operands.\n" |
52728 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52729 | "_mm_sub_ps(__m128 __a, __m128 __b)\n" |
52730 | "{\n" |
52731 | " return (__m128)((__v4sf)__a - (__v4sf)__b);\n" |
52732 | "}\n" |
52733 | "\n" |
52734 | "/// Multiplies two 32-bit float values in the low-order bits of the\n" |
52735 | "/// operands.\n" |
52736 | "///\n" |
52737 | "/// \\headerfile <x86intrin.h>\n" |
52738 | "///\n" |
52739 | "/// This intrinsic corresponds to the <c> VMULSS / MULSS </c> instructions.\n" |
52740 | "///\n" |
52741 | "/// \\param __a\n" |
52742 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
52743 | "/// The lower 32 bits of this operand are used in the calculation.\n" |
52744 | "/// \\param __b\n" |
52745 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
52746 | "/// The lower 32 bits of this operand are used in the calculation.\n" |
52747 | "/// \\returns A 128-bit vector of [4 x float] containing the product of the lower\n" |
52748 | "/// 32 bits of both operands. The upper 96 bits are copied from the upper 96\n" |
52749 | "/// bits of the first source operand.\n" |
52750 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52751 | "_mm_mul_ss(__m128 __a, __m128 __b)\n" |
52752 | "{\n" |
52753 | " __a[0] *= __b[0];\n" |
52754 | " return __a;\n" |
52755 | "}\n" |
52756 | "\n" |
52757 | "/// Multiplies two 128-bit vectors of [4 x float] and returns the\n" |
52758 | "/// results of the multiplication.\n" |
52759 | "///\n" |
52760 | "/// \\headerfile <x86intrin.h>\n" |
52761 | "///\n" |
52762 | "/// This intrinsic corresponds to the <c> VMULPS / MULPS </c> instructions.\n" |
52763 | "///\n" |
52764 | "/// \\param __a\n" |
52765 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
52766 | "/// \\param __b\n" |
52767 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
52768 | "/// \\returns A 128-bit vector of [4 x float] containing the products of both\n" |
52769 | "/// operands.\n" |
52770 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52771 | "_mm_mul_ps(__m128 __a, __m128 __b)\n" |
52772 | "{\n" |
52773 | " return (__m128)((__v4sf)__a * (__v4sf)__b);\n" |
52774 | "}\n" |
52775 | "\n" |
52776 | "/// Divides the value in the low-order 32 bits of the first operand by\n" |
52777 | "/// the corresponding value in the second operand.\n" |
52778 | "///\n" |
52779 | "/// \\headerfile <x86intrin.h>\n" |
52780 | "///\n" |
52781 | "/// This intrinsic corresponds to the <c> VDIVSS / DIVSS </c> instructions.\n" |
52782 | "///\n" |
52783 | "/// \\param __a\n" |
52784 | "/// A 128-bit vector of [4 x float] containing the dividend. The lower 32\n" |
52785 | "/// bits of this operand are used in the calculation.\n" |
52786 | "/// \\param __b\n" |
52787 | "/// A 128-bit vector of [4 x float] containing the divisor. The lower 32 bits\n" |
52788 | "/// of this operand are used in the calculation.\n" |
52789 | "/// \\returns A 128-bit vector of [4 x float] containing the quotients of the\n" |
52790 | "/// lower 32 bits of both operands. The upper 96 bits are copied from the\n" |
52791 | "/// upper 96 bits of the first source operand.\n" |
52792 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52793 | "_mm_div_ss(__m128 __a, __m128 __b)\n" |
52794 | "{\n" |
52795 | " __a[0] /= __b[0];\n" |
52796 | " return __a;\n" |
52797 | "}\n" |
52798 | "\n" |
52799 | "/// Divides two 128-bit vectors of [4 x float].\n" |
52800 | "///\n" |
52801 | "/// \\headerfile <x86intrin.h>\n" |
52802 | "///\n" |
52803 | "/// This intrinsic corresponds to the <c> VDIVPS / DIVPS </c> instructions.\n" |
52804 | "///\n" |
52805 | "/// \\param __a\n" |
52806 | "/// A 128-bit vector of [4 x float] containing the dividend.\n" |
52807 | "/// \\param __b\n" |
52808 | "/// A 128-bit vector of [4 x float] containing the divisor.\n" |
52809 | "/// \\returns A 128-bit vector of [4 x float] containing the quotients of both\n" |
52810 | "/// operands.\n" |
52811 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52812 | "_mm_div_ps(__m128 __a, __m128 __b)\n" |
52813 | "{\n" |
52814 | " return (__m128)((__v4sf)__a / (__v4sf)__b);\n" |
52815 | "}\n" |
52816 | "\n" |
52817 | "/// Calculates the square root of the value stored in the low-order bits\n" |
52818 | "/// of a 128-bit vector of [4 x float].\n" |
52819 | "///\n" |
52820 | "/// \\headerfile <x86intrin.h>\n" |
52821 | "///\n" |
52822 | "/// This intrinsic corresponds to the <c> VSQRTSS / SQRTSS </c> instructions.\n" |
52823 | "///\n" |
52824 | "/// \\param __a\n" |
52825 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
52826 | "/// used in the calculation.\n" |
52827 | "/// \\returns A 128-bit vector of [4 x float] containing the square root of the\n" |
52828 | "/// value in the low-order bits of the operand.\n" |
52829 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52830 | "_mm_sqrt_ss(__m128 __a)\n" |
52831 | "{\n" |
52832 | " return (__m128)__builtin_ia32_sqrtss((__v4sf)__a);\n" |
52833 | "}\n" |
52834 | "\n" |
52835 | "/// Calculates the square roots of the values stored in a 128-bit vector\n" |
52836 | "/// of [4 x float].\n" |
52837 | "///\n" |
52838 | "/// \\headerfile <x86intrin.h>\n" |
52839 | "///\n" |
52840 | "/// This intrinsic corresponds to the <c> VSQRTPS / SQRTPS </c> instructions.\n" |
52841 | "///\n" |
52842 | "/// \\param __a\n" |
52843 | "/// A 128-bit vector of [4 x float].\n" |
52844 | "/// \\returns A 128-bit vector of [4 x float] containing the square roots of the\n" |
52845 | "/// values in the operand.\n" |
52846 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52847 | "_mm_sqrt_ps(__m128 __a)\n" |
52848 | "{\n" |
52849 | " return __builtin_ia32_sqrtps((__v4sf)__a);\n" |
52850 | "}\n" |
52851 | "\n" |
52852 | "/// Calculates the approximate reciprocal of the value stored in the\n" |
52853 | "/// low-order bits of a 128-bit vector of [4 x float].\n" |
52854 | "///\n" |
52855 | "/// \\headerfile <x86intrin.h>\n" |
52856 | "///\n" |
52857 | "/// This intrinsic corresponds to the <c> VRCPSS / RCPSS </c> instructions.\n" |
52858 | "///\n" |
52859 | "/// \\param __a\n" |
52860 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
52861 | "/// used in the calculation.\n" |
52862 | "/// \\returns A 128-bit vector of [4 x float] containing the approximate\n" |
52863 | "/// reciprocal of the value in the low-order bits of the operand.\n" |
52864 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52865 | "_mm_rcp_ss(__m128 __a)\n" |
52866 | "{\n" |
52867 | " return (__m128)__builtin_ia32_rcpss((__v4sf)__a);\n" |
52868 | "}\n" |
52869 | "\n" |
52870 | "/// Calculates the approximate reciprocals of the values stored in a\n" |
52871 | "/// 128-bit vector of [4 x float].\n" |
52872 | "///\n" |
52873 | "/// \\headerfile <x86intrin.h>\n" |
52874 | "///\n" |
52875 | "/// This intrinsic corresponds to the <c> VRCPPS / RCPPS </c> instructions.\n" |
52876 | "///\n" |
52877 | "/// \\param __a\n" |
52878 | "/// A 128-bit vector of [4 x float].\n" |
52879 | "/// \\returns A 128-bit vector of [4 x float] containing the approximate\n" |
52880 | "/// reciprocals of the values in the operand.\n" |
52881 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52882 | "_mm_rcp_ps(__m128 __a)\n" |
52883 | "{\n" |
52884 | " return (__m128)__builtin_ia32_rcpps((__v4sf)__a);\n" |
52885 | "}\n" |
52886 | "\n" |
52887 | "/// Calculates the approximate reciprocal of the square root of the value\n" |
52888 | "/// stored in the low-order bits of a 128-bit vector of [4 x float].\n" |
52889 | "///\n" |
52890 | "/// \\headerfile <x86intrin.h>\n" |
52891 | "///\n" |
52892 | "/// This intrinsic corresponds to the <c> VRSQRTSS / RSQRTSS </c> instructions.\n" |
52893 | "///\n" |
52894 | "/// \\param __a\n" |
52895 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
52896 | "/// used in the calculation.\n" |
52897 | "/// \\returns A 128-bit vector of [4 x float] containing the approximate\n" |
52898 | "/// reciprocal of the square root of the value in the low-order bits of the\n" |
52899 | "/// operand.\n" |
52900 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52901 | "_mm_rsqrt_ss(__m128 __a)\n" |
52902 | "{\n" |
52903 | " return __builtin_ia32_rsqrtss((__v4sf)__a);\n" |
52904 | "}\n" |
52905 | "\n" |
52906 | "/// Calculates the approximate reciprocals of the square roots of the\n" |
52907 | "/// values stored in a 128-bit vector of [4 x float].\n" |
52908 | "///\n" |
52909 | "/// \\headerfile <x86intrin.h>\n" |
52910 | "///\n" |
52911 | "/// This intrinsic corresponds to the <c> VRSQRTPS / RSQRTPS </c> instructions.\n" |
52912 | "///\n" |
52913 | "/// \\param __a\n" |
52914 | "/// A 128-bit vector of [4 x float].\n" |
52915 | "/// \\returns A 128-bit vector of [4 x float] containing the approximate\n" |
52916 | "/// reciprocals of the square roots of the values in the operand.\n" |
52917 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52918 | "_mm_rsqrt_ps(__m128 __a)\n" |
52919 | "{\n" |
52920 | " return __builtin_ia32_rsqrtps((__v4sf)__a);\n" |
52921 | "}\n" |
52922 | "\n" |
52923 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
52924 | "/// operands and returns the lesser value in the low-order bits of the\n" |
52925 | "/// vector of [4 x float].\n" |
52926 | "///\n" |
52927 | "/// \\headerfile <x86intrin.h>\n" |
52928 | "///\n" |
52929 | "/// This intrinsic corresponds to the <c> VMINSS / MINSS </c> instructions.\n" |
52930 | "///\n" |
52931 | "/// \\param __a\n" |
52932 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
52933 | "/// 32 bits of this operand are used in the comparison.\n" |
52934 | "/// \\param __b\n" |
52935 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
52936 | "/// 32 bits of this operand are used in the comparison.\n" |
52937 | "/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n" |
52938 | "/// minimum value between both operands. The upper 96 bits are copied from\n" |
52939 | "/// the upper 96 bits of the first source operand.\n" |
52940 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52941 | "_mm_min_ss(__m128 __a, __m128 __b)\n" |
52942 | "{\n" |
52943 | " return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b);\n" |
52944 | "}\n" |
52945 | "\n" |
52946 | "/// Compares two 128-bit vectors of [4 x float] and returns the lesser\n" |
52947 | "/// of each pair of values.\n" |
52948 | "///\n" |
52949 | "/// \\headerfile <x86intrin.h>\n" |
52950 | "///\n" |
52951 | "/// This intrinsic corresponds to the <c> VMINPS / MINPS </c> instructions.\n" |
52952 | "///\n" |
52953 | "/// \\param __a\n" |
52954 | "/// A 128-bit vector of [4 x float] containing one of the operands.\n" |
52955 | "/// \\param __b\n" |
52956 | "/// A 128-bit vector of [4 x float] containing one of the operands.\n" |
52957 | "/// \\returns A 128-bit vector of [4 x float] containing the minimum values\n" |
52958 | "/// between both operands.\n" |
52959 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52960 | "_mm_min_ps(__m128 __a, __m128 __b)\n" |
52961 | "{\n" |
52962 | " return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b);\n" |
52963 | "}\n" |
52964 | "\n" |
52965 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
52966 | "/// operands and returns the greater value in the low-order bits of a 128-bit\n" |
52967 | "/// vector of [4 x float].\n" |
52968 | "///\n" |
52969 | "/// \\headerfile <x86intrin.h>\n" |
52970 | "///\n" |
52971 | "/// This intrinsic corresponds to the <c> VMAXSS / MAXSS </c> instructions.\n" |
52972 | "///\n" |
52973 | "/// \\param __a\n" |
52974 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
52975 | "/// 32 bits of this operand are used in the comparison.\n" |
52976 | "/// \\param __b\n" |
52977 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
52978 | "/// 32 bits of this operand are used in the comparison.\n" |
52979 | "/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n" |
52980 | "/// maximum value between both operands. The upper 96 bits are copied from\n" |
52981 | "/// the upper 96 bits of the first source operand.\n" |
52982 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
52983 | "_mm_max_ss(__m128 __a, __m128 __b)\n" |
52984 | "{\n" |
52985 | " return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b);\n" |
52986 | "}\n" |
52987 | "\n" |
52988 | "/// Compares two 128-bit vectors of [4 x float] and returns the greater\n" |
52989 | "/// of each pair of values.\n" |
52990 | "///\n" |
52991 | "/// \\headerfile <x86intrin.h>\n" |
52992 | "///\n" |
52993 | "/// This intrinsic corresponds to the <c> VMAXPS / MAXPS </c> instructions.\n" |
52994 | "///\n" |
52995 | "/// \\param __a\n" |
52996 | "/// A 128-bit vector of [4 x float] containing one of the operands.\n" |
52997 | "/// \\param __b\n" |
52998 | "/// A 128-bit vector of [4 x float] containing one of the operands.\n" |
52999 | "/// \\returns A 128-bit vector of [4 x float] containing the maximum values\n" |
53000 | "/// between both operands.\n" |
53001 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53002 | "_mm_max_ps(__m128 __a, __m128 __b)\n" |
53003 | "{\n" |
53004 | " return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b);\n" |
53005 | "}\n" |
53006 | "\n" |
53007 | "/// Performs a bitwise AND of two 128-bit vectors of [4 x float].\n" |
53008 | "///\n" |
53009 | "/// \\headerfile <x86intrin.h>\n" |
53010 | "///\n" |
53011 | "/// This intrinsic corresponds to the <c> VANDPS / ANDPS </c> instructions.\n" |
53012 | "///\n" |
53013 | "/// \\param __a\n" |
53014 | "/// A 128-bit vector containing one of the source operands.\n" |
53015 | "/// \\param __b\n" |
53016 | "/// A 128-bit vector containing one of the source operands.\n" |
53017 | "/// \\returns A 128-bit vector of [4 x float] containing the bitwise AND of the\n" |
53018 | "/// values between both operands.\n" |
53019 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53020 | "_mm_and_ps(__m128 __a, __m128 __b)\n" |
53021 | "{\n" |
53022 | " return (__m128)((__v4su)__a & (__v4su)__b);\n" |
53023 | "}\n" |
53024 | "\n" |
53025 | "/// Performs a bitwise AND of two 128-bit vectors of [4 x float], using\n" |
53026 | "/// the one's complement of the values contained in the first source\n" |
53027 | "/// operand.\n" |
53028 | "///\n" |
53029 | "/// \\headerfile <x86intrin.h>\n" |
53030 | "///\n" |
53031 | "/// This intrinsic corresponds to the <c> VANDNPS / ANDNPS </c> instructions.\n" |
53032 | "///\n" |
53033 | "/// \\param __a\n" |
53034 | "/// A 128-bit vector of [4 x float] containing the first source operand. The\n" |
53035 | "/// one's complement of this value is used in the bitwise AND.\n" |
53036 | "/// \\param __b\n" |
53037 | "/// A 128-bit vector of [4 x float] containing the second source operand.\n" |
53038 | "/// \\returns A 128-bit vector of [4 x float] containing the bitwise AND of the\n" |
53039 | "/// one's complement of the first operand and the values in the second\n" |
53040 | "/// operand.\n" |
53041 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53042 | "_mm_andnot_ps(__m128 __a, __m128 __b)\n" |
53043 | "{\n" |
53044 | " return (__m128)(~(__v4su)__a & (__v4su)__b);\n" |
53045 | "}\n" |
53046 | "\n" |
53047 | "/// Performs a bitwise OR of two 128-bit vectors of [4 x float].\n" |
53048 | "///\n" |
53049 | "/// \\headerfile <x86intrin.h>\n" |
53050 | "///\n" |
53051 | "/// This intrinsic corresponds to the <c> VORPS / ORPS </c> instructions.\n" |
53052 | "///\n" |
53053 | "/// \\param __a\n" |
53054 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
53055 | "/// \\param __b\n" |
53056 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
53057 | "/// \\returns A 128-bit vector of [4 x float] containing the bitwise OR of the\n" |
53058 | "/// values between both operands.\n" |
53059 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53060 | "_mm_or_ps(__m128 __a, __m128 __b)\n" |
53061 | "{\n" |
53062 | " return (__m128)((__v4su)__a | (__v4su)__b);\n" |
53063 | "}\n" |
53064 | "\n" |
53065 | "/// Performs a bitwise exclusive OR of two 128-bit vectors of\n" |
53066 | "/// [4 x float].\n" |
53067 | "///\n" |
53068 | "/// \\headerfile <x86intrin.h>\n" |
53069 | "///\n" |
53070 | "/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instructions.\n" |
53071 | "///\n" |
53072 | "/// \\param __a\n" |
53073 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
53074 | "/// \\param __b\n" |
53075 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
53076 | "/// \\returns A 128-bit vector of [4 x float] containing the bitwise exclusive OR\n" |
53077 | "/// of the values between both operands.\n" |
53078 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53079 | "_mm_xor_ps(__m128 __a, __m128 __b)\n" |
53080 | "{\n" |
53081 | " return (__m128)((__v4su)__a ^ (__v4su)__b);\n" |
53082 | "}\n" |
53083 | "\n" |
53084 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53085 | "/// operands for equality and returns the result of the comparison in the\n" |
53086 | "/// low-order bits of a vector [4 x float].\n" |
53087 | "///\n" |
53088 | "/// \\headerfile <x86intrin.h>\n" |
53089 | "///\n" |
53090 | "/// This intrinsic corresponds to the <c> VCMPEQSS / CMPEQSS </c> instructions.\n" |
53091 | "///\n" |
53092 | "/// \\param __a\n" |
53093 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53094 | "/// 32 bits of this operand are used in the comparison.\n" |
53095 | "/// \\param __b\n" |
53096 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53097 | "/// 32 bits of this operand are used in the comparison.\n" |
53098 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
53099 | "/// in the low-order bits.\n" |
53100 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53101 | "_mm_cmpeq_ss(__m128 __a, __m128 __b)\n" |
53102 | "{\n" |
53103 | " return (__m128)__builtin_ia32_cmpeqss((__v4sf)__a, (__v4sf)__b);\n" |
53104 | "}\n" |
53105 | "\n" |
53106 | "/// Compares each of the corresponding 32-bit float values of the\n" |
53107 | "/// 128-bit vectors of [4 x float] for equality.\n" |
53108 | "///\n" |
53109 | "/// \\headerfile <x86intrin.h>\n" |
53110 | "///\n" |
53111 | "/// This intrinsic corresponds to the <c> VCMPEQPS / CMPEQPS </c> instructions.\n" |
53112 | "///\n" |
53113 | "/// \\param __a\n" |
53114 | "/// A 128-bit vector of [4 x float].\n" |
53115 | "/// \\param __b\n" |
53116 | "/// A 128-bit vector of [4 x float].\n" |
53117 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
53118 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53119 | "_mm_cmpeq_ps(__m128 __a, __m128 __b)\n" |
53120 | "{\n" |
53121 | " return (__m128)__builtin_ia32_cmpeqps((__v4sf)__a, (__v4sf)__b);\n" |
53122 | "}\n" |
53123 | "\n" |
53124 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53125 | "/// operands to determine if the value in the first operand is less than the\n" |
53126 | "/// corresponding value in the second operand and returns the result of the\n" |
53127 | "/// comparison in the low-order bits of a vector of [4 x float].\n" |
53128 | "///\n" |
53129 | "/// \\headerfile <x86intrin.h>\n" |
53130 | "///\n" |
53131 | "/// This intrinsic corresponds to the <c> VCMPLTSS / CMPLTSS </c> instructions.\n" |
53132 | "///\n" |
53133 | "/// \\param __a\n" |
53134 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53135 | "/// 32 bits of this operand are used in the comparison.\n" |
53136 | "/// \\param __b\n" |
53137 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53138 | "/// 32 bits of this operand are used in the comparison.\n" |
53139 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
53140 | "/// in the low-order bits.\n" |
53141 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53142 | "_mm_cmplt_ss(__m128 __a, __m128 __b)\n" |
53143 | "{\n" |
53144 | " return (__m128)__builtin_ia32_cmpltss((__v4sf)__a, (__v4sf)__b);\n" |
53145 | "}\n" |
53146 | "\n" |
53147 | "/// Compares each of the corresponding 32-bit float values of the\n" |
53148 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
53149 | "/// operand are less than those in the second operand.\n" |
53150 | "///\n" |
53151 | "/// \\headerfile <x86intrin.h>\n" |
53152 | "///\n" |
53153 | "/// This intrinsic corresponds to the <c> VCMPLTPS / CMPLTPS </c> instructions.\n" |
53154 | "///\n" |
53155 | "/// \\param __a\n" |
53156 | "/// A 128-bit vector of [4 x float].\n" |
53157 | "/// \\param __b\n" |
53158 | "/// A 128-bit vector of [4 x float].\n" |
53159 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
53160 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53161 | "_mm_cmplt_ps(__m128 __a, __m128 __b)\n" |
53162 | "{\n" |
53163 | " return (__m128)__builtin_ia32_cmpltps((__v4sf)__a, (__v4sf)__b);\n" |
53164 | "}\n" |
53165 | "\n" |
53166 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53167 | "/// operands to determine if the value in the first operand is less than or\n" |
53168 | "/// equal to the corresponding value in the second operand and returns the\n" |
53169 | "/// result of the comparison in the low-order bits of a vector of\n" |
53170 | "/// [4 x float].\n" |
53171 | "///\n" |
53172 | "/// \\headerfile <x86intrin.h>\n" |
53173 | "///\n" |
53174 | "/// This intrinsic corresponds to the <c> VCMPLESS / CMPLESS </c> instructions.\n" |
53175 | "///\n" |
53176 | "/// \\param __a\n" |
53177 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53178 | "/// 32 bits of this operand are used in the comparison.\n" |
53179 | "/// \\param __b\n" |
53180 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53181 | "/// 32 bits of this operand are used in the comparison.\n" |
53182 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
53183 | "/// in the low-order bits.\n" |
53184 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53185 | "_mm_cmple_ss(__m128 __a, __m128 __b)\n" |
53186 | "{\n" |
53187 | " return (__m128)__builtin_ia32_cmpless((__v4sf)__a, (__v4sf)__b);\n" |
53188 | "}\n" |
53189 | "\n" |
53190 | "/// Compares each of the corresponding 32-bit float values of the\n" |
53191 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
53192 | "/// operand are less than or equal to those in the second operand.\n" |
53193 | "///\n" |
53194 | "/// \\headerfile <x86intrin.h>\n" |
53195 | "///\n" |
53196 | "/// This intrinsic corresponds to the <c> VCMPLEPS / CMPLEPS </c> instructions.\n" |
53197 | "///\n" |
53198 | "/// \\param __a\n" |
53199 | "/// A 128-bit vector of [4 x float].\n" |
53200 | "/// \\param __b\n" |
53201 | "/// A 128-bit vector of [4 x float].\n" |
53202 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
53203 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53204 | "_mm_cmple_ps(__m128 __a, __m128 __b)\n" |
53205 | "{\n" |
53206 | " return (__m128)__builtin_ia32_cmpleps((__v4sf)__a, (__v4sf)__b);\n" |
53207 | "}\n" |
53208 | "\n" |
53209 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53210 | "/// operands to determine if the value in the first operand is greater than\n" |
53211 | "/// the corresponding value in the second operand and returns the result of\n" |
53212 | "/// the comparison in the low-order bits of a vector of [4 x float].\n" |
53213 | "///\n" |
53214 | "/// \\headerfile <x86intrin.h>\n" |
53215 | "///\n" |
53216 | "/// This intrinsic corresponds to the <c> VCMPLTSS / CMPLTSS </c> instructions.\n" |
53217 | "///\n" |
53218 | "/// \\param __a\n" |
53219 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53220 | "/// 32 bits of this operand are used in the comparison.\n" |
53221 | "/// \\param __b\n" |
53222 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53223 | "/// 32 bits of this operand are used in the comparison.\n" |
53224 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
53225 | "/// in the low-order bits.\n" |
53226 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53227 | "_mm_cmpgt_ss(__m128 __a, __m128 __b)\n" |
53228 | "{\n" |
53229 | " return (__m128)__builtin_shufflevector((__v4sf)__a,\n" |
53230 | " (__v4sf)__builtin_ia32_cmpltss((__v4sf)__b, (__v4sf)__a),\n" |
53231 | " 4, 1, 2, 3);\n" |
53232 | "}\n" |
53233 | "\n" |
53234 | "/// Compares each of the corresponding 32-bit float values of the\n" |
53235 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
53236 | "/// operand are greater than those in the second operand.\n" |
53237 | "///\n" |
53238 | "/// \\headerfile <x86intrin.h>\n" |
53239 | "///\n" |
53240 | "/// This intrinsic corresponds to the <c> VCMPLTPS / CMPLTPS </c> instructions.\n" |
53241 | "///\n" |
53242 | "/// \\param __a\n" |
53243 | "/// A 128-bit vector of [4 x float].\n" |
53244 | "/// \\param __b\n" |
53245 | "/// A 128-bit vector of [4 x float].\n" |
53246 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
53247 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53248 | "_mm_cmpgt_ps(__m128 __a, __m128 __b)\n" |
53249 | "{\n" |
53250 | " return (__m128)__builtin_ia32_cmpltps((__v4sf)__b, (__v4sf)__a);\n" |
53251 | "}\n" |
53252 | "\n" |
53253 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53254 | "/// operands to determine if the value in the first operand is greater than\n" |
53255 | "/// or equal to the corresponding value in the second operand and returns\n" |
53256 | "/// the result of the comparison in the low-order bits of a vector of\n" |
53257 | "/// [4 x float].\n" |
53258 | "///\n" |
53259 | "/// \\headerfile <x86intrin.h>\n" |
53260 | "///\n" |
53261 | "/// This intrinsic corresponds to the <c> VCMPLESS / CMPLESS </c> instructions.\n" |
53262 | "///\n" |
53263 | "/// \\param __a\n" |
53264 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53265 | "/// 32 bits of this operand are used in the comparison.\n" |
53266 | "/// \\param __b\n" |
53267 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53268 | "/// 32 bits of this operand are used in the comparison.\n" |
53269 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
53270 | "/// in the low-order bits.\n" |
53271 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53272 | "_mm_cmpge_ss(__m128 __a, __m128 __b)\n" |
53273 | "{\n" |
53274 | " return (__m128)__builtin_shufflevector((__v4sf)__a,\n" |
53275 | " (__v4sf)__builtin_ia32_cmpless((__v4sf)__b, (__v4sf)__a),\n" |
53276 | " 4, 1, 2, 3);\n" |
53277 | "}\n" |
53278 | "\n" |
53279 | "/// Compares each of the corresponding 32-bit float values of the\n" |
53280 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
53281 | "/// operand are greater than or equal to those in the second operand.\n" |
53282 | "///\n" |
53283 | "/// \\headerfile <x86intrin.h>\n" |
53284 | "///\n" |
53285 | "/// This intrinsic corresponds to the <c> VCMPLEPS / CMPLEPS </c> instructions.\n" |
53286 | "///\n" |
53287 | "/// \\param __a\n" |
53288 | "/// A 128-bit vector of [4 x float].\n" |
53289 | "/// \\param __b\n" |
53290 | "/// A 128-bit vector of [4 x float].\n" |
53291 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
53292 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53293 | "_mm_cmpge_ps(__m128 __a, __m128 __b)\n" |
53294 | "{\n" |
53295 | " return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a);\n" |
53296 | "}\n" |
53297 | "\n" |
53298 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53299 | "/// operands for inequality and returns the result of the comparison in the\n" |
53300 | "/// low-order bits of a vector of [4 x float].\n" |
53301 | "///\n" |
53302 | "/// \\headerfile <x86intrin.h>\n" |
53303 | "///\n" |
53304 | "/// This intrinsic corresponds to the <c> VCMPNEQSS / CMPNEQSS </c>\n" |
53305 | "/// instructions.\n" |
53306 | "///\n" |
53307 | "/// \\param __a\n" |
53308 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53309 | "/// 32 bits of this operand are used in the comparison.\n" |
53310 | "/// \\param __b\n" |
53311 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53312 | "/// 32 bits of this operand are used in the comparison.\n" |
53313 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
53314 | "/// in the low-order bits.\n" |
53315 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53316 | "_mm_cmpneq_ss(__m128 __a, __m128 __b)\n" |
53317 | "{\n" |
53318 | " return (__m128)__builtin_ia32_cmpneqss((__v4sf)__a, (__v4sf)__b);\n" |
53319 | "}\n" |
53320 | "\n" |
53321 | "/// Compares each of the corresponding 32-bit float values of the\n" |
53322 | "/// 128-bit vectors of [4 x float] for inequality.\n" |
53323 | "///\n" |
53324 | "/// \\headerfile <x86intrin.h>\n" |
53325 | "///\n" |
53326 | "/// This intrinsic corresponds to the <c> VCMPNEQPS / CMPNEQPS </c>\n" |
53327 | "/// instructions.\n" |
53328 | "///\n" |
53329 | "/// \\param __a\n" |
53330 | "/// A 128-bit vector of [4 x float].\n" |
53331 | "/// \\param __b\n" |
53332 | "/// A 128-bit vector of [4 x float].\n" |
53333 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
53334 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53335 | "_mm_cmpneq_ps(__m128 __a, __m128 __b)\n" |
53336 | "{\n" |
53337 | " return (__m128)__builtin_ia32_cmpneqps((__v4sf)__a, (__v4sf)__b);\n" |
53338 | "}\n" |
53339 | "\n" |
53340 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53341 | "/// operands to determine if the value in the first operand is not less than\n" |
53342 | "/// the corresponding value in the second operand and returns the result of\n" |
53343 | "/// the comparison in the low-order bits of a vector of [4 x float].\n" |
53344 | "///\n" |
53345 | "/// \\headerfile <x86intrin.h>\n" |
53346 | "///\n" |
53347 | "/// This intrinsic corresponds to the <c> VCMPNLTSS / CMPNLTSS </c>\n" |
53348 | "/// instructions.\n" |
53349 | "///\n" |
53350 | "/// \\param __a\n" |
53351 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53352 | "/// 32 bits of this operand are used in the comparison.\n" |
53353 | "/// \\param __b\n" |
53354 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53355 | "/// 32 bits of this operand are used in the comparison.\n" |
53356 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
53357 | "/// in the low-order bits.\n" |
53358 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53359 | "_mm_cmpnlt_ss(__m128 __a, __m128 __b)\n" |
53360 | "{\n" |
53361 | " return (__m128)__builtin_ia32_cmpnltss((__v4sf)__a, (__v4sf)__b);\n" |
53362 | "}\n" |
53363 | "\n" |
53364 | "/// Compares each of the corresponding 32-bit float values of the\n" |
53365 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
53366 | "/// operand are not less than those in the second operand.\n" |
53367 | "///\n" |
53368 | "/// \\headerfile <x86intrin.h>\n" |
53369 | "///\n" |
53370 | "/// This intrinsic corresponds to the <c> VCMPNLTPS / CMPNLTPS </c>\n" |
53371 | "/// instructions.\n" |
53372 | "///\n" |
53373 | "/// \\param __a\n" |
53374 | "/// A 128-bit vector of [4 x float].\n" |
53375 | "/// \\param __b\n" |
53376 | "/// A 128-bit vector of [4 x float].\n" |
53377 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
53378 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53379 | "_mm_cmpnlt_ps(__m128 __a, __m128 __b)\n" |
53380 | "{\n" |
53381 | " return (__m128)__builtin_ia32_cmpnltps((__v4sf)__a, (__v4sf)__b);\n" |
53382 | "}\n" |
53383 | "\n" |
53384 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53385 | "/// operands to determine if the value in the first operand is not less than\n" |
53386 | "/// or equal to the corresponding value in the second operand and returns\n" |
53387 | "/// the result of the comparison in the low-order bits of a vector of\n" |
53388 | "/// [4 x float].\n" |
53389 | "///\n" |
53390 | "/// \\headerfile <x86intrin.h>\n" |
53391 | "///\n" |
53392 | "/// This intrinsic corresponds to the <c> VCMPNLESS / CMPNLESS </c>\n" |
53393 | "/// instructions.\n" |
53394 | "///\n" |
53395 | "/// \\param __a\n" |
53396 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53397 | "/// 32 bits of this operand are used in the comparison.\n" |
53398 | "/// \\param __b\n" |
53399 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53400 | "/// 32 bits of this operand are used in the comparison.\n" |
53401 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
53402 | "/// in the low-order bits.\n" |
53403 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53404 | "_mm_cmpnle_ss(__m128 __a, __m128 __b)\n" |
53405 | "{\n" |
53406 | " return (__m128)__builtin_ia32_cmpnless((__v4sf)__a, (__v4sf)__b);\n" |
53407 | "}\n" |
53408 | "\n" |
53409 | "/// Compares each of the corresponding 32-bit float values of the\n" |
53410 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
53411 | "/// operand are not less than or equal to those in the second operand.\n" |
53412 | "///\n" |
53413 | "/// \\headerfile <x86intrin.h>\n" |
53414 | "///\n" |
53415 | "/// This intrinsic corresponds to the <c> VCMPNLEPS / CMPNLEPS </c>\n" |
53416 | "/// instructions.\n" |
53417 | "///\n" |
53418 | "/// \\param __a\n" |
53419 | "/// A 128-bit vector of [4 x float].\n" |
53420 | "/// \\param __b\n" |
53421 | "/// A 128-bit vector of [4 x float].\n" |
53422 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
53423 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53424 | "_mm_cmpnle_ps(__m128 __a, __m128 __b)\n" |
53425 | "{\n" |
53426 | " return (__m128)__builtin_ia32_cmpnleps((__v4sf)__a, (__v4sf)__b);\n" |
53427 | "}\n" |
53428 | "\n" |
53429 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53430 | "/// operands to determine if the value in the first operand is not greater\n" |
53431 | "/// than the corresponding value in the second operand and returns the\n" |
53432 | "/// result of the comparison in the low-order bits of a vector of\n" |
53433 | "/// [4 x float].\n" |
53434 | "///\n" |
53435 | "/// \\headerfile <x86intrin.h>\n" |
53436 | "///\n" |
53437 | "/// This intrinsic corresponds to the <c> VCMPNLTSS / CMPNLTSS </c>\n" |
53438 | "/// instructions.\n" |
53439 | "///\n" |
53440 | "/// \\param __a\n" |
53441 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53442 | "/// 32 bits of this operand are used in the comparison.\n" |
53443 | "/// \\param __b\n" |
53444 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53445 | "/// 32 bits of this operand are used in the comparison.\n" |
53446 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
53447 | "/// in the low-order bits.\n" |
53448 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53449 | "_mm_cmpngt_ss(__m128 __a, __m128 __b)\n" |
53450 | "{\n" |
53451 | " return (__m128)__builtin_shufflevector((__v4sf)__a,\n" |
53452 | " (__v4sf)__builtin_ia32_cmpnltss((__v4sf)__b, (__v4sf)__a),\n" |
53453 | " 4, 1, 2, 3);\n" |
53454 | "}\n" |
53455 | "\n" |
53456 | "/// Compares each of the corresponding 32-bit float values of the\n" |
53457 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
53458 | "/// operand are not greater than those in the second operand.\n" |
53459 | "///\n" |
53460 | "/// \\headerfile <x86intrin.h>\n" |
53461 | "///\n" |
53462 | "/// This intrinsic corresponds to the <c> VCMPNLTPS / CMPNLTPS </c>\n" |
53463 | "/// instructions.\n" |
53464 | "///\n" |
53465 | "/// \\param __a\n" |
53466 | "/// A 128-bit vector of [4 x float].\n" |
53467 | "/// \\param __b\n" |
53468 | "/// A 128-bit vector of [4 x float].\n" |
53469 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
53470 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53471 | "_mm_cmpngt_ps(__m128 __a, __m128 __b)\n" |
53472 | "{\n" |
53473 | " return (__m128)__builtin_ia32_cmpnltps((__v4sf)__b, (__v4sf)__a);\n" |
53474 | "}\n" |
53475 | "\n" |
53476 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53477 | "/// operands to determine if the value in the first operand is not greater\n" |
53478 | "/// than or equal to the corresponding value in the second operand and\n" |
53479 | "/// returns the result of the comparison in the low-order bits of a vector\n" |
53480 | "/// of [4 x float].\n" |
53481 | "///\n" |
53482 | "/// \\headerfile <x86intrin.h>\n" |
53483 | "///\n" |
53484 | "/// This intrinsic corresponds to the <c> VCMPNLESS / CMPNLESS </c>\n" |
53485 | "/// instructions.\n" |
53486 | "///\n" |
53487 | "/// \\param __a\n" |
53488 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53489 | "/// 32 bits of this operand are used in the comparison.\n" |
53490 | "/// \\param __b\n" |
53491 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53492 | "/// 32 bits of this operand are used in the comparison.\n" |
53493 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
53494 | "/// in the low-order bits.\n" |
53495 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53496 | "_mm_cmpnge_ss(__m128 __a, __m128 __b)\n" |
53497 | "{\n" |
53498 | " return (__m128)__builtin_shufflevector((__v4sf)__a,\n" |
53499 | " (__v4sf)__builtin_ia32_cmpnless((__v4sf)__b, (__v4sf)__a),\n" |
53500 | " 4, 1, 2, 3);\n" |
53501 | "}\n" |
53502 | "\n" |
53503 | "/// Compares each of the corresponding 32-bit float values of the\n" |
53504 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
53505 | "/// operand are not greater than or equal to those in the second operand.\n" |
53506 | "///\n" |
53507 | "/// \\headerfile <x86intrin.h>\n" |
53508 | "///\n" |
53509 | "/// This intrinsic corresponds to the <c> VCMPNLEPS / CMPNLEPS </c>\n" |
53510 | "/// instructions.\n" |
53511 | "///\n" |
53512 | "/// \\param __a\n" |
53513 | "/// A 128-bit vector of [4 x float].\n" |
53514 | "/// \\param __b\n" |
53515 | "/// A 128-bit vector of [4 x float].\n" |
53516 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
53517 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53518 | "_mm_cmpnge_ps(__m128 __a, __m128 __b)\n" |
53519 | "{\n" |
53520 | " return (__m128)__builtin_ia32_cmpnleps((__v4sf)__b, (__v4sf)__a);\n" |
53521 | "}\n" |
53522 | "\n" |
53523 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53524 | "/// operands to determine if the value in the first operand is ordered with\n" |
53525 | "/// respect to the corresponding value in the second operand and returns the\n" |
53526 | "/// result of the comparison in the low-order bits of a vector of\n" |
53527 | "/// [4 x float].\n" |
53528 | "///\n" |
53529 | "/// \\headerfile <x86intrin.h>\n" |
53530 | "///\n" |
53531 | "/// This intrinsic corresponds to the <c> VCMPORDSS / CMPORDSS </c>\n" |
53532 | "/// instructions.\n" |
53533 | "///\n" |
53534 | "/// \\param __a\n" |
53535 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53536 | "/// 32 bits of this operand are used in the comparison.\n" |
53537 | "/// \\param __b\n" |
53538 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53539 | "/// 32 bits of this operand are used in the comparison.\n" |
53540 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
53541 | "/// in the low-order bits.\n" |
53542 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53543 | "_mm_cmpord_ss(__m128 __a, __m128 __b)\n" |
53544 | "{\n" |
53545 | " return (__m128)__builtin_ia32_cmpordss((__v4sf)__a, (__v4sf)__b);\n" |
53546 | "}\n" |
53547 | "\n" |
53548 | "/// Compares each of the corresponding 32-bit float values of the\n" |
53549 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
53550 | "/// operand are ordered with respect to those in the second operand.\n" |
53551 | "///\n" |
53552 | "/// \\headerfile <x86intrin.h>\n" |
53553 | "///\n" |
53554 | "/// This intrinsic corresponds to the <c> VCMPORDPS / CMPORDPS </c>\n" |
53555 | "/// instructions.\n" |
53556 | "///\n" |
53557 | "/// \\param __a\n" |
53558 | "/// A 128-bit vector of [4 x float].\n" |
53559 | "/// \\param __b\n" |
53560 | "/// A 128-bit vector of [4 x float].\n" |
53561 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
53562 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53563 | "_mm_cmpord_ps(__m128 __a, __m128 __b)\n" |
53564 | "{\n" |
53565 | " return (__m128)__builtin_ia32_cmpordps((__v4sf)__a, (__v4sf)__b);\n" |
53566 | "}\n" |
53567 | "\n" |
53568 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53569 | "/// operands to determine if the value in the first operand is unordered\n" |
53570 | "/// with respect to the corresponding value in the second operand and\n" |
53571 | "/// returns the result of the comparison in the low-order bits of a vector\n" |
53572 | "/// of [4 x float].\n" |
53573 | "///\n" |
53574 | "/// \\headerfile <x86intrin.h>\n" |
53575 | "///\n" |
53576 | "/// This intrinsic corresponds to the <c> VCMPUNORDSS / CMPUNORDSS </c>\n" |
53577 | "/// instructions.\n" |
53578 | "///\n" |
53579 | "/// \\param __a\n" |
53580 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53581 | "/// 32 bits of this operand are used in the comparison.\n" |
53582 | "/// \\param __b\n" |
53583 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
53584 | "/// 32 bits of this operand are used in the comparison.\n" |
53585 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
53586 | "/// in the low-order bits.\n" |
53587 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53588 | "_mm_cmpunord_ss(__m128 __a, __m128 __b)\n" |
53589 | "{\n" |
53590 | " return (__m128)__builtin_ia32_cmpunordss((__v4sf)__a, (__v4sf)__b);\n" |
53591 | "}\n" |
53592 | "\n" |
53593 | "/// Compares each of the corresponding 32-bit float values of the\n" |
53594 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
53595 | "/// operand are unordered with respect to those in the second operand.\n" |
53596 | "///\n" |
53597 | "/// \\headerfile <x86intrin.h>\n" |
53598 | "///\n" |
53599 | "/// This intrinsic corresponds to the <c> VCMPUNORDPS / CMPUNORDPS </c>\n" |
53600 | "/// instructions.\n" |
53601 | "///\n" |
53602 | "/// \\param __a\n" |
53603 | "/// A 128-bit vector of [4 x float].\n" |
53604 | "/// \\param __b\n" |
53605 | "/// A 128-bit vector of [4 x float].\n" |
53606 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
53607 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
53608 | "_mm_cmpunord_ps(__m128 __a, __m128 __b)\n" |
53609 | "{\n" |
53610 | " return (__m128)__builtin_ia32_cmpunordps((__v4sf)__a, (__v4sf)__b);\n" |
53611 | "}\n" |
53612 | "\n" |
53613 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53614 | "/// operands for equality and returns the result of the comparison.\n" |
53615 | "///\n" |
53616 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
53617 | "///\n" |
53618 | "/// \\headerfile <x86intrin.h>\n" |
53619 | "///\n" |
53620 | "/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c>\n" |
53621 | "/// instructions.\n" |
53622 | "///\n" |
53623 | "/// \\param __a\n" |
53624 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53625 | "/// used in the comparison.\n" |
53626 | "/// \\param __b\n" |
53627 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53628 | "/// used in the comparison.\n" |
53629 | "/// \\returns An integer containing the comparison results. If either of the\n" |
53630 | "/// two lower 32-bit values is NaN, 0 is returned.\n" |
53631 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
53632 | "_mm_comieq_ss(__m128 __a, __m128 __b)\n" |
53633 | "{\n" |
53634 | " return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b);\n" |
53635 | "}\n" |
53636 | "\n" |
53637 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53638 | "/// operands to determine if the first operand is less than the second\n" |
53639 | "/// operand and returns the result of the comparison.\n" |
53640 | "///\n" |
53641 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
53642 | "///\n" |
53643 | "/// \\headerfile <x86intrin.h>\n" |
53644 | "///\n" |
53645 | "/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c>\n" |
53646 | "/// instructions.\n" |
53647 | "///\n" |
53648 | "/// \\param __a\n" |
53649 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53650 | "/// used in the comparison.\n" |
53651 | "/// \\param __b\n" |
53652 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53653 | "/// used in the comparison.\n" |
53654 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
53655 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
53656 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
53657 | "_mm_comilt_ss(__m128 __a, __m128 __b)\n" |
53658 | "{\n" |
53659 | " return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b);\n" |
53660 | "}\n" |
53661 | "\n" |
53662 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53663 | "/// operands to determine if the first operand is less than or equal to the\n" |
53664 | "/// second operand and returns the result of the comparison.\n" |
53665 | "///\n" |
53666 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
53667 | "///\n" |
53668 | "/// \\headerfile <x86intrin.h>\n" |
53669 | "///\n" |
53670 | "/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n" |
53671 | "///\n" |
53672 | "/// \\param __a\n" |
53673 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53674 | "/// used in the comparison.\n" |
53675 | "/// \\param __b\n" |
53676 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53677 | "/// used in the comparison.\n" |
53678 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
53679 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
53680 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
53681 | "_mm_comile_ss(__m128 __a, __m128 __b)\n" |
53682 | "{\n" |
53683 | " return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b);\n" |
53684 | "}\n" |
53685 | "\n" |
53686 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53687 | "/// operands to determine if the first operand is greater than the second\n" |
53688 | "/// operand and returns the result of the comparison.\n" |
53689 | "///\n" |
53690 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
53691 | "///\n" |
53692 | "/// \\headerfile <x86intrin.h>\n" |
53693 | "///\n" |
53694 | "/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n" |
53695 | "///\n" |
53696 | "/// \\param __a\n" |
53697 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53698 | "/// used in the comparison.\n" |
53699 | "/// \\param __b\n" |
53700 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53701 | "/// used in the comparison.\n" |
53702 | "/// \\returns An integer containing the comparison results. If either of the\n" |
53703 | "/// two lower 32-bit values is NaN, 0 is returned.\n" |
53704 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
53705 | "_mm_comigt_ss(__m128 __a, __m128 __b)\n" |
53706 | "{\n" |
53707 | " return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b);\n" |
53708 | "}\n" |
53709 | "\n" |
53710 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53711 | "/// operands to determine if the first operand is greater than or equal to\n" |
53712 | "/// the second operand and returns the result of the comparison.\n" |
53713 | "///\n" |
53714 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
53715 | "///\n" |
53716 | "/// \\headerfile <x86intrin.h>\n" |
53717 | "///\n" |
53718 | "/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n" |
53719 | "///\n" |
53720 | "/// \\param __a\n" |
53721 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53722 | "/// used in the comparison.\n" |
53723 | "/// \\param __b\n" |
53724 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53725 | "/// used in the comparison.\n" |
53726 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
53727 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
53728 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
53729 | "_mm_comige_ss(__m128 __a, __m128 __b)\n" |
53730 | "{\n" |
53731 | " return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b);\n" |
53732 | "}\n" |
53733 | "\n" |
53734 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
53735 | "/// operands to determine if the first operand is not equal to the second\n" |
53736 | "/// operand and returns the result of the comparison.\n" |
53737 | "///\n" |
53738 | "/// If either of the two lower 32-bit values is NaN, 1 is returned.\n" |
53739 | "///\n" |
53740 | "/// \\headerfile <x86intrin.h>\n" |
53741 | "///\n" |
53742 | "/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n" |
53743 | "///\n" |
53744 | "/// \\param __a\n" |
53745 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53746 | "/// used in the comparison.\n" |
53747 | "/// \\param __b\n" |
53748 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53749 | "/// used in the comparison.\n" |
53750 | "/// \\returns An integer containing the comparison results. If either of the\n" |
53751 | "/// two lower 32-bit values is NaN, 1 is returned.\n" |
53752 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
53753 | "_mm_comineq_ss(__m128 __a, __m128 __b)\n" |
53754 | "{\n" |
53755 | " return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b);\n" |
53756 | "}\n" |
53757 | "\n" |
53758 | "/// Performs an unordered comparison of two 32-bit float values using\n" |
53759 | "/// the low-order bits of both operands to determine equality and returns\n" |
53760 | "/// the result of the comparison.\n" |
53761 | "///\n" |
53762 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
53763 | "///\n" |
53764 | "/// \\headerfile <x86intrin.h>\n" |
53765 | "///\n" |
53766 | "/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n" |
53767 | "///\n" |
53768 | "/// \\param __a\n" |
53769 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53770 | "/// used in the comparison.\n" |
53771 | "/// \\param __b\n" |
53772 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53773 | "/// used in the comparison.\n" |
53774 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
53775 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
53776 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
53777 | "_mm_ucomieq_ss(__m128 __a, __m128 __b)\n" |
53778 | "{\n" |
53779 | " return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b);\n" |
53780 | "}\n" |
53781 | "\n" |
53782 | "/// Performs an unordered comparison of two 32-bit float values using\n" |
53783 | "/// the low-order bits of both operands to determine if the first operand is\n" |
53784 | "/// less than the second operand and returns the result of the comparison.\n" |
53785 | "///\n" |
53786 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
53787 | "///\n" |
53788 | "/// \\headerfile <x86intrin.h>\n" |
53789 | "///\n" |
53790 | "/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n" |
53791 | "///\n" |
53792 | "/// \\param __a\n" |
53793 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53794 | "/// used in the comparison.\n" |
53795 | "/// \\param __b\n" |
53796 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53797 | "/// used in the comparison.\n" |
53798 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
53799 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
53800 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
53801 | "_mm_ucomilt_ss(__m128 __a, __m128 __b)\n" |
53802 | "{\n" |
53803 | " return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b);\n" |
53804 | "}\n" |
53805 | "\n" |
53806 | "/// Performs an unordered comparison of two 32-bit float values using\n" |
53807 | "/// the low-order bits of both operands to determine if the first operand is\n" |
53808 | "/// less than or equal to the second operand and returns the result of the\n" |
53809 | "/// comparison.\n" |
53810 | "///\n" |
53811 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
53812 | "///\n" |
53813 | "/// \\headerfile <x86intrin.h>\n" |
53814 | "///\n" |
53815 | "/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n" |
53816 | "///\n" |
53817 | "/// \\param __a\n" |
53818 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53819 | "/// used in the comparison.\n" |
53820 | "/// \\param __b\n" |
53821 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53822 | "/// used in the comparison.\n" |
53823 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
53824 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
53825 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
53826 | "_mm_ucomile_ss(__m128 __a, __m128 __b)\n" |
53827 | "{\n" |
53828 | " return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b);\n" |
53829 | "}\n" |
53830 | "\n" |
53831 | "/// Performs an unordered comparison of two 32-bit float values using\n" |
53832 | "/// the low-order bits of both operands to determine if the first operand is\n" |
53833 | "/// greater than the second operand and returns the result of the\n" |
53834 | "/// comparison.\n" |
53835 | "///\n" |
53836 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
53837 | "///\n" |
53838 | "/// \\headerfile <x86intrin.h>\n" |
53839 | "///\n" |
53840 | "/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n" |
53841 | "///\n" |
53842 | "/// \\param __a\n" |
53843 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53844 | "/// used in the comparison.\n" |
53845 | "/// \\param __b\n" |
53846 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53847 | "/// used in the comparison.\n" |
53848 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
53849 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
53850 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
53851 | "_mm_ucomigt_ss(__m128 __a, __m128 __b)\n" |
53852 | "{\n" |
53853 | " return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b);\n" |
53854 | "}\n" |
53855 | "\n" |
53856 | "/// Performs an unordered comparison of two 32-bit float values using\n" |
53857 | "/// the low-order bits of both operands to determine if the first operand is\n" |
53858 | "/// greater than or equal to the second operand and returns the result of\n" |
53859 | "/// the comparison.\n" |
53860 | "///\n" |
53861 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
53862 | "///\n" |
53863 | "/// \\headerfile <x86intrin.h>\n" |
53864 | "///\n" |
53865 | "/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n" |
53866 | "///\n" |
53867 | "/// \\param __a\n" |
53868 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53869 | "/// used in the comparison.\n" |
53870 | "/// \\param __b\n" |
53871 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53872 | "/// used in the comparison.\n" |
53873 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
53874 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
53875 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
53876 | "_mm_ucomige_ss(__m128 __a, __m128 __b)\n" |
53877 | "{\n" |
53878 | " return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b);\n" |
53879 | "}\n" |
53880 | "\n" |
53881 | "/// Performs an unordered comparison of two 32-bit float values using\n" |
53882 | "/// the low-order bits of both operands to determine inequality and returns\n" |
53883 | "/// the result of the comparison.\n" |
53884 | "///\n" |
53885 | "/// If either of the two lower 32-bit values is NaN, 1 is returned.\n" |
53886 | "///\n" |
53887 | "/// \\headerfile <x86intrin.h>\n" |
53888 | "///\n" |
53889 | "/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n" |
53890 | "///\n" |
53891 | "/// \\param __a\n" |
53892 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53893 | "/// used in the comparison.\n" |
53894 | "/// \\param __b\n" |
53895 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53896 | "/// used in the comparison.\n" |
53897 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
53898 | "/// lower 32-bit values is NaN, 1 is returned.\n" |
53899 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
53900 | "_mm_ucomineq_ss(__m128 __a, __m128 __b)\n" |
53901 | "{\n" |
53902 | " return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b);\n" |
53903 | "}\n" |
53904 | "\n" |
53905 | "/// Converts a float value contained in the lower 32 bits of a vector of\n" |
53906 | "/// [4 x float] into a 32-bit integer.\n" |
53907 | "///\n" |
53908 | "/// \\headerfile <x86intrin.h>\n" |
53909 | "///\n" |
53910 | "/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>\n" |
53911 | "/// instructions.\n" |
53912 | "///\n" |
53913 | "/// \\param __a\n" |
53914 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53915 | "/// used in the conversion.\n" |
53916 | "/// \\returns A 32-bit integer containing the converted value.\n" |
53917 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
53918 | "_mm_cvtss_si32(__m128 __a)\n" |
53919 | "{\n" |
53920 | " return __builtin_ia32_cvtss2si((__v4sf)__a);\n" |
53921 | "}\n" |
53922 | "\n" |
53923 | "/// Converts a float value contained in the lower 32 bits of a vector of\n" |
53924 | "/// [4 x float] into a 32-bit integer.\n" |
53925 | "///\n" |
53926 | "/// \\headerfile <x86intrin.h>\n" |
53927 | "///\n" |
53928 | "/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>\n" |
53929 | "/// instructions.\n" |
53930 | "///\n" |
53931 | "/// \\param __a\n" |
53932 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53933 | "/// used in the conversion.\n" |
53934 | "/// \\returns A 32-bit integer containing the converted value.\n" |
53935 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
53936 | "_mm_cvt_ss2si(__m128 __a)\n" |
53937 | "{\n" |
53938 | " return _mm_cvtss_si32(__a);\n" |
53939 | "}\n" |
53940 | "\n" |
53941 | "#ifdef __x86_64__\n" |
53942 | "\n" |
53943 | "/// Converts a float value contained in the lower 32 bits of a vector of\n" |
53944 | "/// [4 x float] into a 64-bit integer.\n" |
53945 | "///\n" |
53946 | "/// \\headerfile <x86intrin.h>\n" |
53947 | "///\n" |
53948 | "/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>\n" |
53949 | "/// instructions.\n" |
53950 | "///\n" |
53951 | "/// \\param __a\n" |
53952 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
53953 | "/// used in the conversion.\n" |
53954 | "/// \\returns A 64-bit integer containing the converted value.\n" |
53955 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
53956 | "_mm_cvtss_si64(__m128 __a)\n" |
53957 | "{\n" |
53958 | " return __builtin_ia32_cvtss2si64((__v4sf)__a);\n" |
53959 | "}\n" |
53960 | "\n" |
53961 | "#endif\n" |
53962 | "\n" |
53963 | "/// Converts two low-order float values in a 128-bit vector of\n" |
53964 | "/// [4 x float] into a 64-bit vector of [2 x i32].\n" |
53965 | "///\n" |
53966 | "/// \\headerfile <x86intrin.h>\n" |
53967 | "///\n" |
53968 | "/// This intrinsic corresponds to the <c> CVTPS2PI </c> instruction.\n" |
53969 | "///\n" |
53970 | "/// \\param __a\n" |
53971 | "/// A 128-bit vector of [4 x float].\n" |
53972 | "/// \\returns A 64-bit integer vector containing the converted values.\n" |
53973 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
53974 | "_mm_cvtps_pi32(__m128 __a)\n" |
53975 | "{\n" |
53976 | " return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__a);\n" |
53977 | "}\n" |
53978 | "\n" |
53979 | "/// Converts two low-order float values in a 128-bit vector of\n" |
53980 | "/// [4 x float] into a 64-bit vector of [2 x i32].\n" |
53981 | "///\n" |
53982 | "/// \\headerfile <x86intrin.h>\n" |
53983 | "///\n" |
53984 | "/// This intrinsic corresponds to the <c> CVTPS2PI </c> instruction.\n" |
53985 | "///\n" |
53986 | "/// \\param __a\n" |
53987 | "/// A 128-bit vector of [4 x float].\n" |
53988 | "/// \\returns A 64-bit integer vector containing the converted values.\n" |
53989 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
53990 | "_mm_cvt_ps2pi(__m128 __a)\n" |
53991 | "{\n" |
53992 | " return _mm_cvtps_pi32(__a);\n" |
53993 | "}\n" |
53994 | "\n" |
53995 | "/// Converts a float value contained in the lower 32 bits of a vector of\n" |
53996 | "/// [4 x float] into a 32-bit integer, truncating the result when it is\n" |
53997 | "/// inexact.\n" |
53998 | "///\n" |
53999 | "/// \\headerfile <x86intrin.h>\n" |
54000 | "///\n" |
54001 | "/// This intrinsic corresponds to the <c> VCVTTSS2SI / CVTTSS2SI </c>\n" |
54002 | "/// instructions.\n" |
54003 | "///\n" |
54004 | "/// \\param __a\n" |
54005 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
54006 | "/// used in the conversion.\n" |
54007 | "/// \\returns A 32-bit integer containing the converted value.\n" |
54008 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
54009 | "_mm_cvttss_si32(__m128 __a)\n" |
54010 | "{\n" |
54011 | " return __builtin_ia32_cvttss2si((__v4sf)__a);\n" |
54012 | "}\n" |
54013 | "\n" |
54014 | "/// Converts a float value contained in the lower 32 bits of a vector of\n" |
54015 | "/// [4 x float] into a 32-bit integer, truncating the result when it is\n" |
54016 | "/// inexact.\n" |
54017 | "///\n" |
54018 | "/// \\headerfile <x86intrin.h>\n" |
54019 | "///\n" |
54020 | "/// This intrinsic corresponds to the <c> VCVTTSS2SI / CVTTSS2SI </c>\n" |
54021 | "/// instructions.\n" |
54022 | "///\n" |
54023 | "/// \\param __a\n" |
54024 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
54025 | "/// used in the conversion.\n" |
54026 | "/// \\returns A 32-bit integer containing the converted value.\n" |
54027 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
54028 | "_mm_cvtt_ss2si(__m128 __a)\n" |
54029 | "{\n" |
54030 | " return _mm_cvttss_si32(__a);\n" |
54031 | "}\n" |
54032 | "\n" |
54033 | "#ifdef __x86_64__\n" |
54034 | "/// Converts a float value contained in the lower 32 bits of a vector of\n" |
54035 | "/// [4 x float] into a 64-bit integer, truncating the result when it is\n" |
54036 | "/// inexact.\n" |
54037 | "///\n" |
54038 | "/// \\headerfile <x86intrin.h>\n" |
54039 | "///\n" |
54040 | "/// This intrinsic corresponds to the <c> VCVTTSS2SI / CVTTSS2SI </c>\n" |
54041 | "/// instructions.\n" |
54042 | "///\n" |
54043 | "/// \\param __a\n" |
54044 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
54045 | "/// used in the conversion.\n" |
54046 | "/// \\returns A 64-bit integer containing the converted value.\n" |
54047 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
54048 | "_mm_cvttss_si64(__m128 __a)\n" |
54049 | "{\n" |
54050 | " return __builtin_ia32_cvttss2si64((__v4sf)__a);\n" |
54051 | "}\n" |
54052 | "#endif\n" |
54053 | "\n" |
54054 | "/// Converts two low-order float values in a 128-bit vector of\n" |
54055 | "/// [4 x float] into a 64-bit vector of [2 x i32], truncating the result\n" |
54056 | "/// when it is inexact.\n" |
54057 | "///\n" |
54058 | "/// \\headerfile <x86intrin.h>\n" |
54059 | "///\n" |
54060 | "/// This intrinsic corresponds to the <c> CVTTPS2PI / VTTPS2PI </c>\n" |
54061 | "/// instructions.\n" |
54062 | "///\n" |
54063 | "/// \\param __a\n" |
54064 | "/// A 128-bit vector of [4 x float].\n" |
54065 | "/// \\returns A 64-bit integer vector containing the converted values.\n" |
54066 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
54067 | "_mm_cvttps_pi32(__m128 __a)\n" |
54068 | "{\n" |
54069 | " return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a);\n" |
54070 | "}\n" |
54071 | "\n" |
54072 | "/// Converts two low-order float values in a 128-bit vector of [4 x\n" |
54073 | "/// float] into a 64-bit vector of [2 x i32], truncating the result when it\n" |
54074 | "/// is inexact.\n" |
54075 | "///\n" |
54076 | "/// \\headerfile <x86intrin.h>\n" |
54077 | "///\n" |
54078 | "/// This intrinsic corresponds to the <c> CVTTPS2PI </c> instruction.\n" |
54079 | "///\n" |
54080 | "/// \\param __a\n" |
54081 | "/// A 128-bit vector of [4 x float].\n" |
54082 | "/// \\returns A 64-bit integer vector containing the converted values.\n" |
54083 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
54084 | "_mm_cvtt_ps2pi(__m128 __a)\n" |
54085 | "{\n" |
54086 | " return _mm_cvttps_pi32(__a);\n" |
54087 | "}\n" |
54088 | "\n" |
54089 | "/// Converts a 32-bit signed integer value into a floating point value\n" |
54090 | "/// and writes it to the lower 32 bits of the destination. The remaining\n" |
54091 | "/// higher order elements of the destination vector are copied from the\n" |
54092 | "/// corresponding elements in the first operand.\n" |
54093 | "///\n" |
54094 | "/// \\headerfile <x86intrin.h>\n" |
54095 | "///\n" |
54096 | "/// This intrinsic corresponds to the <c> VCVTSI2SS / CVTSI2SS </c> instruction.\n" |
54097 | "///\n" |
54098 | "/// \\param __a\n" |
54099 | "/// A 128-bit vector of [4 x float].\n" |
54100 | "/// \\param __b\n" |
54101 | "/// A 32-bit signed integer operand containing the value to be converted.\n" |
54102 | "/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n" |
54103 | "/// converted value of the second operand. The upper 96 bits are copied from\n" |
54104 | "/// the upper 96 bits of the first operand.\n" |
54105 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54106 | "_mm_cvtsi32_ss(__m128 __a, int __b)\n" |
54107 | "{\n" |
54108 | " __a[0] = __b;\n" |
54109 | " return __a;\n" |
54110 | "}\n" |
54111 | "\n" |
54112 | "/// Converts a 32-bit signed integer value into a floating point value\n" |
54113 | "/// and writes it to the lower 32 bits of the destination. The remaining\n" |
54114 | "/// higher order elements of the destination are copied from the\n" |
54115 | "/// corresponding elements in the first operand.\n" |
54116 | "///\n" |
54117 | "/// \\headerfile <x86intrin.h>\n" |
54118 | "///\n" |
54119 | "/// This intrinsic corresponds to the <c> VCVTSI2SS / CVTSI2SS </c> instruction.\n" |
54120 | "///\n" |
54121 | "/// \\param __a\n" |
54122 | "/// A 128-bit vector of [4 x float].\n" |
54123 | "/// \\param __b\n" |
54124 | "/// A 32-bit signed integer operand containing the value to be converted.\n" |
54125 | "/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n" |
54126 | "/// converted value of the second operand. The upper 96 bits are copied from\n" |
54127 | "/// the upper 96 bits of the first operand.\n" |
54128 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54129 | "_mm_cvt_si2ss(__m128 __a, int __b)\n" |
54130 | "{\n" |
54131 | " return _mm_cvtsi32_ss(__a, __b);\n" |
54132 | "}\n" |
54133 | "\n" |
54134 | "#ifdef __x86_64__\n" |
54135 | "\n" |
54136 | "/// Converts a 64-bit signed integer value into a floating point value\n" |
54137 | "/// and writes it to the lower 32 bits of the destination. The remaining\n" |
54138 | "/// higher order elements of the destination are copied from the\n" |
54139 | "/// corresponding elements in the first operand.\n" |
54140 | "///\n" |
54141 | "/// \\headerfile <x86intrin.h>\n" |
54142 | "///\n" |
54143 | "/// This intrinsic corresponds to the <c> VCVTSI2SS / CVTSI2SS </c> instruction.\n" |
54144 | "///\n" |
54145 | "/// \\param __a\n" |
54146 | "/// A 128-bit vector of [4 x float].\n" |
54147 | "/// \\param __b\n" |
54148 | "/// A 64-bit signed integer operand containing the value to be converted.\n" |
54149 | "/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n" |
54150 | "/// converted value of the second operand. The upper 96 bits are copied from\n" |
54151 | "/// the upper 96 bits of the first operand.\n" |
54152 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54153 | "_mm_cvtsi64_ss(__m128 __a, long long __b)\n" |
54154 | "{\n" |
54155 | " __a[0] = __b;\n" |
54156 | " return __a;\n" |
54157 | "}\n" |
54158 | "\n" |
54159 | "#endif\n" |
54160 | "\n" |
54161 | "/// Converts two elements of a 64-bit vector of [2 x i32] into two\n" |
54162 | "/// floating point values and writes them to the lower 64-bits of the\n" |
54163 | "/// destination. The remaining higher order elements of the destination are\n" |
54164 | "/// copied from the corresponding elements in the first operand.\n" |
54165 | "///\n" |
54166 | "/// \\headerfile <x86intrin.h>\n" |
54167 | "///\n" |
54168 | "/// This intrinsic corresponds to the <c> CVTPI2PS </c> instruction.\n" |
54169 | "///\n" |
54170 | "/// \\param __a\n" |
54171 | "/// A 128-bit vector of [4 x float].\n" |
54172 | "/// \\param __b\n" |
54173 | "/// A 64-bit vector of [2 x i32]. The elements in this vector are converted\n" |
54174 | "/// and written to the corresponding low-order elements in the destination.\n" |
54175 | "/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n" |
54176 | "/// converted value of the second operand. The upper 64 bits are copied from\n" |
54177 | "/// the upper 64 bits of the first operand.\n" |
54178 | "static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n" |
54179 | "_mm_cvtpi32_ps(__m128 __a, __m64 __b)\n" |
54180 | "{\n" |
54181 | " return __builtin_ia32_cvtpi2ps((__v4sf)__a, (__v2si)__b);\n" |
54182 | "}\n" |
54183 | "\n" |
54184 | "/// Converts two elements of a 64-bit vector of [2 x i32] into two\n" |
54185 | "/// floating point values and writes them to the lower 64-bits of the\n" |
54186 | "/// destination. The remaining higher order elements of the destination are\n" |
54187 | "/// copied from the corresponding elements in the first operand.\n" |
54188 | "///\n" |
54189 | "/// \\headerfile <x86intrin.h>\n" |
54190 | "///\n" |
54191 | "/// This intrinsic corresponds to the <c> CVTPI2PS </c> instruction.\n" |
54192 | "///\n" |
54193 | "/// \\param __a\n" |
54194 | "/// A 128-bit vector of [4 x float].\n" |
54195 | "/// \\param __b\n" |
54196 | "/// A 64-bit vector of [2 x i32]. The elements in this vector are converted\n" |
54197 | "/// and written to the corresponding low-order elements in the destination.\n" |
54198 | "/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n" |
54199 | "/// converted value from the second operand. The upper 64 bits are copied\n" |
54200 | "/// from the upper 64 bits of the first operand.\n" |
54201 | "static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n" |
54202 | "_mm_cvt_pi2ps(__m128 __a, __m64 __b)\n" |
54203 | "{\n" |
54204 | " return _mm_cvtpi32_ps(__a, __b);\n" |
54205 | "}\n" |
54206 | "\n" |
54207 | "/// Extracts a float value contained in the lower 32 bits of a vector of\n" |
54208 | "/// [4 x float].\n" |
54209 | "///\n" |
54210 | "/// \\headerfile <x86intrin.h>\n" |
54211 | "///\n" |
54212 | "/// This intrinsic has no corresponding instruction.\n" |
54213 | "///\n" |
54214 | "/// \\param __a\n" |
54215 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
54216 | "/// used in the extraction.\n" |
54217 | "/// \\returns A 32-bit float containing the extracted value.\n" |
54218 | "static __inline__ float __DEFAULT_FN_ATTRS\n" |
54219 | "_mm_cvtss_f32(__m128 __a)\n" |
54220 | "{\n" |
54221 | " return __a[0];\n" |
54222 | "}\n" |
54223 | "\n" |
54224 | "/// Loads two packed float values from the address \\a __p into the\n" |
54225 | "/// high-order bits of a 128-bit vector of [4 x float]. The low-order bits\n" |
54226 | "/// are copied from the low-order bits of the first operand.\n" |
54227 | "///\n" |
54228 | "/// \\headerfile <x86intrin.h>\n" |
54229 | "///\n" |
54230 | "/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.\n" |
54231 | "///\n" |
54232 | "/// \\param __a\n" |
54233 | "/// A 128-bit vector of [4 x float]. Bits [63:0] are written to bits [63:0]\n" |
54234 | "/// of the destination.\n" |
54235 | "/// \\param __p\n" |
54236 | "/// A pointer to two packed float values. Bits [63:0] are written to bits\n" |
54237 | "/// [127:64] of the destination.\n" |
54238 | "/// \\returns A 128-bit vector of [4 x float] containing the moved values.\n" |
54239 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54240 | "_mm_loadh_pi(__m128 __a, const __m64 *__p)\n" |
54241 | "{\n" |
54242 | " typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8)));\n" |
54243 | " struct __mm_loadh_pi_struct {\n" |
54244 | " __mm_loadh_pi_v2f32 __u;\n" |
54245 | " } __attribute__((__packed__, __may_alias__));\n" |
54246 | " __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u;\n" |
54247 | " __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);\n" |
54248 | " return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);\n" |
54249 | "}\n" |
54250 | "\n" |
54251 | "/// Loads two packed float values from the address \\a __p into the\n" |
54252 | "/// low-order bits of a 128-bit vector of [4 x float]. The high-order bits\n" |
54253 | "/// are copied from the high-order bits of the first operand.\n" |
54254 | "///\n" |
54255 | "/// \\headerfile <x86intrin.h>\n" |
54256 | "///\n" |
54257 | "/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.\n" |
54258 | "///\n" |
54259 | "/// \\param __a\n" |
54260 | "/// A 128-bit vector of [4 x float]. Bits [127:64] are written to bits\n" |
54261 | "/// [127:64] of the destination.\n" |
54262 | "/// \\param __p\n" |
54263 | "/// A pointer to two packed float values. Bits [63:0] are written to bits\n" |
54264 | "/// [63:0] of the destination.\n" |
54265 | "/// \\returns A 128-bit vector of [4 x float] containing the moved values.\n" |
54266 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54267 | "_mm_loadl_pi(__m128 __a, const __m64 *__p)\n" |
54268 | "{\n" |
54269 | " typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8)));\n" |
54270 | " struct __mm_loadl_pi_struct {\n" |
54271 | " __mm_loadl_pi_v2f32 __u;\n" |
54272 | " } __attribute__((__packed__, __may_alias__));\n" |
54273 | " __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u;\n" |
54274 | " __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);\n" |
54275 | " return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);\n" |
54276 | "}\n" |
54277 | "\n" |
54278 | "/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n" |
54279 | "/// 32 bits of the vector are initialized with the single-precision\n" |
54280 | "/// floating-point value loaded from a specified memory location. The upper\n" |
54281 | "/// 96 bits are set to zero.\n" |
54282 | "///\n" |
54283 | "/// \\headerfile <x86intrin.h>\n" |
54284 | "///\n" |
54285 | "/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.\n" |
54286 | "///\n" |
54287 | "/// \\param __p\n" |
54288 | "/// A pointer to a 32-bit memory location containing a single-precision\n" |
54289 | "/// floating-point value.\n" |
54290 | "/// \\returns An initialized 128-bit floating-point vector of [4 x float]. The\n" |
54291 | "/// lower 32 bits contain the value loaded from the memory location. The\n" |
54292 | "/// upper 96 bits are set to zero.\n" |
54293 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54294 | "_mm_load_ss(const float *__p)\n" |
54295 | "{\n" |
54296 | " struct __mm_load_ss_struct {\n" |
54297 | " float __u;\n" |
54298 | " } __attribute__((__packed__, __may_alias__));\n" |
54299 | " float __u = ((struct __mm_load_ss_struct*)__p)->__u;\n" |
54300 | " return __extension__ (__m128){ __u, 0, 0, 0 };\n" |
54301 | "}\n" |
54302 | "\n" |
54303 | "/// Loads a 32-bit float value and duplicates it to all four vector\n" |
54304 | "/// elements of a 128-bit vector of [4 x float].\n" |
54305 | "///\n" |
54306 | "/// \\headerfile <x86intrin.h>\n" |
54307 | "///\n" |
54308 | "/// This intrinsic corresponds to the <c> VBROADCASTSS / MOVSS + shuffling </c>\n" |
54309 | "/// instruction.\n" |
54310 | "///\n" |
54311 | "/// \\param __p\n" |
54312 | "/// A pointer to a float value to be loaded and duplicated.\n" |
54313 | "/// \\returns A 128-bit vector of [4 x float] containing the loaded and\n" |
54314 | "/// duplicated values.\n" |
54315 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54316 | "_mm_load1_ps(const float *__p)\n" |
54317 | "{\n" |
54318 | " struct __mm_load1_ps_struct {\n" |
54319 | " float __u;\n" |
54320 | " } __attribute__((__packed__, __may_alias__));\n" |
54321 | " float __u = ((struct __mm_load1_ps_struct*)__p)->__u;\n" |
54322 | " return __extension__ (__m128){ __u, __u, __u, __u };\n" |
54323 | "}\n" |
54324 | "\n" |
54325 | "#define _mm_load_ps1(p) _mm_load1_ps(p)\n" |
54326 | "\n" |
54327 | "/// Loads a 128-bit floating-point vector of [4 x float] from an aligned\n" |
54328 | "/// memory location.\n" |
54329 | "///\n" |
54330 | "/// \\headerfile <x86intrin.h>\n" |
54331 | "///\n" |
54332 | "/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.\n" |
54333 | "///\n" |
54334 | "/// \\param __p\n" |
54335 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
54336 | "/// location has to be 128-bit aligned.\n" |
54337 | "/// \\returns A 128-bit vector of [4 x float] containing the loaded values.\n" |
54338 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54339 | "_mm_load_ps(const float *__p)\n" |
54340 | "{\n" |
54341 | " return *(__m128*)__p;\n" |
54342 | "}\n" |
54343 | "\n" |
54344 | "/// Loads a 128-bit floating-point vector of [4 x float] from an\n" |
54345 | "/// unaligned memory location.\n" |
54346 | "///\n" |
54347 | "/// \\headerfile <x86intrin.h>\n" |
54348 | "///\n" |
54349 | "/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.\n" |
54350 | "///\n" |
54351 | "/// \\param __p\n" |
54352 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
54353 | "/// location does not have to be aligned.\n" |
54354 | "/// \\returns A 128-bit vector of [4 x float] containing the loaded values.\n" |
54355 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54356 | "_mm_loadu_ps(const float *__p)\n" |
54357 | "{\n" |
54358 | " struct __loadu_ps {\n" |
54359 | " __m128 __v;\n" |
54360 | " } __attribute__((__packed__, __may_alias__));\n" |
54361 | " return ((struct __loadu_ps*)__p)->__v;\n" |
54362 | "}\n" |
54363 | "\n" |
54364 | "/// Loads four packed float values, in reverse order, from an aligned\n" |
54365 | "/// memory location to 32-bit elements in a 128-bit vector of [4 x float].\n" |
54366 | "///\n" |
54367 | "/// \\headerfile <x86intrin.h>\n" |
54368 | "///\n" |
54369 | "/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS + shuffling </c>\n" |
54370 | "/// instruction.\n" |
54371 | "///\n" |
54372 | "/// \\param __p\n" |
54373 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
54374 | "/// location has to be 128-bit aligned.\n" |
54375 | "/// \\returns A 128-bit vector of [4 x float] containing the moved values, loaded\n" |
54376 | "/// in reverse order.\n" |
54377 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54378 | "_mm_loadr_ps(const float *__p)\n" |
54379 | "{\n" |
54380 | " __m128 __a = _mm_load_ps(__p);\n" |
54381 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);\n" |
54382 | "}\n" |
54383 | "\n" |
54384 | "/// Create a 128-bit vector of [4 x float] with undefined values.\n" |
54385 | "///\n" |
54386 | "/// \\headerfile <x86intrin.h>\n" |
54387 | "///\n" |
54388 | "/// This intrinsic has no corresponding instruction.\n" |
54389 | "///\n" |
54390 | "/// \\returns A 128-bit vector of [4 x float] containing undefined values.\n" |
54391 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54392 | "_mm_undefined_ps(void)\n" |
54393 | "{\n" |
54394 | " return (__m128)__builtin_ia32_undef128();\n" |
54395 | "}\n" |
54396 | "\n" |
54397 | "/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n" |
54398 | "/// 32 bits of the vector are initialized with the specified single-precision\n" |
54399 | "/// floating-point value. The upper 96 bits are set to zero.\n" |
54400 | "///\n" |
54401 | "/// \\headerfile <x86intrin.h>\n" |
54402 | "///\n" |
54403 | "/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.\n" |
54404 | "///\n" |
54405 | "/// \\param __w\n" |
54406 | "/// A single-precision floating-point value used to initialize the lower 32\n" |
54407 | "/// bits of the result.\n" |
54408 | "/// \\returns An initialized 128-bit floating-point vector of [4 x float]. The\n" |
54409 | "/// lower 32 bits contain the value provided in the source operand. The\n" |
54410 | "/// upper 96 bits are set to zero.\n" |
54411 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54412 | "_mm_set_ss(float __w)\n" |
54413 | "{\n" |
54414 | " return __extension__ (__m128){ __w, 0, 0, 0 };\n" |
54415 | "}\n" |
54416 | "\n" |
54417 | "/// Constructs a 128-bit floating-point vector of [4 x float], with each\n" |
54418 | "/// of the four single-precision floating-point vector elements set to the\n" |
54419 | "/// specified single-precision floating-point value.\n" |
54420 | "///\n" |
54421 | "/// \\headerfile <x86intrin.h>\n" |
54422 | "///\n" |
54423 | "/// This intrinsic corresponds to the <c> VPERMILPS / PERMILPS </c> instruction.\n" |
54424 | "///\n" |
54425 | "/// \\param __w\n" |
54426 | "/// A single-precision floating-point value used to initialize each vector\n" |
54427 | "/// element of the result.\n" |
54428 | "/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n" |
54429 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54430 | "_mm_set1_ps(float __w)\n" |
54431 | "{\n" |
54432 | " return __extension__ (__m128){ __w, __w, __w, __w };\n" |
54433 | "}\n" |
54434 | "\n" |
54435 | "/* Microsoft specific. */\n" |
54436 | "/// Constructs a 128-bit floating-point vector of [4 x float], with each\n" |
54437 | "/// of the four single-precision floating-point vector elements set to the\n" |
54438 | "/// specified single-precision floating-point value.\n" |
54439 | "///\n" |
54440 | "/// \\headerfile <x86intrin.h>\n" |
54441 | "///\n" |
54442 | "/// This intrinsic corresponds to the <c> VPERMILPS / PERMILPS </c> instruction.\n" |
54443 | "///\n" |
54444 | "/// \\param __w\n" |
54445 | "/// A single-precision floating-point value used to initialize each vector\n" |
54446 | "/// element of the result.\n" |
54447 | "/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n" |
54448 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54449 | "_mm_set_ps1(float __w)\n" |
54450 | "{\n" |
54451 | " return _mm_set1_ps(__w);\n" |
54452 | "}\n" |
54453 | "\n" |
54454 | "/// Constructs a 128-bit floating-point vector of [4 x float]\n" |
54455 | "/// initialized with the specified single-precision floating-point values.\n" |
54456 | "///\n" |
54457 | "/// \\headerfile <x86intrin.h>\n" |
54458 | "///\n" |
54459 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
54460 | "/// instruction.\n" |
54461 | "///\n" |
54462 | "/// \\param __z\n" |
54463 | "/// A single-precision floating-point value used to initialize bits [127:96]\n" |
54464 | "/// of the result.\n" |
54465 | "/// \\param __y\n" |
54466 | "/// A single-precision floating-point value used to initialize bits [95:64]\n" |
54467 | "/// of the result.\n" |
54468 | "/// \\param __x\n" |
54469 | "/// A single-precision floating-point value used to initialize bits [63:32]\n" |
54470 | "/// of the result.\n" |
54471 | "/// \\param __w\n" |
54472 | "/// A single-precision floating-point value used to initialize bits [31:0]\n" |
54473 | "/// of the result.\n" |
54474 | "/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n" |
54475 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54476 | "_mm_set_ps(float __z, float __y, float __x, float __w)\n" |
54477 | "{\n" |
54478 | " return __extension__ (__m128){ __w, __x, __y, __z };\n" |
54479 | "}\n" |
54480 | "\n" |
54481 | "/// Constructs a 128-bit floating-point vector of [4 x float],\n" |
54482 | "/// initialized in reverse order with the specified 32-bit single-precision\n" |
54483 | "/// float-point values.\n" |
54484 | "///\n" |
54485 | "/// \\headerfile <x86intrin.h>\n" |
54486 | "///\n" |
54487 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
54488 | "/// instruction.\n" |
54489 | "///\n" |
54490 | "/// \\param __z\n" |
54491 | "/// A single-precision floating-point value used to initialize bits [31:0]\n" |
54492 | "/// of the result.\n" |
54493 | "/// \\param __y\n" |
54494 | "/// A single-precision floating-point value used to initialize bits [63:32]\n" |
54495 | "/// of the result.\n" |
54496 | "/// \\param __x\n" |
54497 | "/// A single-precision floating-point value used to initialize bits [95:64]\n" |
54498 | "/// of the result.\n" |
54499 | "/// \\param __w\n" |
54500 | "/// A single-precision floating-point value used to initialize bits [127:96]\n" |
54501 | "/// of the result.\n" |
54502 | "/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n" |
54503 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54504 | "_mm_setr_ps(float __z, float __y, float __x, float __w)\n" |
54505 | "{\n" |
54506 | " return __extension__ (__m128){ __z, __y, __x, __w };\n" |
54507 | "}\n" |
54508 | "\n" |
54509 | "/// Constructs a 128-bit floating-point vector of [4 x float] initialized\n" |
54510 | "/// to zero.\n" |
54511 | "///\n" |
54512 | "/// \\headerfile <x86intrin.h>\n" |
54513 | "///\n" |
54514 | "/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.\n" |
54515 | "///\n" |
54516 | "/// \\returns An initialized 128-bit floating-point vector of [4 x float] with\n" |
54517 | "/// all elements set to zero.\n" |
54518 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
54519 | "_mm_setzero_ps(void)\n" |
54520 | "{\n" |
54521 | " return __extension__ (__m128){ 0, 0, 0, 0 };\n" |
54522 | "}\n" |
54523 | "\n" |
54524 | "/// Stores the upper 64 bits of a 128-bit vector of [4 x float] to a\n" |
54525 | "/// memory location.\n" |
54526 | "///\n" |
54527 | "/// \\headerfile <x86intrin.h>\n" |
54528 | "///\n" |
54529 | "/// This intrinsic corresponds to the <c> VPEXTRQ / PEXTRQ </c> instruction.\n" |
54530 | "///\n" |
54531 | "/// \\param __p\n" |
54532 | "/// A pointer to a 64-bit memory location.\n" |
54533 | "/// \\param __a\n" |
54534 | "/// A 128-bit vector of [4 x float] containing the values to be stored.\n" |
54535 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
54536 | "_mm_storeh_pi(__m64 *__p, __m128 __a)\n" |
54537 | "{\n" |
54538 | " __builtin_ia32_storehps((__v2si *)__p, (__v4sf)__a);\n" |
54539 | "}\n" |
54540 | "\n" |
54541 | "/// Stores the lower 64 bits of a 128-bit vector of [4 x float] to a\n" |
54542 | "/// memory location.\n" |
54543 | "///\n" |
54544 | "/// \\headerfile <x86intrin.h>\n" |
54545 | "///\n" |
54546 | "/// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction.\n" |
54547 | "///\n" |
54548 | "/// \\param __p\n" |
54549 | "/// A pointer to a memory location that will receive the float values.\n" |
54550 | "/// \\param __a\n" |
54551 | "/// A 128-bit vector of [4 x float] containing the values to be stored.\n" |
54552 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
54553 | "_mm_storel_pi(__m64 *__p, __m128 __a)\n" |
54554 | "{\n" |
54555 | " __builtin_ia32_storelps((__v2si *)__p, (__v4sf)__a);\n" |
54556 | "}\n" |
54557 | "\n" |
54558 | "/// Stores the lower 32 bits of a 128-bit vector of [4 x float] to a\n" |
54559 | "/// memory location.\n" |
54560 | "///\n" |
54561 | "/// \\headerfile <x86intrin.h>\n" |
54562 | "///\n" |
54563 | "/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.\n" |
54564 | "///\n" |
54565 | "/// \\param __p\n" |
54566 | "/// A pointer to a 32-bit memory location.\n" |
54567 | "/// \\param __a\n" |
54568 | "/// A 128-bit vector of [4 x float] containing the value to be stored.\n" |
54569 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
54570 | "_mm_store_ss(float *__p, __m128 __a)\n" |
54571 | "{\n" |
54572 | " struct __mm_store_ss_struct {\n" |
54573 | " float __u;\n" |
54574 | " } __attribute__((__packed__, __may_alias__));\n" |
54575 | " ((struct __mm_store_ss_struct*)__p)->__u = __a[0];\n" |
54576 | "}\n" |
54577 | "\n" |
54578 | "/// Stores a 128-bit vector of [4 x float] to an unaligned memory\n" |
54579 | "/// location.\n" |
54580 | "///\n" |
54581 | "/// \\headerfile <x86intrin.h>\n" |
54582 | "///\n" |
54583 | "/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.\n" |
54584 | "///\n" |
54585 | "/// \\param __p\n" |
54586 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
54587 | "/// location does not have to be aligned.\n" |
54588 | "/// \\param __a\n" |
54589 | "/// A 128-bit vector of [4 x float] containing the values to be stored.\n" |
54590 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
54591 | "_mm_storeu_ps(float *__p, __m128 __a)\n" |
54592 | "{\n" |
54593 | " struct __storeu_ps {\n" |
54594 | " __m128 __v;\n" |
54595 | " } __attribute__((__packed__, __may_alias__));\n" |
54596 | " ((struct __storeu_ps*)__p)->__v = __a;\n" |
54597 | "}\n" |
54598 | "\n" |
54599 | "/// Stores a 128-bit vector of [4 x float] into an aligned memory\n" |
54600 | "/// location.\n" |
54601 | "///\n" |
54602 | "/// \\headerfile <x86intrin.h>\n" |
54603 | "///\n" |
54604 | "/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.\n" |
54605 | "///\n" |
54606 | "/// \\param __p\n" |
54607 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
54608 | "/// location has to be 16-byte aligned.\n" |
54609 | "/// \\param __a\n" |
54610 | "/// A 128-bit vector of [4 x float] containing the values to be stored.\n" |
54611 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
54612 | "_mm_store_ps(float *__p, __m128 __a)\n" |
54613 | "{\n" |
54614 | " *(__m128*)__p = __a;\n" |
54615 | "}\n" |
54616 | "\n" |
54617 | "/// Stores the lower 32 bits of a 128-bit vector of [4 x float] into\n" |
54618 | "/// four contiguous elements in an aligned memory location.\n" |
54619 | "///\n" |
54620 | "/// \\headerfile <x86intrin.h>\n" |
54621 | "///\n" |
54622 | "/// This intrinsic corresponds to <c> VMOVAPS / MOVAPS + shuffling </c>\n" |
54623 | "/// instruction.\n" |
54624 | "///\n" |
54625 | "/// \\param __p\n" |
54626 | "/// A pointer to a 128-bit memory location.\n" |
54627 | "/// \\param __a\n" |
54628 | "/// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each\n" |
54629 | "/// of the four contiguous elements pointed by \\a __p.\n" |
54630 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
54631 | "_mm_store1_ps(float *__p, __m128 __a)\n" |
54632 | "{\n" |
54633 | " __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);\n" |
54634 | " _mm_store_ps(__p, __a);\n" |
54635 | "}\n" |
54636 | "\n" |
54637 | "/// Stores the lower 32 bits of a 128-bit vector of [4 x float] into\n" |
54638 | "/// four contiguous elements in an aligned memory location.\n" |
54639 | "///\n" |
54640 | "/// \\headerfile <x86intrin.h>\n" |
54641 | "///\n" |
54642 | "/// This intrinsic corresponds to <c> VMOVAPS / MOVAPS + shuffling </c>\n" |
54643 | "/// instruction.\n" |
54644 | "///\n" |
54645 | "/// \\param __p\n" |
54646 | "/// A pointer to a 128-bit memory location.\n" |
54647 | "/// \\param __a\n" |
54648 | "/// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each\n" |
54649 | "/// of the four contiguous elements pointed by \\a __p.\n" |
54650 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
54651 | "_mm_store_ps1(float *__p, __m128 __a)\n" |
54652 | "{\n" |
54653 | " _mm_store1_ps(__p, __a);\n" |
54654 | "}\n" |
54655 | "\n" |
54656 | "/// Stores float values from a 128-bit vector of [4 x float] to an\n" |
54657 | "/// aligned memory location in reverse order.\n" |
54658 | "///\n" |
54659 | "/// \\headerfile <x86intrin.h>\n" |
54660 | "///\n" |
54661 | "/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS + shuffling </c>\n" |
54662 | "/// instruction.\n" |
54663 | "///\n" |
54664 | "/// \\param __p\n" |
54665 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
54666 | "/// location has to be 128-bit aligned.\n" |
54667 | "/// \\param __a\n" |
54668 | "/// A 128-bit vector of [4 x float] containing the values to be stored.\n" |
54669 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
54670 | "_mm_storer_ps(float *__p, __m128 __a)\n" |
54671 | "{\n" |
54672 | " __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);\n" |
54673 | " _mm_store_ps(__p, __a);\n" |
54674 | "}\n" |
54675 | "\n" |
54676 | "#define _MM_HINT_ET0 7\n" |
54677 | "#define _MM_HINT_ET1 6\n" |
54678 | "#define _MM_HINT_T0 3\n" |
54679 | "#define _MM_HINT_T1 2\n" |
54680 | "#define _MM_HINT_T2 1\n" |
54681 | "#define _MM_HINT_NTA 0\n" |
54682 | "\n" |
54683 | "#ifndef _MSC_VER\n" |
54684 | "/* FIXME: We have to #define this because \"sel\" must be a constant integer, and\n" |
54685 | " Sema doesn't do any form of constant propagation yet. */\n" |
54686 | "\n" |
54687 | "/// Loads one cache line of data from the specified address to a location\n" |
54688 | "/// closer to the processor.\n" |
54689 | "///\n" |
54690 | "/// \\headerfile <x86intrin.h>\n" |
54691 | "///\n" |
54692 | "/// \\code\n" |
54693 | "/// void _mm_prefetch(const void * a, const int sel);\n" |
54694 | "/// \\endcode\n" |
54695 | "///\n" |
54696 | "/// This intrinsic corresponds to the <c> PREFETCHNTA </c> instruction.\n" |
54697 | "///\n" |
54698 | "/// \\param a\n" |
54699 | "/// A pointer to a memory location containing a cache line of data.\n" |
54700 | "/// \\param sel\n" |
54701 | "/// A predefined integer constant specifying the type of prefetch\n" |
54702 | "/// operation: \\n\n" |
54703 | "/// _MM_HINT_NTA: Move data using the non-temporal access (NTA) hint. The\n" |
54704 | "/// PREFETCHNTA instruction will be generated. \\n\n" |
54705 | "/// _MM_HINT_T0: Move data using the T0 hint. The PREFETCHT0 instruction will\n" |
54706 | "/// be generated. \\n\n" |
54707 | "/// _MM_HINT_T1: Move data using the T1 hint. The PREFETCHT1 instruction will\n" |
54708 | "/// be generated. \\n\n" |
54709 | "/// _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will\n" |
54710 | "/// be generated.\n" |
54711 | "#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), \\\n" |
54712 | " ((sel) >> 2) & 1, (sel) & 0x3))\n" |
54713 | "#endif\n" |
54714 | "\n" |
54715 | "/// Stores a 64-bit integer in the specified aligned memory location. To\n" |
54716 | "/// minimize caching, the data is flagged as non-temporal (unlikely to be\n" |
54717 | "/// used again soon).\n" |
54718 | "///\n" |
54719 | "/// \\headerfile <x86intrin.h>\n" |
54720 | "///\n" |
54721 | "/// This intrinsic corresponds to the <c> MOVNTQ </c> instruction.\n" |
54722 | "///\n" |
54723 | "/// \\param __p\n" |
54724 | "/// A pointer to an aligned memory location used to store the register value.\n" |
54725 | "/// \\param __a\n" |
54726 | "/// A 64-bit integer containing the value to be stored.\n" |
54727 | "static __inline__ void __DEFAULT_FN_ATTRS_MMX\n" |
54728 | "_mm_stream_pi(__m64 *__p, __m64 __a)\n" |
54729 | "{\n" |
54730 | " __builtin_ia32_movntq(__p, __a);\n" |
54731 | "}\n" |
54732 | "\n" |
54733 | "/// Moves packed float values from a 128-bit vector of [4 x float] to a\n" |
54734 | "/// 128-bit aligned memory location. To minimize caching, the data is flagged\n" |
54735 | "/// as non-temporal (unlikely to be used again soon).\n" |
54736 | "///\n" |
54737 | "/// \\headerfile <x86intrin.h>\n" |
54738 | "///\n" |
54739 | "/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.\n" |
54740 | "///\n" |
54741 | "/// \\param __p\n" |
54742 | "/// A pointer to a 128-bit aligned memory location that will receive the\n" |
54743 | "/// single-precision floating-point values.\n" |
54744 | "/// \\param __a\n" |
54745 | "/// A 128-bit vector of [4 x float] containing the values to be moved.\n" |
54746 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
54747 | "_mm_stream_ps(float *__p, __m128 __a)\n" |
54748 | "{\n" |
54749 | " __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);\n" |
54750 | "}\n" |
54751 | "\n" |
54752 | "#if defined(__cplusplus)\n" |
54753 | "extern \"C\" {\n" |
54754 | "#endif\n" |
54755 | "\n" |
54756 | "/// Forces strong memory ordering (serialization) between store\n" |
54757 | "/// instructions preceding this instruction and store instructions following\n" |
54758 | "/// this instruction, ensuring the system completes all previous stores\n" |
54759 | "/// before executing subsequent stores.\n" |
54760 | "///\n" |
54761 | "/// \\headerfile <x86intrin.h>\n" |
54762 | "///\n" |
54763 | "/// This intrinsic corresponds to the <c> SFENCE </c> instruction.\n" |
54764 | "///\n" |
54765 | "void _mm_sfence(void);\n" |
54766 | "\n" |
54767 | "#if defined(__cplusplus)\n" |
54768 | "} // extern \"C\"\n" |
54769 | "#endif\n" |
54770 | "\n" |
54771 | "/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and\n" |
54772 | "/// returns it, as specified by the immediate integer operand.\n" |
54773 | "///\n" |
54774 | "/// \\headerfile <x86intrin.h>\n" |
54775 | "///\n" |
54776 | "/// \\code\n" |
54777 | "/// int _mm_extract_pi16(__m64 a, int n);\n" |
54778 | "/// \\endcode\n" |
54779 | "///\n" |
54780 | "/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.\n" |
54781 | "///\n" |
54782 | "/// \\param a\n" |
54783 | "/// A 64-bit vector of [4 x i16].\n" |
54784 | "/// \\param n\n" |
54785 | "/// An immediate integer operand that determines which bits are extracted: \\n\n" |
54786 | "/// 0: Bits [15:0] are copied to the destination. \\n\n" |
54787 | "/// 1: Bits [31:16] are copied to the destination. \\n\n" |
54788 | "/// 2: Bits [47:32] are copied to the destination. \\n\n" |
54789 | "/// 3: Bits [63:48] are copied to the destination.\n" |
54790 | "/// \\returns A 16-bit integer containing the extracted 16 bits of packed data.\n" |
54791 | "#define _mm_extract_pi16(a, n) \\\n" |
54792 | " (int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n)\n" |
54793 | "\n" |
54794 | "/// Copies data from the 64-bit vector of [4 x i16] to the destination,\n" |
54795 | "/// and inserts the lower 16-bits of an integer operand at the 16-bit offset\n" |
54796 | "/// specified by the immediate operand \\a n.\n" |
54797 | "///\n" |
54798 | "/// \\headerfile <x86intrin.h>\n" |
54799 | "///\n" |
54800 | "/// \\code\n" |
54801 | "/// __m64 _mm_insert_pi16(__m64 a, int d, int n);\n" |
54802 | "/// \\endcode\n" |
54803 | "///\n" |
54804 | "/// This intrinsic corresponds to the <c> PINSRW </c> instruction.\n" |
54805 | "///\n" |
54806 | "/// \\param a\n" |
54807 | "/// A 64-bit vector of [4 x i16].\n" |
54808 | "/// \\param d\n" |
54809 | "/// An integer. The lower 16-bit value from this operand is written to the\n" |
54810 | "/// destination at the offset specified by operand \\a n.\n" |
54811 | "/// \\param n\n" |
54812 | "/// An immediate integer operant that determines which the bits to be used\n" |
54813 | "/// in the destination. \\n\n" |
54814 | "/// 0: Bits [15:0] are copied to the destination. \\n\n" |
54815 | "/// 1: Bits [31:16] are copied to the destination. \\n\n" |
54816 | "/// 2: Bits [47:32] are copied to the destination. \\n\n" |
54817 | "/// 3: Bits [63:48] are copied to the destination. \\n\n" |
54818 | "/// The remaining bits in the destination are copied from the corresponding\n" |
54819 | "/// bits in operand \\a a.\n" |
54820 | "/// \\returns A 64-bit integer vector containing the copied packed data from the\n" |
54821 | "/// operands.\n" |
54822 | "#define _mm_insert_pi16(a, d, n) \\\n" |
54823 | " (__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n)\n" |
54824 | "\n" |
54825 | "/// Compares each of the corresponding packed 16-bit integer values of\n" |
54826 | "/// the 64-bit integer vectors, and writes the greater value to the\n" |
54827 | "/// corresponding bits in the destination.\n" |
54828 | "///\n" |
54829 | "/// \\headerfile <x86intrin.h>\n" |
54830 | "///\n" |
54831 | "/// This intrinsic corresponds to the <c> PMAXSW </c> instruction.\n" |
54832 | "///\n" |
54833 | "/// \\param __a\n" |
54834 | "/// A 64-bit integer vector containing one of the source operands.\n" |
54835 | "/// \\param __b\n" |
54836 | "/// A 64-bit integer vector containing one of the source operands.\n" |
54837 | "/// \\returns A 64-bit integer vector containing the comparison results.\n" |
54838 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
54839 | "_mm_max_pi16(__m64 __a, __m64 __b)\n" |
54840 | "{\n" |
54841 | " return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);\n" |
54842 | "}\n" |
54843 | "\n" |
54844 | "/// Compares each of the corresponding packed 8-bit unsigned integer\n" |
54845 | "/// values of the 64-bit integer vectors, and writes the greater value to the\n" |
54846 | "/// corresponding bits in the destination.\n" |
54847 | "///\n" |
54848 | "/// \\headerfile <x86intrin.h>\n" |
54849 | "///\n" |
54850 | "/// This intrinsic corresponds to the <c> PMAXUB </c> instruction.\n" |
54851 | "///\n" |
54852 | "/// \\param __a\n" |
54853 | "/// A 64-bit integer vector containing one of the source operands.\n" |
54854 | "/// \\param __b\n" |
54855 | "/// A 64-bit integer vector containing one of the source operands.\n" |
54856 | "/// \\returns A 64-bit integer vector containing the comparison results.\n" |
54857 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
54858 | "_mm_max_pu8(__m64 __a, __m64 __b)\n" |
54859 | "{\n" |
54860 | " return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);\n" |
54861 | "}\n" |
54862 | "\n" |
54863 | "/// Compares each of the corresponding packed 16-bit integer values of\n" |
54864 | "/// the 64-bit integer vectors, and writes the lesser value to the\n" |
54865 | "/// corresponding bits in the destination.\n" |
54866 | "///\n" |
54867 | "/// \\headerfile <x86intrin.h>\n" |
54868 | "///\n" |
54869 | "/// This intrinsic corresponds to the <c> PMINSW </c> instruction.\n" |
54870 | "///\n" |
54871 | "/// \\param __a\n" |
54872 | "/// A 64-bit integer vector containing one of the source operands.\n" |
54873 | "/// \\param __b\n" |
54874 | "/// A 64-bit integer vector containing one of the source operands.\n" |
54875 | "/// \\returns A 64-bit integer vector containing the comparison results.\n" |
54876 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
54877 | "_mm_min_pi16(__m64 __a, __m64 __b)\n" |
54878 | "{\n" |
54879 | " return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);\n" |
54880 | "}\n" |
54881 | "\n" |
54882 | "/// Compares each of the corresponding packed 8-bit unsigned integer\n" |
54883 | "/// values of the 64-bit integer vectors, and writes the lesser value to the\n" |
54884 | "/// corresponding bits in the destination.\n" |
54885 | "///\n" |
54886 | "/// \\headerfile <x86intrin.h>\n" |
54887 | "///\n" |
54888 | "/// This intrinsic corresponds to the <c> PMINUB </c> instruction.\n" |
54889 | "///\n" |
54890 | "/// \\param __a\n" |
54891 | "/// A 64-bit integer vector containing one of the source operands.\n" |
54892 | "/// \\param __b\n" |
54893 | "/// A 64-bit integer vector containing one of the source operands.\n" |
54894 | "/// \\returns A 64-bit integer vector containing the comparison results.\n" |
54895 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
54896 | "_mm_min_pu8(__m64 __a, __m64 __b)\n" |
54897 | "{\n" |
54898 | " return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);\n" |
54899 | "}\n" |
54900 | "\n" |
54901 | "/// Takes the most significant bit from each 8-bit element in a 64-bit\n" |
54902 | "/// integer vector to create an 8-bit mask value. Zero-extends the value to\n" |
54903 | "/// 32-bit integer and writes it to the destination.\n" |
54904 | "///\n" |
54905 | "/// \\headerfile <x86intrin.h>\n" |
54906 | "///\n" |
54907 | "/// This intrinsic corresponds to the <c> PMOVMSKB </c> instruction.\n" |
54908 | "///\n" |
54909 | "/// \\param __a\n" |
54910 | "/// A 64-bit integer vector containing the values with bits to be extracted.\n" |
54911 | "/// \\returns The most significant bit from each 8-bit element in \\a __a,\n" |
54912 | "/// written to bits [7:0].\n" |
54913 | "static __inline__ int __DEFAULT_FN_ATTRS_MMX\n" |
54914 | "_mm_movemask_pi8(__m64 __a)\n" |
54915 | "{\n" |
54916 | " return __builtin_ia32_pmovmskb((__v8qi)__a);\n" |
54917 | "}\n" |
54918 | "\n" |
54919 | "/// Multiplies packed 16-bit unsigned integer values and writes the\n" |
54920 | "/// high-order 16 bits of each 32-bit product to the corresponding bits in\n" |
54921 | "/// the destination.\n" |
54922 | "///\n" |
54923 | "/// \\headerfile <x86intrin.h>\n" |
54924 | "///\n" |
54925 | "/// This intrinsic corresponds to the <c> PMULHUW </c> instruction.\n" |
54926 | "///\n" |
54927 | "/// \\param __a\n" |
54928 | "/// A 64-bit integer vector containing one of the source operands.\n" |
54929 | "/// \\param __b\n" |
54930 | "/// A 64-bit integer vector containing one of the source operands.\n" |
54931 | "/// \\returns A 64-bit integer vector containing the products of both operands.\n" |
54932 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
54933 | "_mm_mulhi_pu16(__m64 __a, __m64 __b)\n" |
54934 | "{\n" |
54935 | " return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);\n" |
54936 | "}\n" |
54937 | "\n" |
54938 | "/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the\n" |
54939 | "/// destination, as specified by the immediate value operand.\n" |
54940 | "///\n" |
54941 | "/// \\headerfile <x86intrin.h>\n" |
54942 | "///\n" |
54943 | "/// \\code\n" |
54944 | "/// __m64 _mm_shuffle_pi16(__m64 a, const int n);\n" |
54945 | "/// \\endcode\n" |
54946 | "///\n" |
54947 | "/// This intrinsic corresponds to the <c> PSHUFW </c> instruction.\n" |
54948 | "///\n" |
54949 | "/// \\param a\n" |
54950 | "/// A 64-bit integer vector containing the values to be shuffled.\n" |
54951 | "/// \\param n\n" |
54952 | "/// An immediate value containing an 8-bit value specifying which elements to\n" |
54953 | "/// copy from \\a a. The destinations within the 64-bit destination are\n" |
54954 | "/// assigned values as follows: \\n\n" |
54955 | "/// Bits [1:0] are used to assign values to bits [15:0] in the\n" |
54956 | "/// destination. \\n\n" |
54957 | "/// Bits [3:2] are used to assign values to bits [31:16] in the\n" |
54958 | "/// destination. \\n\n" |
54959 | "/// Bits [5:4] are used to assign values to bits [47:32] in the\n" |
54960 | "/// destination. \\n\n" |
54961 | "/// Bits [7:6] are used to assign values to bits [63:48] in the\n" |
54962 | "/// destination. \\n\n" |
54963 | "/// Bit value assignments: \\n\n" |
54964 | "/// 00: assigned from bits [15:0] of \\a a. \\n\n" |
54965 | "/// 01: assigned from bits [31:16] of \\a a. \\n\n" |
54966 | "/// 10: assigned from bits [47:32] of \\a a. \\n\n" |
54967 | "/// 11: assigned from bits [63:48] of \\a a.\n" |
54968 | "/// \\returns A 64-bit integer vector containing the shuffled values.\n" |
54969 | "#define _mm_shuffle_pi16(a, n) \\\n" |
54970 | " (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))\n" |
54971 | "\n" |
54972 | "/// Conditionally copies the values from each 8-bit element in the first\n" |
54973 | "/// 64-bit integer vector operand to the specified memory location, as\n" |
54974 | "/// specified by the most significant bit in the corresponding element in the\n" |
54975 | "/// second 64-bit integer vector operand.\n" |
54976 | "///\n" |
54977 | "/// To minimize caching, the data is flagged as non-temporal\n" |
54978 | "/// (unlikely to be used again soon).\n" |
54979 | "///\n" |
54980 | "/// \\headerfile <x86intrin.h>\n" |
54981 | "///\n" |
54982 | "/// This intrinsic corresponds to the <c> MASKMOVQ </c> instruction.\n" |
54983 | "///\n" |
54984 | "/// \\param __d\n" |
54985 | "/// A 64-bit integer vector containing the values with elements to be copied.\n" |
54986 | "/// \\param __n\n" |
54987 | "/// A 64-bit integer vector operand. The most significant bit from each 8-bit\n" |
54988 | "/// element determines whether the corresponding element in operand \\a __d\n" |
54989 | "/// is copied. If the most significant bit of a given element is 1, the\n" |
54990 | "/// corresponding element in operand \\a __d is copied.\n" |
54991 | "/// \\param __p\n" |
54992 | "/// A pointer to a 64-bit memory location that will receive the conditionally\n" |
54993 | "/// copied integer values. The address of the memory location does not have\n" |
54994 | "/// to be aligned.\n" |
54995 | "static __inline__ void __DEFAULT_FN_ATTRS_MMX\n" |
54996 | "_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)\n" |
54997 | "{\n" |
54998 | " __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p);\n" |
54999 | "}\n" |
55000 | "\n" |
55001 | "/// Computes the rounded averages of the packed unsigned 8-bit integer\n" |
55002 | "/// values and writes the averages to the corresponding bits in the\n" |
55003 | "/// destination.\n" |
55004 | "///\n" |
55005 | "/// \\headerfile <x86intrin.h>\n" |
55006 | "///\n" |
55007 | "/// This intrinsic corresponds to the <c> PAVGB </c> instruction.\n" |
55008 | "///\n" |
55009 | "/// \\param __a\n" |
55010 | "/// A 64-bit integer vector containing one of the source operands.\n" |
55011 | "/// \\param __b\n" |
55012 | "/// A 64-bit integer vector containing one of the source operands.\n" |
55013 | "/// \\returns A 64-bit integer vector containing the averages of both operands.\n" |
55014 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
55015 | "_mm_avg_pu8(__m64 __a, __m64 __b)\n" |
55016 | "{\n" |
55017 | " return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);\n" |
55018 | "}\n" |
55019 | "\n" |
55020 | "/// Computes the rounded averages of the packed unsigned 16-bit integer\n" |
55021 | "/// values and writes the averages to the corresponding bits in the\n" |
55022 | "/// destination.\n" |
55023 | "///\n" |
55024 | "/// \\headerfile <x86intrin.h>\n" |
55025 | "///\n" |
55026 | "/// This intrinsic corresponds to the <c> PAVGW </c> instruction.\n" |
55027 | "///\n" |
55028 | "/// \\param __a\n" |
55029 | "/// A 64-bit integer vector containing one of the source operands.\n" |
55030 | "/// \\param __b\n" |
55031 | "/// A 64-bit integer vector containing one of the source operands.\n" |
55032 | "/// \\returns A 64-bit integer vector containing the averages of both operands.\n" |
55033 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
55034 | "_mm_avg_pu16(__m64 __a, __m64 __b)\n" |
55035 | "{\n" |
55036 | " return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);\n" |
55037 | "}\n" |
55038 | "\n" |
55039 | "/// Subtracts the corresponding 8-bit unsigned integer values of the two\n" |
55040 | "/// 64-bit vector operands and computes the absolute value for each of the\n" |
55041 | "/// difference. Then sum of the 8 absolute differences is written to the\n" |
55042 | "/// bits [15:0] of the destination; the remaining bits [63:16] are cleared.\n" |
55043 | "///\n" |
55044 | "/// \\headerfile <x86intrin.h>\n" |
55045 | "///\n" |
55046 | "/// This intrinsic corresponds to the <c> PSADBW </c> instruction.\n" |
55047 | "///\n" |
55048 | "/// \\param __a\n" |
55049 | "/// A 64-bit integer vector containing one of the source operands.\n" |
55050 | "/// \\param __b\n" |
55051 | "/// A 64-bit integer vector containing one of the source operands.\n" |
55052 | "/// \\returns A 64-bit integer vector whose lower 16 bits contain the sums of the\n" |
55053 | "/// sets of absolute differences between both operands. The upper bits are\n" |
55054 | "/// cleared.\n" |
55055 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
55056 | "_mm_sad_pu8(__m64 __a, __m64 __b)\n" |
55057 | "{\n" |
55058 | " return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);\n" |
55059 | "}\n" |
55060 | "\n" |
55061 | "#if defined(__cplusplus)\n" |
55062 | "extern \"C\" {\n" |
55063 | "#endif\n" |
55064 | "\n" |
55065 | "/// Returns the contents of the MXCSR register as a 32-bit unsigned\n" |
55066 | "/// integer value.\n" |
55067 | "///\n" |
55068 | "/// There are several groups of macros associated with this\n" |
55069 | "/// intrinsic, including:\n" |
55070 | "/// <ul>\n" |
55071 | "/// <li>\n" |
55072 | "/// For checking exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO,\n" |
55073 | "/// _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW,\n" |
55074 | "/// _MM_EXCEPT_INEXACT. There is a convenience wrapper\n" |
55075 | "/// _MM_GET_EXCEPTION_STATE().\n" |
55076 | "/// </li>\n" |
55077 | "/// <li>\n" |
55078 | "/// For checking exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW,\n" |
55079 | "/// _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT.\n" |
55080 | "/// There is a convenience wrapper _MM_GET_EXCEPTION_MASK().\n" |
55081 | "/// </li>\n" |
55082 | "/// <li>\n" |
55083 | "/// For checking rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN,\n" |
55084 | "/// _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper\n" |
55085 | "/// _MM_GET_ROUNDING_MODE().\n" |
55086 | "/// </li>\n" |
55087 | "/// <li>\n" |
55088 | "/// For checking flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.\n" |
55089 | "/// There is a convenience wrapper _MM_GET_FLUSH_ZERO_MODE().\n" |
55090 | "/// </li>\n" |
55091 | "/// <li>\n" |
55092 | "/// For checking denormals-are-zero mode: _MM_DENORMALS_ZERO_ON,\n" |
55093 | "/// _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper\n" |
55094 | "/// _MM_GET_DENORMALS_ZERO_MODE().\n" |
55095 | "/// </li>\n" |
55096 | "/// </ul>\n" |
55097 | "///\n" |
55098 | "/// For example, the following expression checks if an overflow exception has\n" |
55099 | "/// occurred:\n" |
55100 | "/// \\code\n" |
55101 | "/// ( _mm_getcsr() & _MM_EXCEPT_OVERFLOW )\n" |
55102 | "/// \\endcode\n" |
55103 | "///\n" |
55104 | "/// The following expression gets the current rounding mode:\n" |
55105 | "/// \\code\n" |
55106 | "/// _MM_GET_ROUNDING_MODE()\n" |
55107 | "/// \\endcode\n" |
55108 | "///\n" |
55109 | "/// \\headerfile <x86intrin.h>\n" |
55110 | "///\n" |
55111 | "/// This intrinsic corresponds to the <c> VSTMXCSR / STMXCSR </c> instruction.\n" |
55112 | "///\n" |
55113 | "/// \\returns A 32-bit unsigned integer containing the contents of the MXCSR\n" |
55114 | "/// register.\n" |
55115 | "unsigned int _mm_getcsr(void);\n" |
55116 | "\n" |
55117 | "/// Sets the MXCSR register with the 32-bit unsigned integer value.\n" |
55118 | "///\n" |
55119 | "/// There are several groups of macros associated with this intrinsic,\n" |
55120 | "/// including:\n" |
55121 | "/// <ul>\n" |
55122 | "/// <li>\n" |
55123 | "/// For setting exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO,\n" |
55124 | "/// _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW,\n" |
55125 | "/// _MM_EXCEPT_INEXACT. There is a convenience wrapper\n" |
55126 | "/// _MM_SET_EXCEPTION_STATE(x) where x is one of these macros.\n" |
55127 | "/// </li>\n" |
55128 | "/// <li>\n" |
55129 | "/// For setting exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW,\n" |
55130 | "/// _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT.\n" |
55131 | "/// There is a convenience wrapper _MM_SET_EXCEPTION_MASK(x) where x is one\n" |
55132 | "/// of these macros.\n" |
55133 | "/// </li>\n" |
55134 | "/// <li>\n" |
55135 | "/// For setting rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN,\n" |
55136 | "/// _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper\n" |
55137 | "/// _MM_SET_ROUNDING_MODE(x) where x is one of these macros.\n" |
55138 | "/// </li>\n" |
55139 | "/// <li>\n" |
55140 | "/// For setting flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.\n" |
55141 | "/// There is a convenience wrapper _MM_SET_FLUSH_ZERO_MODE(x) where x is\n" |
55142 | "/// one of these macros.\n" |
55143 | "/// </li>\n" |
55144 | "/// <li>\n" |
55145 | "/// For setting denormals-are-zero mode: _MM_DENORMALS_ZERO_ON,\n" |
55146 | "/// _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper\n" |
55147 | "/// _MM_SET_DENORMALS_ZERO_MODE(x) where x is one of these macros.\n" |
55148 | "/// </li>\n" |
55149 | "/// </ul>\n" |
55150 | "///\n" |
55151 | "/// For example, the following expression causes subsequent floating-point\n" |
55152 | "/// operations to round up:\n" |
55153 | "/// _mm_setcsr(_mm_getcsr() | _MM_ROUND_UP)\n" |
55154 | "///\n" |
55155 | "/// The following example sets the DAZ and FTZ flags:\n" |
55156 | "/// \\code\n" |
55157 | "/// void setFlags() {\n" |
55158 | "/// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);\n" |
55159 | "/// _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);\n" |
55160 | "/// }\n" |
55161 | "/// \\endcode\n" |
55162 | "///\n" |
55163 | "/// \\headerfile <x86intrin.h>\n" |
55164 | "///\n" |
55165 | "/// This intrinsic corresponds to the <c> VLDMXCSR / LDMXCSR </c> instruction.\n" |
55166 | "///\n" |
55167 | "/// \\param __i\n" |
55168 | "/// A 32-bit unsigned integer value to be written to the MXCSR register.\n" |
55169 | "void _mm_setcsr(unsigned int __i);\n" |
55170 | "\n" |
55171 | "#if defined(__cplusplus)\n" |
55172 | "} // extern \"C\"\n" |
55173 | "#endif\n" |
55174 | "\n" |
55175 | "/// Selects 4 float values from the 128-bit operands of [4 x float], as\n" |
55176 | "/// specified by the immediate value operand.\n" |
55177 | "///\n" |
55178 | "/// \\headerfile <x86intrin.h>\n" |
55179 | "///\n" |
55180 | "/// \\code\n" |
55181 | "/// __m128 _mm_shuffle_ps(__m128 a, __m128 b, const int mask);\n" |
55182 | "/// \\endcode\n" |
55183 | "///\n" |
55184 | "/// This intrinsic corresponds to the <c> VSHUFPS / SHUFPS </c> instruction.\n" |
55185 | "///\n" |
55186 | "/// \\param a\n" |
55187 | "/// A 128-bit vector of [4 x float].\n" |
55188 | "/// \\param b\n" |
55189 | "/// A 128-bit vector of [4 x float].\n" |
55190 | "/// \\param mask\n" |
55191 | "/// An immediate value containing an 8-bit value specifying which elements to\n" |
55192 | "/// copy from \\a a and \\a b. \\n\n" |
55193 | "/// Bits [3:0] specify the values copied from operand \\a a. \\n\n" |
55194 | "/// Bits [7:4] specify the values copied from operand \\a b. \\n\n" |
55195 | "/// The destinations within the 128-bit destination are assigned values as\n" |
55196 | "/// follows: \\n\n" |
55197 | "/// Bits [1:0] are used to assign values to bits [31:0] in the\n" |
55198 | "/// destination. \\n\n" |
55199 | "/// Bits [3:2] are used to assign values to bits [63:32] in the\n" |
55200 | "/// destination. \\n\n" |
55201 | "/// Bits [5:4] are used to assign values to bits [95:64] in the\n" |
55202 | "/// destination. \\n\n" |
55203 | "/// Bits [7:6] are used to assign values to bits [127:96] in the\n" |
55204 | "/// destination. \\n\n" |
55205 | "/// Bit value assignments: \\n\n" |
55206 | "/// 00: Bits [31:0] copied from the specified operand. \\n\n" |
55207 | "/// 01: Bits [63:32] copied from the specified operand. \\n\n" |
55208 | "/// 10: Bits [95:64] copied from the specified operand. \\n\n" |
55209 | "/// 11: Bits [127:96] copied from the specified operand.\n" |
55210 | "/// \\returns A 128-bit vector of [4 x float] containing the shuffled values.\n" |
55211 | "#define _mm_shuffle_ps(a, b, mask) \\\n" |
55212 | " (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \\\n" |
55213 | " (int)(mask))\n" |
55214 | "\n" |
55215 | "/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of\n" |
55216 | "/// [4 x float] and interleaves them into a 128-bit vector of [4 x float].\n" |
55217 | "///\n" |
55218 | "/// \\headerfile <x86intrin.h>\n" |
55219 | "///\n" |
55220 | "/// This intrinsic corresponds to the <c> VUNPCKHPS / UNPCKHPS </c> instruction.\n" |
55221 | "///\n" |
55222 | "/// \\param __a\n" |
55223 | "/// A 128-bit vector of [4 x float]. \\n\n" |
55224 | "/// Bits [95:64] are written to bits [31:0] of the destination. \\n\n" |
55225 | "/// Bits [127:96] are written to bits [95:64] of the destination.\n" |
55226 | "/// \\param __b\n" |
55227 | "/// A 128-bit vector of [4 x float].\n" |
55228 | "/// Bits [95:64] are written to bits [63:32] of the destination. \\n\n" |
55229 | "/// Bits [127:96] are written to bits [127:96] of the destination.\n" |
55230 | "/// \\returns A 128-bit vector of [4 x float] containing the interleaved values.\n" |
55231 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
55232 | "_mm_unpackhi_ps(__m128 __a, __m128 __b)\n" |
55233 | "{\n" |
55234 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7);\n" |
55235 | "}\n" |
55236 | "\n" |
55237 | "/// Unpacks the low-order (index 0,1) values from two 128-bit vectors of\n" |
55238 | "/// [4 x float] and interleaves them into a 128-bit vector of [4 x float].\n" |
55239 | "///\n" |
55240 | "/// \\headerfile <x86intrin.h>\n" |
55241 | "///\n" |
55242 | "/// This intrinsic corresponds to the <c> VUNPCKLPS / UNPCKLPS </c> instruction.\n" |
55243 | "///\n" |
55244 | "/// \\param __a\n" |
55245 | "/// A 128-bit vector of [4 x float]. \\n\n" |
55246 | "/// Bits [31:0] are written to bits [31:0] of the destination. \\n\n" |
55247 | "/// Bits [63:32] are written to bits [95:64] of the destination.\n" |
55248 | "/// \\param __b\n" |
55249 | "/// A 128-bit vector of [4 x float]. \\n\n" |
55250 | "/// Bits [31:0] are written to bits [63:32] of the destination. \\n\n" |
55251 | "/// Bits [63:32] are written to bits [127:96] of the destination.\n" |
55252 | "/// \\returns A 128-bit vector of [4 x float] containing the interleaved values.\n" |
55253 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
55254 | "_mm_unpacklo_ps(__m128 __a, __m128 __b)\n" |
55255 | "{\n" |
55256 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5);\n" |
55257 | "}\n" |
55258 | "\n" |
55259 | "/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n" |
55260 | "/// 32 bits are set to the lower 32 bits of the second parameter. The upper\n" |
55261 | "/// 96 bits are set to the upper 96 bits of the first parameter.\n" |
55262 | "///\n" |
55263 | "/// \\headerfile <x86intrin.h>\n" |
55264 | "///\n" |
55265 | "/// This intrinsic corresponds to the <c> VBLENDPS / BLENDPS / MOVSS </c>\n" |
55266 | "/// instruction.\n" |
55267 | "///\n" |
55268 | "/// \\param __a\n" |
55269 | "/// A 128-bit floating-point vector of [4 x float]. The upper 96 bits are\n" |
55270 | "/// written to the upper 96 bits of the result.\n" |
55271 | "/// \\param __b\n" |
55272 | "/// A 128-bit floating-point vector of [4 x float]. The lower 32 bits are\n" |
55273 | "/// written to the lower 32 bits of the result.\n" |
55274 | "/// \\returns A 128-bit floating-point vector of [4 x float].\n" |
55275 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
55276 | "_mm_move_ss(__m128 __a, __m128 __b)\n" |
55277 | "{\n" |
55278 | " __a[0] = __b[0];\n" |
55279 | " return __a;\n" |
55280 | "}\n" |
55281 | "\n" |
55282 | "/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n" |
55283 | "/// 64 bits are set to the upper 64 bits of the second parameter. The upper\n" |
55284 | "/// 64 bits are set to the upper 64 bits of the first parameter.\n" |
55285 | "///\n" |
55286 | "/// \\headerfile <x86intrin.h>\n" |
55287 | "///\n" |
55288 | "/// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.\n" |
55289 | "///\n" |
55290 | "/// \\param __a\n" |
55291 | "/// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are\n" |
55292 | "/// written to the upper 64 bits of the result.\n" |
55293 | "/// \\param __b\n" |
55294 | "/// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are\n" |
55295 | "/// written to the lower 64 bits of the result.\n" |
55296 | "/// \\returns A 128-bit floating-point vector of [4 x float].\n" |
55297 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
55298 | "_mm_movehl_ps(__m128 __a, __m128 __b)\n" |
55299 | "{\n" |
55300 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3);\n" |
55301 | "}\n" |
55302 | "\n" |
55303 | "/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n" |
55304 | "/// 64 bits are set to the lower 64 bits of the first parameter. The upper\n" |
55305 | "/// 64 bits are set to the lower 64 bits of the second parameter.\n" |
55306 | "///\n" |
55307 | "/// \\headerfile <x86intrin.h>\n" |
55308 | "///\n" |
55309 | "/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n" |
55310 | "///\n" |
55311 | "/// \\param __a\n" |
55312 | "/// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are\n" |
55313 | "/// written to the lower 64 bits of the result.\n" |
55314 | "/// \\param __b\n" |
55315 | "/// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are\n" |
55316 | "/// written to the upper 64 bits of the result.\n" |
55317 | "/// \\returns A 128-bit floating-point vector of [4 x float].\n" |
55318 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
55319 | "_mm_movelh_ps(__m128 __a, __m128 __b)\n" |
55320 | "{\n" |
55321 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5);\n" |
55322 | "}\n" |
55323 | "\n" |
55324 | "/// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x\n" |
55325 | "/// float].\n" |
55326 | "///\n" |
55327 | "/// \\headerfile <x86intrin.h>\n" |
55328 | "///\n" |
55329 | "/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n" |
55330 | "///\n" |
55331 | "/// \\param __a\n" |
55332 | "/// A 64-bit vector of [4 x i16]. The elements of the destination are copied\n" |
55333 | "/// from the corresponding elements in this operand.\n" |
55334 | "/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n" |
55335 | "/// values from the operand.\n" |
55336 | "static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n" |
55337 | "_mm_cvtpi16_ps(__m64 __a)\n" |
55338 | "{\n" |
55339 | " __m64 __b, __c;\n" |
55340 | " __m128 __r;\n" |
55341 | "\n" |
55342 | " __b = _mm_setzero_si64();\n" |
55343 | " __b = _mm_cmpgt_pi16(__b, __a);\n" |
55344 | " __c = _mm_unpackhi_pi16(__a, __b);\n" |
55345 | " __r = _mm_setzero_ps();\n" |
55346 | " __r = _mm_cvtpi32_ps(__r, __c);\n" |
55347 | " __r = _mm_movelh_ps(__r, __r);\n" |
55348 | " __c = _mm_unpacklo_pi16(__a, __b);\n" |
55349 | " __r = _mm_cvtpi32_ps(__r, __c);\n" |
55350 | "\n" |
55351 | " return __r;\n" |
55352 | "}\n" |
55353 | "\n" |
55354 | "/// Converts a 64-bit vector of 16-bit unsigned integer values into a\n" |
55355 | "/// 128-bit vector of [4 x float].\n" |
55356 | "///\n" |
55357 | "/// \\headerfile <x86intrin.h>\n" |
55358 | "///\n" |
55359 | "/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n" |
55360 | "///\n" |
55361 | "/// \\param __a\n" |
55362 | "/// A 64-bit vector of 16-bit unsigned integer values. The elements of the\n" |
55363 | "/// destination are copied from the corresponding elements in this operand.\n" |
55364 | "/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n" |
55365 | "/// values from the operand.\n" |
55366 | "static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n" |
55367 | "_mm_cvtpu16_ps(__m64 __a)\n" |
55368 | "{\n" |
55369 | " __m64 __b, __c;\n" |
55370 | " __m128 __r;\n" |
55371 | "\n" |
55372 | " __b = _mm_setzero_si64();\n" |
55373 | " __c = _mm_unpackhi_pi16(__a, __b);\n" |
55374 | " __r = _mm_setzero_ps();\n" |
55375 | " __r = _mm_cvtpi32_ps(__r, __c);\n" |
55376 | " __r = _mm_movelh_ps(__r, __r);\n" |
55377 | " __c = _mm_unpacklo_pi16(__a, __b);\n" |
55378 | " __r = _mm_cvtpi32_ps(__r, __c);\n" |
55379 | "\n" |
55380 | " return __r;\n" |
55381 | "}\n" |
55382 | "\n" |
55383 | "/// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8]\n" |
55384 | "/// into a 128-bit vector of [4 x float].\n" |
55385 | "///\n" |
55386 | "/// \\headerfile <x86intrin.h>\n" |
55387 | "///\n" |
55388 | "/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n" |
55389 | "///\n" |
55390 | "/// \\param __a\n" |
55391 | "/// A 64-bit vector of [8 x i8]. The elements of the destination are copied\n" |
55392 | "/// from the corresponding lower 4 elements in this operand.\n" |
55393 | "/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n" |
55394 | "/// values from the operand.\n" |
55395 | "static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n" |
55396 | "_mm_cvtpi8_ps(__m64 __a)\n" |
55397 | "{\n" |
55398 | " __m64 __b;\n" |
55399 | "\n" |
55400 | " __b = _mm_setzero_si64();\n" |
55401 | " __b = _mm_cmpgt_pi8(__b, __a);\n" |
55402 | " __b = _mm_unpacklo_pi8(__a, __b);\n" |
55403 | "\n" |
55404 | " return _mm_cvtpi16_ps(__b);\n" |
55405 | "}\n" |
55406 | "\n" |
55407 | "/// Converts the lower four unsigned 8-bit integer values from a 64-bit\n" |
55408 | "/// vector of [8 x u8] into a 128-bit vector of [4 x float].\n" |
55409 | "///\n" |
55410 | "/// \\headerfile <x86intrin.h>\n" |
55411 | "///\n" |
55412 | "/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n" |
55413 | "///\n" |
55414 | "/// \\param __a\n" |
55415 | "/// A 64-bit vector of unsigned 8-bit integer values. The elements of the\n" |
55416 | "/// destination are copied from the corresponding lower 4 elements in this\n" |
55417 | "/// operand.\n" |
55418 | "/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n" |
55419 | "/// values from the source operand.\n" |
55420 | "static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n" |
55421 | "_mm_cvtpu8_ps(__m64 __a)\n" |
55422 | "{\n" |
55423 | " __m64 __b;\n" |
55424 | "\n" |
55425 | " __b = _mm_setzero_si64();\n" |
55426 | " __b = _mm_unpacklo_pi8(__a, __b);\n" |
55427 | "\n" |
55428 | " return _mm_cvtpi16_ps(__b);\n" |
55429 | "}\n" |
55430 | "\n" |
55431 | "/// Converts the two 32-bit signed integer values from each 64-bit vector\n" |
55432 | "/// operand of [2 x i32] into a 128-bit vector of [4 x float].\n" |
55433 | "///\n" |
55434 | "/// \\headerfile <x86intrin.h>\n" |
55435 | "///\n" |
55436 | "/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n" |
55437 | "///\n" |
55438 | "/// \\param __a\n" |
55439 | "/// A 64-bit vector of [2 x i32]. The lower elements of the destination are\n" |
55440 | "/// copied from the elements in this operand.\n" |
55441 | "/// \\param __b\n" |
55442 | "/// A 64-bit vector of [2 x i32]. The upper elements of the destination are\n" |
55443 | "/// copied from the elements in this operand.\n" |
55444 | "/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n" |
55445 | "/// copied and converted values from the first operand. The upper 64 bits\n" |
55446 | "/// contain the copied and converted values from the second operand.\n" |
55447 | "static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n" |
55448 | "_mm_cvtpi32x2_ps(__m64 __a, __m64 __b)\n" |
55449 | "{\n" |
55450 | " __m128 __c;\n" |
55451 | "\n" |
55452 | " __c = _mm_setzero_ps();\n" |
55453 | " __c = _mm_cvtpi32_ps(__c, __b);\n" |
55454 | " __c = _mm_movelh_ps(__c, __c);\n" |
55455 | "\n" |
55456 | " return _mm_cvtpi32_ps(__c, __a);\n" |
55457 | "}\n" |
55458 | "\n" |
55459 | "/// Converts each single-precision floating-point element of a 128-bit\n" |
55460 | "/// floating-point vector of [4 x float] into a 16-bit signed integer, and\n" |
55461 | "/// packs the results into a 64-bit integer vector of [4 x i16].\n" |
55462 | "///\n" |
55463 | "/// If the floating-point element is NaN or infinity, or if the\n" |
55464 | "/// floating-point element is greater than 0x7FFFFFFF or less than -0x8000,\n" |
55465 | "/// it is converted to 0x8000. Otherwise if the floating-point element is\n" |
55466 | "/// greater than 0x7FFF, it is converted to 0x7FFF.\n" |
55467 | "///\n" |
55468 | "/// \\headerfile <x86intrin.h>\n" |
55469 | "///\n" |
55470 | "/// This intrinsic corresponds to the <c> CVTPS2PI + COMPOSITE </c> instruction.\n" |
55471 | "///\n" |
55472 | "/// \\param __a\n" |
55473 | "/// A 128-bit floating-point vector of [4 x float].\n" |
55474 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the converted\n" |
55475 | "/// values.\n" |
55476 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
55477 | "_mm_cvtps_pi16(__m128 __a)\n" |
55478 | "{\n" |
55479 | " __m64 __b, __c;\n" |
55480 | "\n" |
55481 | " __b = _mm_cvtps_pi32(__a);\n" |
55482 | " __a = _mm_movehl_ps(__a, __a);\n" |
55483 | " __c = _mm_cvtps_pi32(__a);\n" |
55484 | "\n" |
55485 | " return _mm_packs_pi32(__b, __c);\n" |
55486 | "}\n" |
55487 | "\n" |
55488 | "/// Converts each single-precision floating-point element of a 128-bit\n" |
55489 | "/// floating-point vector of [4 x float] into an 8-bit signed integer, and\n" |
55490 | "/// packs the results into the lower 32 bits of a 64-bit integer vector of\n" |
55491 | "/// [8 x i8]. The upper 32 bits of the vector are set to 0.\n" |
55492 | "///\n" |
55493 | "/// If the floating-point element is NaN or infinity, or if the\n" |
55494 | "/// floating-point element is greater than 0x7FFFFFFF or less than -0x80, it\n" |
55495 | "/// is converted to 0x80. Otherwise if the floating-point element is greater\n" |
55496 | "/// than 0x7F, it is converted to 0x7F.\n" |
55497 | "///\n" |
55498 | "/// \\headerfile <x86intrin.h>\n" |
55499 | "///\n" |
55500 | "/// This intrinsic corresponds to the <c> CVTPS2PI + COMPOSITE </c> instruction.\n" |
55501 | "///\n" |
55502 | "/// \\param __a\n" |
55503 | "/// 128-bit floating-point vector of [4 x float].\n" |
55504 | "/// \\returns A 64-bit integer vector of [8 x i8]. The lower 32 bits contain the\n" |
55505 | "/// converted values and the uppper 32 bits are set to zero.\n" |
55506 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
55507 | "_mm_cvtps_pi8(__m128 __a)\n" |
55508 | "{\n" |
55509 | " __m64 __b, __c;\n" |
55510 | "\n" |
55511 | " __b = _mm_cvtps_pi16(__a);\n" |
55512 | " __c = _mm_setzero_si64();\n" |
55513 | "\n" |
55514 | " return _mm_packs_pi16(__b, __c);\n" |
55515 | "}\n" |
55516 | "\n" |
55517 | "/// Extracts the sign bits from each single-precision floating-point\n" |
55518 | "/// element of a 128-bit floating-point vector of [4 x float] and returns the\n" |
55519 | "/// sign bits in bits [0:3] of the result. Bits [31:4] of the result are set\n" |
55520 | "/// to zero.\n" |
55521 | "///\n" |
55522 | "/// \\headerfile <x86intrin.h>\n" |
55523 | "///\n" |
55524 | "/// This intrinsic corresponds to the <c> VMOVMSKPS / MOVMSKPS </c> instruction.\n" |
55525 | "///\n" |
55526 | "/// \\param __a\n" |
55527 | "/// A 128-bit floating-point vector of [4 x float].\n" |
55528 | "/// \\returns A 32-bit integer value. Bits [3:0] contain the sign bits from each\n" |
55529 | "/// single-precision floating-point element of the parameter. Bits [31:4] are\n" |
55530 | "/// set to zero.\n" |
55531 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
55532 | "_mm_movemask_ps(__m128 __a)\n" |
55533 | "{\n" |
55534 | " return __builtin_ia32_movmskps((__v4sf)__a);\n" |
55535 | "}\n" |
55536 | "\n" |
55537 | "\n" |
55538 | "#define _MM_ALIGN16 __attribute__((aligned(16)))\n" |
55539 | "\n" |
55540 | "#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))\n" |
55541 | "\n" |
55542 | "#define _MM_EXCEPT_INVALID (0x0001)\n" |
55543 | "#define _MM_EXCEPT_DENORM (0x0002)\n" |
55544 | "#define _MM_EXCEPT_DIV_ZERO (0x0004)\n" |
55545 | "#define _MM_EXCEPT_OVERFLOW (0x0008)\n" |
55546 | "#define _MM_EXCEPT_UNDERFLOW (0x0010)\n" |
55547 | "#define _MM_EXCEPT_INEXACT (0x0020)\n" |
55548 | "#define _MM_EXCEPT_MASK (0x003f)\n" |
55549 | "\n" |
55550 | "#define _MM_MASK_INVALID (0x0080)\n" |
55551 | "#define _MM_MASK_DENORM (0x0100)\n" |
55552 | "#define _MM_MASK_DIV_ZERO (0x0200)\n" |
55553 | "#define _MM_MASK_OVERFLOW (0x0400)\n" |
55554 | "#define _MM_MASK_UNDERFLOW (0x0800)\n" |
55555 | "#define _MM_MASK_INEXACT (0x1000)\n" |
55556 | "#define _MM_MASK_MASK (0x1f80)\n" |
55557 | "\n" |
55558 | "#define _MM_ROUND_NEAREST (0x0000)\n" |
55559 | "#define _MM_ROUND_DOWN (0x2000)\n" |
55560 | "#define _MM_ROUND_UP (0x4000)\n" |
55561 | "#define _MM_ROUND_TOWARD_ZERO (0x6000)\n" |
55562 | "#define _MM_ROUND_MASK (0x6000)\n" |
55563 | "\n" |
55564 | "#define _MM_FLUSH_ZERO_MASK (0x8000)\n" |
55565 | "#define _MM_FLUSH_ZERO_ON (0x8000)\n" |
55566 | "#define _MM_FLUSH_ZERO_OFF (0x0000)\n" |
55567 | "\n" |
55568 | "#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK)\n" |
55569 | "#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK)\n" |
55570 | "#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)\n" |
55571 | "#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)\n" |
55572 | "\n" |
55573 | "#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x)))\n" |
55574 | "#define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x)))\n" |
55575 | "#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x)))\n" |
55576 | "#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x)))\n" |
55577 | "\n" |
55578 | "#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \\\n" |
55579 | "do { \\\n" |
55580 | " __m128 tmp3, tmp2, tmp1, tmp0; \\\n" |
55581 | " tmp0 = _mm_unpacklo_ps((row0), (row1)); \\\n" |
55582 | " tmp2 = _mm_unpacklo_ps((row2), (row3)); \\\n" |
55583 | " tmp1 = _mm_unpackhi_ps((row0), (row1)); \\\n" |
55584 | " tmp3 = _mm_unpackhi_ps((row2), (row3)); \\\n" |
55585 | " (row0) = _mm_movelh_ps(tmp0, tmp2); \\\n" |
55586 | " (row1) = _mm_movehl_ps(tmp2, tmp0); \\\n" |
55587 | " (row2) = _mm_movelh_ps(tmp1, tmp3); \\\n" |
55588 | " (row3) = _mm_movehl_ps(tmp3, tmp1); \\\n" |
55589 | "} while (0)\n" |
55590 | "\n" |
55591 | "/* Aliases for compatibility. */\n" |
55592 | "#define _m_pextrw _mm_extract_pi16\n" |
55593 | "#define _m_pinsrw _mm_insert_pi16\n" |
55594 | "#define _m_pmaxsw _mm_max_pi16\n" |
55595 | "#define _m_pmaxub _mm_max_pu8\n" |
55596 | "#define _m_pminsw _mm_min_pi16\n" |
55597 | "#define _m_pminub _mm_min_pu8\n" |
55598 | "#define _m_pmovmskb _mm_movemask_pi8\n" |
55599 | "#define _m_pmulhuw _mm_mulhi_pu16\n" |
55600 | "#define _m_pshufw _mm_shuffle_pi16\n" |
55601 | "#define _m_maskmovq _mm_maskmove_si64\n" |
55602 | "#define _m_pavgb _mm_avg_pu8\n" |
55603 | "#define _m_pavgw _mm_avg_pu16\n" |
55604 | "#define _m_psadbw _mm_sad_pu8\n" |
55605 | "#define _m_ _mm_\n" |
55606 | "#define _m_ _mm_\n" |
55607 | "\n" |
55608 | "#undef __DEFAULT_FN_ATTRS\n" |
55609 | "#undef __DEFAULT_FN_ATTRS_MMX\n" |
55610 | "\n" |
55611 | "/* Ugly hack for backwards-compatibility (compatible with gcc) */\n" |
55612 | "#if defined(__SSE2__) && !__building_module(_Builtin_intrinsics)\n" |
55613 | "#include <emmintrin.h>\n" |
55614 | "#endif\n" |
55615 | "\n" |
55616 | "#endif /* __XMMINTRIN_H */\n" |
55617 | "" } , |
55618 | { "/builtins/xopintrin.h" , "/*===---- xopintrin.h - XOP intrinsics -------------------------------------===\n" |
55619 | " *\n" |
55620 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
55621 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
55622 | " * in the Software without restriction, including without limitation the rights\n" |
55623 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
55624 | " * copies of the Software, and to permit persons to whom the Software is\n" |
55625 | " * furnished to do so, subject to the following conditions:\n" |
55626 | " *\n" |
55627 | " * The above copyright notice and this permission notice shall be included in\n" |
55628 | " * all copies or substantial portions of the Software.\n" |
55629 | " *\n" |
55630 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
55631 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
55632 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
55633 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
55634 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
55635 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
55636 | " * THE SOFTWARE.\n" |
55637 | " *\n" |
55638 | " *===-----------------------------------------------------------------------===\n" |
55639 | " */\n" |
55640 | "\n" |
55641 | "#ifndef __X86INTRIN_H\n" |
55642 | "#error \"Never use <xopintrin.h> directly; include <x86intrin.h> instead.\"\n" |
55643 | "#endif\n" |
55644 | "\n" |
55645 | "#ifndef __XOPINTRIN_H\n" |
55646 | "#define __XOPINTRIN_H\n" |
55647 | "\n" |
55648 | "#include <fma4intrin.h>\n" |
55649 | "\n" |
55650 | "/* Define the default attributes for the functions in this file. */\n" |
55651 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xop\"), __min_vector_width__(128)))\n" |
55652 | "#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"xop\"), __min_vector_width__(256)))\n" |
55653 | "\n" |
55654 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55655 | "_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)\n" |
55656 | "{\n" |
55657 | " return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);\n" |
55658 | "}\n" |
55659 | "\n" |
55660 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55661 | "_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)\n" |
55662 | "{\n" |
55663 | " return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);\n" |
55664 | "}\n" |
55665 | "\n" |
55666 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55667 | "_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)\n" |
55668 | "{\n" |
55669 | " return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n" |
55670 | "}\n" |
55671 | "\n" |
55672 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55673 | "_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)\n" |
55674 | "{\n" |
55675 | " return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n" |
55676 | "}\n" |
55677 | "\n" |
55678 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55679 | "_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)\n" |
55680 | "{\n" |
55681 | " return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);\n" |
55682 | "}\n" |
55683 | "\n" |
55684 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55685 | "_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)\n" |
55686 | "{\n" |
55687 | " return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);\n" |
55688 | "}\n" |
55689 | "\n" |
55690 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55691 | "_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)\n" |
55692 | "{\n" |
55693 | " return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);\n" |
55694 | "}\n" |
55695 | "\n" |
55696 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55697 | "_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)\n" |
55698 | "{\n" |
55699 | " return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);\n" |
55700 | "}\n" |
55701 | "\n" |
55702 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55703 | "_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)\n" |
55704 | "{\n" |
55705 | " return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);\n" |
55706 | "}\n" |
55707 | "\n" |
55708 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55709 | "_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)\n" |
55710 | "{\n" |
55711 | " return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);\n" |
55712 | "}\n" |
55713 | "\n" |
55714 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55715 | "_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)\n" |
55716 | "{\n" |
55717 | " return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n" |
55718 | "}\n" |
55719 | "\n" |
55720 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55721 | "_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)\n" |
55722 | "{\n" |
55723 | " return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n" |
55724 | "}\n" |
55725 | "\n" |
55726 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55727 | "_mm_haddw_epi8(__m128i __A)\n" |
55728 | "{\n" |
55729 | " return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);\n" |
55730 | "}\n" |
55731 | "\n" |
55732 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55733 | "_mm_haddd_epi8(__m128i __A)\n" |
55734 | "{\n" |
55735 | " return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);\n" |
55736 | "}\n" |
55737 | "\n" |
55738 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55739 | "_mm_haddq_epi8(__m128i __A)\n" |
55740 | "{\n" |
55741 | " return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);\n" |
55742 | "}\n" |
55743 | "\n" |
55744 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55745 | "_mm_haddd_epi16(__m128i __A)\n" |
55746 | "{\n" |
55747 | " return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);\n" |
55748 | "}\n" |
55749 | "\n" |
55750 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55751 | "_mm_haddq_epi16(__m128i __A)\n" |
55752 | "{\n" |
55753 | " return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);\n" |
55754 | "}\n" |
55755 | "\n" |
55756 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55757 | "_mm_haddq_epi32(__m128i __A)\n" |
55758 | "{\n" |
55759 | " return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);\n" |
55760 | "}\n" |
55761 | "\n" |
55762 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55763 | "_mm_haddw_epu8(__m128i __A)\n" |
55764 | "{\n" |
55765 | " return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);\n" |
55766 | "}\n" |
55767 | "\n" |
55768 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55769 | "_mm_haddd_epu8(__m128i __A)\n" |
55770 | "{\n" |
55771 | " return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);\n" |
55772 | "}\n" |
55773 | "\n" |
55774 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55775 | "_mm_haddq_epu8(__m128i __A)\n" |
55776 | "{\n" |
55777 | " return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);\n" |
55778 | "}\n" |
55779 | "\n" |
55780 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55781 | "_mm_haddd_epu16(__m128i __A)\n" |
55782 | "{\n" |
55783 | " return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);\n" |
55784 | "}\n" |
55785 | "\n" |
55786 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55787 | "_mm_haddq_epu16(__m128i __A)\n" |
55788 | "{\n" |
55789 | " return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);\n" |
55790 | "}\n" |
55791 | "\n" |
55792 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55793 | "_mm_haddq_epu32(__m128i __A)\n" |
55794 | "{\n" |
55795 | " return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);\n" |
55796 | "}\n" |
55797 | "\n" |
55798 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55799 | "_mm_hsubw_epi8(__m128i __A)\n" |
55800 | "{\n" |
55801 | " return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);\n" |
55802 | "}\n" |
55803 | "\n" |
55804 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55805 | "_mm_hsubd_epi16(__m128i __A)\n" |
55806 | "{\n" |
55807 | " return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);\n" |
55808 | "}\n" |
55809 | "\n" |
55810 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55811 | "_mm_hsubq_epi32(__m128i __A)\n" |
55812 | "{\n" |
55813 | " return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);\n" |
55814 | "}\n" |
55815 | "\n" |
55816 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55817 | "_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)\n" |
55818 | "{\n" |
55819 | " return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C));\n" |
55820 | "}\n" |
55821 | "\n" |
55822 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
55823 | "_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)\n" |
55824 | "{\n" |
55825 | " return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C));\n" |
55826 | "}\n" |
55827 | "\n" |
55828 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55829 | "_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)\n" |
55830 | "{\n" |
55831 | " return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);\n" |
55832 | "}\n" |
55833 | "\n" |
55834 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55835 | "_mm_rot_epi8(__m128i __A, __m128i __B)\n" |
55836 | "{\n" |
55837 | " return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);\n" |
55838 | "}\n" |
55839 | "\n" |
55840 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55841 | "_mm_rot_epi16(__m128i __A, __m128i __B)\n" |
55842 | "{\n" |
55843 | " return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);\n" |
55844 | "}\n" |
55845 | "\n" |
55846 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55847 | "_mm_rot_epi32(__m128i __A, __m128i __B)\n" |
55848 | "{\n" |
55849 | " return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);\n" |
55850 | "}\n" |
55851 | "\n" |
55852 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55853 | "_mm_rot_epi64(__m128i __A, __m128i __B)\n" |
55854 | "{\n" |
55855 | " return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);\n" |
55856 | "}\n" |
55857 | "\n" |
55858 | "#define _mm_roti_epi8(A, N) \\\n" |
55859 | " (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N))\n" |
55860 | "\n" |
55861 | "#define _mm_roti_epi16(A, N) \\\n" |
55862 | " (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N))\n" |
55863 | "\n" |
55864 | "#define _mm_roti_epi32(A, N) \\\n" |
55865 | " (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N))\n" |
55866 | "\n" |
55867 | "#define _mm_roti_epi64(A, N) \\\n" |
55868 | " (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N))\n" |
55869 | "\n" |
55870 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55871 | "_mm_shl_epi8(__m128i __A, __m128i __B)\n" |
55872 | "{\n" |
55873 | " return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);\n" |
55874 | "}\n" |
55875 | "\n" |
55876 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55877 | "_mm_shl_epi16(__m128i __A, __m128i __B)\n" |
55878 | "{\n" |
55879 | " return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);\n" |
55880 | "}\n" |
55881 | "\n" |
55882 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55883 | "_mm_shl_epi32(__m128i __A, __m128i __B)\n" |
55884 | "{\n" |
55885 | " return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);\n" |
55886 | "}\n" |
55887 | "\n" |
55888 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55889 | "_mm_shl_epi64(__m128i __A, __m128i __B)\n" |
55890 | "{\n" |
55891 | " return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);\n" |
55892 | "}\n" |
55893 | "\n" |
55894 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55895 | "_mm_sha_epi8(__m128i __A, __m128i __B)\n" |
55896 | "{\n" |
55897 | " return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);\n" |
55898 | "}\n" |
55899 | "\n" |
55900 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55901 | "_mm_sha_epi16(__m128i __A, __m128i __B)\n" |
55902 | "{\n" |
55903 | " return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);\n" |
55904 | "}\n" |
55905 | "\n" |
55906 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55907 | "_mm_sha_epi32(__m128i __A, __m128i __B)\n" |
55908 | "{\n" |
55909 | " return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);\n" |
55910 | "}\n" |
55911 | "\n" |
55912 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55913 | "_mm_sha_epi64(__m128i __A, __m128i __B)\n" |
55914 | "{\n" |
55915 | " return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);\n" |
55916 | "}\n" |
55917 | "\n" |
55918 | "#define _mm_com_epu8(A, B, N) \\\n" |
55919 | " (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \\\n" |
55920 | " (__v16qi)(__m128i)(B), (N))\n" |
55921 | "\n" |
55922 | "#define _mm_com_epu16(A, B, N) \\\n" |
55923 | " (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \\\n" |
55924 | " (__v8hi)(__m128i)(B), (N))\n" |
55925 | "\n" |
55926 | "#define _mm_com_epu32(A, B, N) \\\n" |
55927 | " (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \\\n" |
55928 | " (__v4si)(__m128i)(B), (N))\n" |
55929 | "\n" |
55930 | "#define _mm_com_epu64(A, B, N) \\\n" |
55931 | " (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \\\n" |
55932 | " (__v2di)(__m128i)(B), (N))\n" |
55933 | "\n" |
55934 | "#define _mm_com_epi8(A, B, N) \\\n" |
55935 | " (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \\\n" |
55936 | " (__v16qi)(__m128i)(B), (N))\n" |
55937 | "\n" |
55938 | "#define _mm_com_epi16(A, B, N) \\\n" |
55939 | " (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \\\n" |
55940 | " (__v8hi)(__m128i)(B), (N))\n" |
55941 | "\n" |
55942 | "#define _mm_com_epi32(A, B, N) \\\n" |
55943 | " (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \\\n" |
55944 | " (__v4si)(__m128i)(B), (N))\n" |
55945 | "\n" |
55946 | "#define _mm_com_epi64(A, B, N) \\\n" |
55947 | " (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \\\n" |
55948 | " (__v2di)(__m128i)(B), (N))\n" |
55949 | "\n" |
55950 | "#define _MM_PCOMCTRL_LT 0\n" |
55951 | "#define _MM_PCOMCTRL_LE 1\n" |
55952 | "#define _MM_PCOMCTRL_GT 2\n" |
55953 | "#define _MM_PCOMCTRL_GE 3\n" |
55954 | "#define _MM_PCOMCTRL_EQ 4\n" |
55955 | "#define _MM_PCOMCTRL_NEQ 5\n" |
55956 | "#define _MM_PCOMCTRL_FALSE 6\n" |
55957 | "#define _MM_PCOMCTRL_TRUE 7\n" |
55958 | "\n" |
55959 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55960 | "_mm_comlt_epu8(__m128i __A, __m128i __B)\n" |
55961 | "{\n" |
55962 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);\n" |
55963 | "}\n" |
55964 | "\n" |
55965 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55966 | "_mm_comle_epu8(__m128i __A, __m128i __B)\n" |
55967 | "{\n" |
55968 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);\n" |
55969 | "}\n" |
55970 | "\n" |
55971 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55972 | "_mm_comgt_epu8(__m128i __A, __m128i __B)\n" |
55973 | "{\n" |
55974 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);\n" |
55975 | "}\n" |
55976 | "\n" |
55977 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55978 | "_mm_comge_epu8(__m128i __A, __m128i __B)\n" |
55979 | "{\n" |
55980 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);\n" |
55981 | "}\n" |
55982 | "\n" |
55983 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55984 | "_mm_comeq_epu8(__m128i __A, __m128i __B)\n" |
55985 | "{\n" |
55986 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);\n" |
55987 | "}\n" |
55988 | "\n" |
55989 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55990 | "_mm_comneq_epu8(__m128i __A, __m128i __B)\n" |
55991 | "{\n" |
55992 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
55993 | "}\n" |
55994 | "\n" |
55995 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
55996 | "_mm_comfalse_epu8(__m128i __A, __m128i __B)\n" |
55997 | "{\n" |
55998 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
55999 | "}\n" |
56000 | "\n" |
56001 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56002 | "_mm_comtrue_epu8(__m128i __A, __m128i __B)\n" |
56003 | "{\n" |
56004 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
56005 | "}\n" |
56006 | "\n" |
56007 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56008 | "_mm_comlt_epu16(__m128i __A, __m128i __B)\n" |
56009 | "{\n" |
56010 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);\n" |
56011 | "}\n" |
56012 | "\n" |
56013 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56014 | "_mm_comle_epu16(__m128i __A, __m128i __B)\n" |
56015 | "{\n" |
56016 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);\n" |
56017 | "}\n" |
56018 | "\n" |
56019 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56020 | "_mm_comgt_epu16(__m128i __A, __m128i __B)\n" |
56021 | "{\n" |
56022 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);\n" |
56023 | "}\n" |
56024 | "\n" |
56025 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56026 | "_mm_comge_epu16(__m128i __A, __m128i __B)\n" |
56027 | "{\n" |
56028 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);\n" |
56029 | "}\n" |
56030 | "\n" |
56031 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56032 | "_mm_comeq_epu16(__m128i __A, __m128i __B)\n" |
56033 | "{\n" |
56034 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);\n" |
56035 | "}\n" |
56036 | "\n" |
56037 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56038 | "_mm_comneq_epu16(__m128i __A, __m128i __B)\n" |
56039 | "{\n" |
56040 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
56041 | "}\n" |
56042 | "\n" |
56043 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56044 | "_mm_comfalse_epu16(__m128i __A, __m128i __B)\n" |
56045 | "{\n" |
56046 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
56047 | "}\n" |
56048 | "\n" |
56049 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56050 | "_mm_comtrue_epu16(__m128i __A, __m128i __B)\n" |
56051 | "{\n" |
56052 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
56053 | "}\n" |
56054 | "\n" |
56055 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56056 | "_mm_comlt_epu32(__m128i __A, __m128i __B)\n" |
56057 | "{\n" |
56058 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);\n" |
56059 | "}\n" |
56060 | "\n" |
56061 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56062 | "_mm_comle_epu32(__m128i __A, __m128i __B)\n" |
56063 | "{\n" |
56064 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);\n" |
56065 | "}\n" |
56066 | "\n" |
56067 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56068 | "_mm_comgt_epu32(__m128i __A, __m128i __B)\n" |
56069 | "{\n" |
56070 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);\n" |
56071 | "}\n" |
56072 | "\n" |
56073 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56074 | "_mm_comge_epu32(__m128i __A, __m128i __B)\n" |
56075 | "{\n" |
56076 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);\n" |
56077 | "}\n" |
56078 | "\n" |
56079 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56080 | "_mm_comeq_epu32(__m128i __A, __m128i __B)\n" |
56081 | "{\n" |
56082 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);\n" |
56083 | "}\n" |
56084 | "\n" |
56085 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56086 | "_mm_comneq_epu32(__m128i __A, __m128i __B)\n" |
56087 | "{\n" |
56088 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
56089 | "}\n" |
56090 | "\n" |
56091 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56092 | "_mm_comfalse_epu32(__m128i __A, __m128i __B)\n" |
56093 | "{\n" |
56094 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
56095 | "}\n" |
56096 | "\n" |
56097 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56098 | "_mm_comtrue_epu32(__m128i __A, __m128i __B)\n" |
56099 | "{\n" |
56100 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
56101 | "}\n" |
56102 | "\n" |
56103 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56104 | "_mm_comlt_epu64(__m128i __A, __m128i __B)\n" |
56105 | "{\n" |
56106 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);\n" |
56107 | "}\n" |
56108 | "\n" |
56109 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56110 | "_mm_comle_epu64(__m128i __A, __m128i __B)\n" |
56111 | "{\n" |
56112 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);\n" |
56113 | "}\n" |
56114 | "\n" |
56115 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56116 | "_mm_comgt_epu64(__m128i __A, __m128i __B)\n" |
56117 | "{\n" |
56118 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);\n" |
56119 | "}\n" |
56120 | "\n" |
56121 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56122 | "_mm_comge_epu64(__m128i __A, __m128i __B)\n" |
56123 | "{\n" |
56124 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);\n" |
56125 | "}\n" |
56126 | "\n" |
56127 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56128 | "_mm_comeq_epu64(__m128i __A, __m128i __B)\n" |
56129 | "{\n" |
56130 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);\n" |
56131 | "}\n" |
56132 | "\n" |
56133 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56134 | "_mm_comneq_epu64(__m128i __A, __m128i __B)\n" |
56135 | "{\n" |
56136 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
56137 | "}\n" |
56138 | "\n" |
56139 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56140 | "_mm_comfalse_epu64(__m128i __A, __m128i __B)\n" |
56141 | "{\n" |
56142 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
56143 | "}\n" |
56144 | "\n" |
56145 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56146 | "_mm_comtrue_epu64(__m128i __A, __m128i __B)\n" |
56147 | "{\n" |
56148 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
56149 | "}\n" |
56150 | "\n" |
56151 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56152 | "_mm_comlt_epi8(__m128i __A, __m128i __B)\n" |
56153 | "{\n" |
56154 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);\n" |
56155 | "}\n" |
56156 | "\n" |
56157 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56158 | "_mm_comle_epi8(__m128i __A, __m128i __B)\n" |
56159 | "{\n" |
56160 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);\n" |
56161 | "}\n" |
56162 | "\n" |
56163 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56164 | "_mm_comgt_epi8(__m128i __A, __m128i __B)\n" |
56165 | "{\n" |
56166 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);\n" |
56167 | "}\n" |
56168 | "\n" |
56169 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56170 | "_mm_comge_epi8(__m128i __A, __m128i __B)\n" |
56171 | "{\n" |
56172 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);\n" |
56173 | "}\n" |
56174 | "\n" |
56175 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56176 | "_mm_comeq_epi8(__m128i __A, __m128i __B)\n" |
56177 | "{\n" |
56178 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);\n" |
56179 | "}\n" |
56180 | "\n" |
56181 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56182 | "_mm_comneq_epi8(__m128i __A, __m128i __B)\n" |
56183 | "{\n" |
56184 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
56185 | "}\n" |
56186 | "\n" |
56187 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56188 | "_mm_comfalse_epi8(__m128i __A, __m128i __B)\n" |
56189 | "{\n" |
56190 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
56191 | "}\n" |
56192 | "\n" |
56193 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56194 | "_mm_comtrue_epi8(__m128i __A, __m128i __B)\n" |
56195 | "{\n" |
56196 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
56197 | "}\n" |
56198 | "\n" |
56199 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56200 | "_mm_comlt_epi16(__m128i __A, __m128i __B)\n" |
56201 | "{\n" |
56202 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);\n" |
56203 | "}\n" |
56204 | "\n" |
56205 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56206 | "_mm_comle_epi16(__m128i __A, __m128i __B)\n" |
56207 | "{\n" |
56208 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);\n" |
56209 | "}\n" |
56210 | "\n" |
56211 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56212 | "_mm_comgt_epi16(__m128i __A, __m128i __B)\n" |
56213 | "{\n" |
56214 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);\n" |
56215 | "}\n" |
56216 | "\n" |
56217 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56218 | "_mm_comge_epi16(__m128i __A, __m128i __B)\n" |
56219 | "{\n" |
56220 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);\n" |
56221 | "}\n" |
56222 | "\n" |
56223 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56224 | "_mm_comeq_epi16(__m128i __A, __m128i __B)\n" |
56225 | "{\n" |
56226 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);\n" |
56227 | "}\n" |
56228 | "\n" |
56229 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56230 | "_mm_comneq_epi16(__m128i __A, __m128i __B)\n" |
56231 | "{\n" |
56232 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
56233 | "}\n" |
56234 | "\n" |
56235 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56236 | "_mm_comfalse_epi16(__m128i __A, __m128i __B)\n" |
56237 | "{\n" |
56238 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
56239 | "}\n" |
56240 | "\n" |
56241 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56242 | "_mm_comtrue_epi16(__m128i __A, __m128i __B)\n" |
56243 | "{\n" |
56244 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
56245 | "}\n" |
56246 | "\n" |
56247 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56248 | "_mm_comlt_epi32(__m128i __A, __m128i __B)\n" |
56249 | "{\n" |
56250 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);\n" |
56251 | "}\n" |
56252 | "\n" |
56253 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56254 | "_mm_comle_epi32(__m128i __A, __m128i __B)\n" |
56255 | "{\n" |
56256 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);\n" |
56257 | "}\n" |
56258 | "\n" |
56259 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56260 | "_mm_comgt_epi32(__m128i __A, __m128i __B)\n" |
56261 | "{\n" |
56262 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);\n" |
56263 | "}\n" |
56264 | "\n" |
56265 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56266 | "_mm_comge_epi32(__m128i __A, __m128i __B)\n" |
56267 | "{\n" |
56268 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);\n" |
56269 | "}\n" |
56270 | "\n" |
56271 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56272 | "_mm_comeq_epi32(__m128i __A, __m128i __B)\n" |
56273 | "{\n" |
56274 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);\n" |
56275 | "}\n" |
56276 | "\n" |
56277 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56278 | "_mm_comneq_epi32(__m128i __A, __m128i __B)\n" |
56279 | "{\n" |
56280 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
56281 | "}\n" |
56282 | "\n" |
56283 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56284 | "_mm_comfalse_epi32(__m128i __A, __m128i __B)\n" |
56285 | "{\n" |
56286 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
56287 | "}\n" |
56288 | "\n" |
56289 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56290 | "_mm_comtrue_epi32(__m128i __A, __m128i __B)\n" |
56291 | "{\n" |
56292 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
56293 | "}\n" |
56294 | "\n" |
56295 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56296 | "_mm_comlt_epi64(__m128i __A, __m128i __B)\n" |
56297 | "{\n" |
56298 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);\n" |
56299 | "}\n" |
56300 | "\n" |
56301 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56302 | "_mm_comle_epi64(__m128i __A, __m128i __B)\n" |
56303 | "{\n" |
56304 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);\n" |
56305 | "}\n" |
56306 | "\n" |
56307 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56308 | "_mm_comgt_epi64(__m128i __A, __m128i __B)\n" |
56309 | "{\n" |
56310 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);\n" |
56311 | "}\n" |
56312 | "\n" |
56313 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56314 | "_mm_comge_epi64(__m128i __A, __m128i __B)\n" |
56315 | "{\n" |
56316 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);\n" |
56317 | "}\n" |
56318 | "\n" |
56319 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56320 | "_mm_comeq_epi64(__m128i __A, __m128i __B)\n" |
56321 | "{\n" |
56322 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);\n" |
56323 | "}\n" |
56324 | "\n" |
56325 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56326 | "_mm_comneq_epi64(__m128i __A, __m128i __B)\n" |
56327 | "{\n" |
56328 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
56329 | "}\n" |
56330 | "\n" |
56331 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56332 | "_mm_comfalse_epi64(__m128i __A, __m128i __B)\n" |
56333 | "{\n" |
56334 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
56335 | "}\n" |
56336 | "\n" |
56337 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
56338 | "_mm_comtrue_epi64(__m128i __A, __m128i __B)\n" |
56339 | "{\n" |
56340 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
56341 | "}\n" |
56342 | "\n" |
56343 | "#define _mm_permute2_pd(X, Y, C, I) \\\n" |
56344 | " (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \\\n" |
56345 | " (__v2df)(__m128d)(Y), \\\n" |
56346 | " (__v2di)(__m128i)(C), (I))\n" |
56347 | "\n" |
56348 | "#define _mm256_permute2_pd(X, Y, C, I) \\\n" |
56349 | " (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \\\n" |
56350 | " (__v4df)(__m256d)(Y), \\\n" |
56351 | " (__v4di)(__m256i)(C), (I))\n" |
56352 | "\n" |
56353 | "#define _mm_permute2_ps(X, Y, C, I) \\\n" |
56354 | " (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \\\n" |
56355 | " (__v4si)(__m128i)(C), (I))\n" |
56356 | "\n" |
56357 | "#define _mm256_permute2_ps(X, Y, C, I) \\\n" |
56358 | " (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \\\n" |
56359 | " (__v8sf)(__m256)(Y), \\\n" |
56360 | " (__v8si)(__m256i)(C), (I))\n" |
56361 | "\n" |
56362 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
56363 | "_mm_frcz_ss(__m128 __A)\n" |
56364 | "{\n" |
56365 | " return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);\n" |
56366 | "}\n" |
56367 | "\n" |
56368 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
56369 | "_mm_frcz_sd(__m128d __A)\n" |
56370 | "{\n" |
56371 | " return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);\n" |
56372 | "}\n" |
56373 | "\n" |
56374 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
56375 | "_mm_frcz_ps(__m128 __A)\n" |
56376 | "{\n" |
56377 | " return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);\n" |
56378 | "}\n" |
56379 | "\n" |
56380 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
56381 | "_mm_frcz_pd(__m128d __A)\n" |
56382 | "{\n" |
56383 | " return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);\n" |
56384 | "}\n" |
56385 | "\n" |
56386 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
56387 | "_mm256_frcz_ps(__m256 __A)\n" |
56388 | "{\n" |
56389 | " return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);\n" |
56390 | "}\n" |
56391 | "\n" |
56392 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
56393 | "_mm256_frcz_pd(__m256d __A)\n" |
56394 | "{\n" |
56395 | " return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);\n" |
56396 | "}\n" |
56397 | "\n" |
56398 | "#undef __DEFAULT_FN_ATTRS\n" |
56399 | "#undef __DEFAULT_FN_ATTRS256\n" |
56400 | "\n" |
56401 | "#endif /* __XOPINTRIN_H */\n" |
56402 | "" } , |
56403 | { "/builtins/xsavecintrin.h" , "/*===---- xsavecintrin.h - XSAVEC intrinsic --------------------------------===\n" |
56404 | " *\n" |
56405 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
56406 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
56407 | " * in the Software without restriction, including without limitation the rights\n" |
56408 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
56409 | " * copies of the Software, and to permit persons to whom the Software is\n" |
56410 | " * furnished to do so, subject to the following conditions:\n" |
56411 | " *\n" |
56412 | " * The above copyright notice and this permission notice shall be included in\n" |
56413 | " * all copies or substantial portions of the Software.\n" |
56414 | " *\n" |
56415 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
56416 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
56417 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
56418 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
56419 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
56420 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
56421 | " * THE SOFTWARE.\n" |
56422 | " *\n" |
56423 | " *===-----------------------------------------------------------------------===\n" |
56424 | " */\n" |
56425 | "\n" |
56426 | "#ifndef __IMMINTRIN_H\n" |
56427 | "#error \"Never use <xsavecintrin.h> directly; include <immintrin.h> instead.\"\n" |
56428 | "#endif\n" |
56429 | "\n" |
56430 | "#ifndef __XSAVECINTRIN_H\n" |
56431 | "#define __XSAVECINTRIN_H\n" |
56432 | "\n" |
56433 | "/* Define the default attributes for the functions in this file. */\n" |
56434 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsavec\")))\n" |
56435 | "\n" |
56436 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
56437 | "_xsavec(void *__p, unsigned long long __m) {\n" |
56438 | " __builtin_ia32_xsavec(__p, __m);\n" |
56439 | "}\n" |
56440 | "\n" |
56441 | "#ifdef __x86_64__\n" |
56442 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
56443 | "_xsavec64(void *__p, unsigned long long __m) {\n" |
56444 | " __builtin_ia32_xsavec64(__p, __m);\n" |
56445 | "}\n" |
56446 | "#endif\n" |
56447 | "\n" |
56448 | "#undef __DEFAULT_FN_ATTRS\n" |
56449 | "\n" |
56450 | "#endif\n" |
56451 | "" } , |
56452 | { "/builtins/xsaveintrin.h" , "/*===---- xsaveintrin.h - XSAVE intrinsic ----------------------------------===\n" |
56453 | " *\n" |
56454 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
56455 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
56456 | " * in the Software without restriction, including without limitation the rights\n" |
56457 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
56458 | " * copies of the Software, and to permit persons to whom the Software is\n" |
56459 | " * furnished to do so, subject to the following conditions:\n" |
56460 | " *\n" |
56461 | " * The above copyright notice and this permission notice shall be included in\n" |
56462 | " * all copies or substantial portions of the Software.\n" |
56463 | " *\n" |
56464 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
56465 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
56466 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
56467 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
56468 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
56469 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
56470 | " * THE SOFTWARE.\n" |
56471 | " *\n" |
56472 | " *===-----------------------------------------------------------------------===\n" |
56473 | " */\n" |
56474 | "\n" |
56475 | "#ifndef __IMMINTRIN_H\n" |
56476 | "#error \"Never use <xsaveintrin.h> directly; include <immintrin.h> instead.\"\n" |
56477 | "#endif\n" |
56478 | "\n" |
56479 | "#ifndef __XSAVEINTRIN_H\n" |
56480 | "#define __XSAVEINTRIN_H\n" |
56481 | "\n" |
56482 | "/* Define the default attributes for the functions in this file. */\n" |
56483 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsave\")))\n" |
56484 | "\n" |
56485 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
56486 | "_xsave(void *__p, unsigned long long __m) {\n" |
56487 | " __builtin_ia32_xsave(__p, __m);\n" |
56488 | "}\n" |
56489 | "\n" |
56490 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
56491 | "_xrstor(void *__p, unsigned long long __m) {\n" |
56492 | " __builtin_ia32_xrstor(__p, __m);\n" |
56493 | "}\n" |
56494 | "\n" |
56495 | "#ifdef __x86_64__\n" |
56496 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
56497 | "_xsave64(void *__p, unsigned long long __m) {\n" |
56498 | " __builtin_ia32_xsave64(__p, __m);\n" |
56499 | "}\n" |
56500 | "\n" |
56501 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
56502 | "_xrstor64(void *__p, unsigned long long __m) {\n" |
56503 | " __builtin_ia32_xrstor64(__p, __m);\n" |
56504 | "}\n" |
56505 | "#endif\n" |
56506 | "\n" |
56507 | "#undef __DEFAULT_FN_ATTRS\n" |
56508 | "\n" |
56509 | "#endif\n" |
56510 | "" } , |
56511 | { "/builtins/xsaveoptintrin.h" , "/*===---- xsaveoptintrin.h - XSAVEOPT intrinsic ----------------------------===\n" |
56512 | " *\n" |
56513 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
56514 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
56515 | " * in the Software without restriction, including without limitation the rights\n" |
56516 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
56517 | " * copies of the Software, and to permit persons to whom the Software is\n" |
56518 | " * furnished to do so, subject to the following conditions:\n" |
56519 | " *\n" |
56520 | " * The above copyright notice and this permission notice shall be included in\n" |
56521 | " * all copies or substantial portions of the Software.\n" |
56522 | " *\n" |
56523 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
56524 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
56525 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
56526 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
56527 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
56528 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
56529 | " * THE SOFTWARE.\n" |
56530 | " *\n" |
56531 | " *===-----------------------------------------------------------------------===\n" |
56532 | " */\n" |
56533 | "\n" |
56534 | "#ifndef __IMMINTRIN_H\n" |
56535 | "#error \"Never use <xsaveoptintrin.h> directly; include <immintrin.h> instead.\"\n" |
56536 | "#endif\n" |
56537 | "\n" |
56538 | "#ifndef __XSAVEOPTINTRIN_H\n" |
56539 | "#define __XSAVEOPTINTRIN_H\n" |
56540 | "\n" |
56541 | "/* Define the default attributes for the functions in this file. */\n" |
56542 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsaveopt\")))\n" |
56543 | "\n" |
56544 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
56545 | "_xsaveopt(void *__p, unsigned long long __m) {\n" |
56546 | " __builtin_ia32_xsaveopt(__p, __m);\n" |
56547 | "}\n" |
56548 | "\n" |
56549 | "#ifdef __x86_64__\n" |
56550 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
56551 | "_xsaveopt64(void *__p, unsigned long long __m) {\n" |
56552 | " __builtin_ia32_xsaveopt64(__p, __m);\n" |
56553 | "}\n" |
56554 | "#endif\n" |
56555 | "\n" |
56556 | "#undef __DEFAULT_FN_ATTRS\n" |
56557 | "\n" |
56558 | "#endif\n" |
56559 | "" } , |
56560 | { "/builtins/xsavesintrin.h" , "/*===---- xsavesintrin.h - XSAVES intrinsic --------------------------------===\n" |
56561 | " *\n" |
56562 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
56563 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
56564 | " * in the Software without restriction, including without limitation the rights\n" |
56565 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
56566 | " * copies of the Software, and to permit persons to whom the Software is\n" |
56567 | " * furnished to do so, subject to the following conditions:\n" |
56568 | " *\n" |
56569 | " * The above copyright notice and this permission notice shall be included in\n" |
56570 | " * all copies or substantial portions of the Software.\n" |
56571 | " *\n" |
56572 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
56573 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
56574 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
56575 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
56576 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
56577 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
56578 | " * THE SOFTWARE.\n" |
56579 | " *\n" |
56580 | " *===-----------------------------------------------------------------------===\n" |
56581 | " */\n" |
56582 | "\n" |
56583 | "#ifndef __IMMINTRIN_H\n" |
56584 | "#error \"Never use <xsavesintrin.h> directly; include <immintrin.h> instead.\"\n" |
56585 | "#endif\n" |
56586 | "\n" |
56587 | "#ifndef __XSAVESINTRIN_H\n" |
56588 | "#define __XSAVESINTRIN_H\n" |
56589 | "\n" |
56590 | "/* Define the default attributes for the functions in this file. */\n" |
56591 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsaves\")))\n" |
56592 | "\n" |
56593 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
56594 | "_xsaves(void *__p, unsigned long long __m) {\n" |
56595 | " __builtin_ia32_xsaves(__p, __m);\n" |
56596 | "}\n" |
56597 | "\n" |
56598 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
56599 | "_xrstors(void *__p, unsigned long long __m) {\n" |
56600 | " __builtin_ia32_xrstors(__p, __m);\n" |
56601 | "}\n" |
56602 | "\n" |
56603 | "#ifdef __x86_64__\n" |
56604 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
56605 | "_xrstors64(void *__p, unsigned long long __m) {\n" |
56606 | " __builtin_ia32_xrstors64(__p, __m);\n" |
56607 | "}\n" |
56608 | "\n" |
56609 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
56610 | "_xsaves64(void *__p, unsigned long long __m) {\n" |
56611 | " __builtin_ia32_xsaves64(__p, __m);\n" |
56612 | "}\n" |
56613 | "#endif\n" |
56614 | "\n" |
56615 | "#undef __DEFAULT_FN_ATTRS\n" |
56616 | "\n" |
56617 | "#endif\n" |
56618 | "" } , |
56619 | { "/builtins/xtestintrin.h" , "/*===---- xtestintrin.h - XTEST intrinsic ----------------------------------===\n" |
56620 | " *\n" |
56621 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
56622 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
56623 | " * in the Software without restriction, including without limitation the rights\n" |
56624 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
56625 | " * copies of the Software, and to permit persons to whom the Software is\n" |
56626 | " * furnished to do so, subject to the following conditions:\n" |
56627 | " *\n" |
56628 | " * The above copyright notice and this permission notice shall be included in\n" |
56629 | " * all copies or substantial portions of the Software.\n" |
56630 | " *\n" |
56631 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
56632 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
56633 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
56634 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
56635 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
56636 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
56637 | " * THE SOFTWARE.\n" |
56638 | " *\n" |
56639 | " *===-----------------------------------------------------------------------===\n" |
56640 | " */\n" |
56641 | "\n" |
56642 | "#ifndef __IMMINTRIN_H\n" |
56643 | "#error \"Never use <xtestintrin.h> directly; include <immintrin.h> instead.\"\n" |
56644 | "#endif\n" |
56645 | "\n" |
56646 | "#ifndef __XTESTINTRIN_H\n" |
56647 | "#define __XTESTINTRIN_H\n" |
56648 | "\n" |
56649 | "/* xtest returns non-zero if the instruction is executed within an RTM or active\n" |
56650 | " * HLE region. */\n" |
56651 | "/* FIXME: This can be an either or for RTM/HLE. Deal with this when HLE is\n" |
56652 | " * supported. */\n" |
56653 | "static __inline__ int\n" |
56654 | " __attribute__((__always_inline__, __nodebug__, __target__(\"rtm\")))\n" |
56655 | " _xtest(void) {\n" |
56656 | " return __builtin_ia32_xtest();\n" |
56657 | "}\n" |
56658 | "\n" |
56659 | "#endif\n" |
56660 | "" } , |
56661 | |
56662 | {} |
56663 | }; |
56664 | |
56665 | |
56666 | |