1/****************************************************************************
2 * Copyright (C) 2013-2016 Woboq GmbH
3 * Olivier Goffart <contact at woboq.com>
4 * https://woboq.com/
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20#pragma once
21
#include <cstddef>
#include <string>
#include <utility>
#include <vector>
25
26
27struct EmbeddedFile {
28 const char *filename;
29 const char *content;
30 size_t size;
31 template <int N>
32 constexpr EmbeddedFile(const char *filename, const char (&data)[N])
33 : filename(filename) , content(data), size(N-1) {}
34 constexpr EmbeddedFile () : filename(nullptr) , content(nullptr), size(0) {}
35};
36
37static constexpr EmbeddedFile EmbeddedFiles[] = {
38 { "/builtins/__clang_cuda_builtin_vars.h" , "/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------===\n"
39" *\n"
40" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
41" * of this software and associated documentation files (the \"Software\"), to deal\n"
42" * in the Software without restriction, including without limitation the rights\n"
43" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
44" * copies of the Software, and to permit persons to whom the Software is\n"
45" * furnished to do so, subject to the following conditions:\n"
46" *\n"
47" * The above copyright notice and this permission notice shall be included in\n"
48" * all copies or substantial portions of the Software.\n"
49" *\n"
50" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
51" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
53" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
54" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
55" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
56" * THE SOFTWARE.\n"
57" *\n"
58" *===-----------------------------------------------------------------------===\n"
59" */\n"
60"\n"
61"#ifndef __CUDA_BUILTIN_VARS_H\n"
62"#define __CUDA_BUILTIN_VARS_H\n"
63"\n"
64"// Forward declares from vector_types.h.\n"
65"struct uint3;\n"
66"struct dim3;\n"
67"\n"
68"// The file implements built-in CUDA variables using __declspec(property).\n"
69"// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx\n"
70"// All read accesses of built-in variable fields get converted into calls to a\n"
71"// getter function which in turn calls the appropriate builtin to fetch the\n"
72"// value.\n"
73"//\n"
74"// Example:\n"
75"// int x = threadIdx.x;\n"
76"// IR output:\n"
77"// %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3\n"
78"// PTX output:\n"
79"// mov.u32 %r2, %tid.x;\n"
80"\n"
81"#define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \\\n"
82" __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \\\n"
83" static inline __attribute__((always_inline)) \\\n"
84" __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \\\n"
85" return INTRINSIC; \\\n"
86" }\n"
87"\n"
88"#if __cplusplus >= 201103L\n"
89"#define __DELETE =delete\n"
90"#else\n"
91"#define __DELETE\n"
92"#endif\n"
93"\n"
94"// Make sure nobody can create instances of the special variable types. nvcc\n"
95"// also disallows taking address of special variables, so we disable address-of\n"
96"// operator as well.\n"
97"#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \\\n"
98" __attribute__((device)) TypeName() __DELETE; \\\n"
99" __attribute__((device)) TypeName(const TypeName &) __DELETE; \\\n"
100" __attribute__((device)) void operator=(const TypeName &) const __DELETE; \\\n"
101" __attribute__((device)) TypeName *operator&() const __DELETE\n"
102"\n"
103"struct __cuda_builtin_threadIdx_t {\n"
104" __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());\n"
105" __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());\n"
106" __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());\n"
107" // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a\n"
108" // uint3). This function is defined after we pull in vector_types.h.\n"
109" __attribute__((device)) operator uint3() const;\n"
110"private:\n"
111" __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);\n"
112"};\n"
113"\n"
114"struct __cuda_builtin_blockIdx_t {\n"
115" __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());\n"
116" __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());\n"
117" __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());\n"
118" // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a\n"
119" // uint3). This function is defined after we pull in vector_types.h.\n"
120" __attribute__((device)) operator uint3() const;\n"
121"private:\n"
122" __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);\n"
123"};\n"
124"\n"
125"struct __cuda_builtin_blockDim_t {\n"
126" __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());\n"
127" __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());\n"
128" __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());\n"
129" // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a\n"
130" // dim3). This function is defined after we pull in vector_types.h.\n"
131" __attribute__((device)) operator dim3() const;\n"
132"private:\n"
133" __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);\n"
134"};\n"
135"\n"
136"struct __cuda_builtin_gridDim_t {\n"
137" __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());\n"
138" __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());\n"
139" __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());\n"
140" // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a\n"
141" // dim3). This function is defined after we pull in vector_types.h.\n"
142" __attribute__((device)) operator dim3() const;\n"
143"private:\n"
144" __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);\n"
145"};\n"
146"\n"
147"#define __CUDA_BUILTIN_VAR \\\n"
148" extern const __attribute__((device)) __attribute__((weak))\n"
149"__CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;\n"
150"__CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx;\n"
151"__CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim;\n"
152"__CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim;\n"
153"\n"
154"// warpSize should translate to read of %WARP_SZ but there's currently no\n"
155"// builtin to do so. According to PTX v4.2 docs 'to date, all target\n"
156"// architectures have a WARP_SZ value of 32'.\n"
157"__attribute__((device)) const int warpSize = 32;\n"
158"\n"
159"#undef __CUDA_DEVICE_BUILTIN\n"
160"#undef __CUDA_BUILTIN_VAR\n"
161"#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS\n"
162"\n"
163"#endif /* __CUDA_BUILTIN_VARS_H */\n"
164"" } ,
165 { "/builtins/__clang_cuda_cmath.h" , "/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===\n"
166" *\n"
167" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
168" * of this software and associated documentation files (the \"Software\"), to deal\n"
169" * in the Software without restriction, including without limitation the rights\n"
170" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
171" * copies of the Software, and to permit persons to whom the Software is\n"
172" * furnished to do so, subject to the following conditions:\n"
173" *\n"
174" * The above copyright notice and this permission notice shall be included in\n"
175" * all copies or substantial portions of the Software.\n"
176" *\n"
177" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
178" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
179" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
180" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
181" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
182" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
183" * THE SOFTWARE.\n"
184" *\n"
185" *===-----------------------------------------------------------------------===\n"
186" */\n"
187"#ifndef __CLANG_CUDA_CMATH_H__\n"
188"#define __CLANG_CUDA_CMATH_H__\n"
189"#ifndef __CUDA__\n"
190"#error \"This file is for CUDA compilation only.\"\n"
191"#endif\n"
192"\n"
193"#include <limits>\n"
194"\n"
195"// CUDA lets us use various std math functions on the device side. This file\n"
196"// works in concert with __clang_cuda_math_forward_declares.h to make this work.\n"
197"//\n"
198"// Specifically, the forward-declares header declares __device__ overloads for\n"
199"// these functions in the global namespace, then pulls them into namespace std\n"
200"// with 'using' statements. Then this file implements those functions, after\n"
201"// their implementations have been pulled in.\n"
202"//\n"
203"// It's important that we declare the functions in the global namespace and pull\n"
204"// them into namespace std with using statements, as opposed to simply declaring\n"
205"// these functions in namespace std, because our device functions need to\n"
206"// overload the standard library functions, which may be declared in the global\n"
207"// namespace or in std, depending on the degree of conformance of the stdlib\n"
208"// implementation. Declaring in the global namespace and pulling into namespace\n"
209"// std covers all of the known knowns.\n"
210"\n"
211"#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))\n"
212"\n"
213"__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }\n"
214"__DEVICE__ long abs(long __n) { return ::labs(__n); }\n"
215"__DEVICE__ float abs(float __x) { return ::fabsf(__x); }\n"
216"__DEVICE__ double abs(double __x) { return ::fabs(__x); }\n"
217"__DEVICE__ float acos(float __x) { return ::acosf(__x); }\n"
218"__DEVICE__ float asin(float __x) { return ::asinf(__x); }\n"
219"__DEVICE__ float atan(float __x) { return ::atanf(__x); }\n"
220"__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }\n"
221"__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }\n"
222"__DEVICE__ float cos(float __x) { return ::cosf(__x); }\n"
223"__DEVICE__ float cosh(float __x) { return ::coshf(__x); }\n"
224"__DEVICE__ float exp(float __x) { return ::expf(__x); }\n"
225"__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }\n"
226"__DEVICE__ float floor(float __x) { return ::floorf(__x); }\n"
227"__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }\n"
228"__DEVICE__ int fpclassify(float __x) {\n"
229" return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,\n"
230" FP_ZERO, __x);\n"
231"}\n"
232"__DEVICE__ int fpclassify(double __x) {\n"
233" return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,\n"
234" FP_ZERO, __x);\n"
235"}\n"
236"__DEVICE__ float frexp(float __arg, int *__exp) {\n"
237" return ::frexpf(__arg, __exp);\n"
238"}\n"
239"\n"
240"// For inscrutable reasons, the CUDA headers define these functions for us on\n"
241"// Windows.\n"
242"#ifndef _MSC_VER\n"
243"__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }\n"
244"__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }\n"
245"__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }\n"
246"// For inscrutable reasons, __finite(), the double-precision version of\n"
247"// __finitef, does not exist when compiling for MacOS. __isfinited is available\n"
248"// everywhere and is just as good.\n"
249"__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }\n"
250"__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }\n"
251"__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }\n"
252"#endif\n"
253"\n"
254"__DEVICE__ bool isgreater(float __x, float __y) {\n"
255" return __builtin_isgreater(__x, __y);\n"
256"}\n"
257"__DEVICE__ bool isgreater(double __x, double __y) {\n"
258" return __builtin_isgreater(__x, __y);\n"
259"}\n"
260"__DEVICE__ bool isgreaterequal(float __x, float __y) {\n"
261" return __builtin_isgreaterequal(__x, __y);\n"
262"}\n"
263"__DEVICE__ bool isgreaterequal(double __x, double __y) {\n"
264" return __builtin_isgreaterequal(__x, __y);\n"
265"}\n"
266"__DEVICE__ bool isless(float __x, float __y) {\n"
267" return __builtin_isless(__x, __y);\n"
268"}\n"
269"__DEVICE__ bool isless(double __x, double __y) {\n"
270" return __builtin_isless(__x, __y);\n"
271"}\n"
272"__DEVICE__ bool islessequal(float __x, float __y) {\n"
273" return __builtin_islessequal(__x, __y);\n"
274"}\n"
275"__DEVICE__ bool islessequal(double __x, double __y) {\n"
276" return __builtin_islessequal(__x, __y);\n"
277"}\n"
278"__DEVICE__ bool islessgreater(float __x, float __y) {\n"
279" return __builtin_islessgreater(__x, __y);\n"
280"}\n"
281"__DEVICE__ bool islessgreater(double __x, double __y) {\n"
282" return __builtin_islessgreater(__x, __y);\n"
283"}\n"
284"__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }\n"
285"__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }\n"
286"__DEVICE__ bool isunordered(float __x, float __y) {\n"
287" return __builtin_isunordered(__x, __y);\n"
288"}\n"
289"__DEVICE__ bool isunordered(double __x, double __y) {\n"
290" return __builtin_isunordered(__x, __y);\n"
291"}\n"
292"__DEVICE__ float ldexp(float __arg, int __exp) {\n"
293" return ::ldexpf(__arg, __exp);\n"
294"}\n"
295"__DEVICE__ float log(float __x) { return ::logf(__x); }\n"
296"__DEVICE__ float log10(float __x) { return ::log10f(__x); }\n"
297"__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }\n"
298"__DEVICE__ float pow(float __base, float __exp) {\n"
299" return ::powf(__base, __exp);\n"
300"}\n"
301"__DEVICE__ float pow(float __base, int __iexp) {\n"
302" return ::powif(__base, __iexp);\n"
303"}\n"
304"__DEVICE__ double pow(double __base, int __iexp) {\n"
305" return ::powi(__base, __iexp);\n"
306"}\n"
307"__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }\n"
308"__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }\n"
309"__DEVICE__ float sin(float __x) { return ::sinf(__x); }\n"
310"__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }\n"
311"__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }\n"
312"__DEVICE__ float tan(float __x) { return ::tanf(__x); }\n"
313"__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }\n"
314"\n"
315"// Notably missing above is nexttoward. We omit it because\n"
316"// libdevice doesn't provide an implementation, and we don't want to be in the\n"
317"// business of implementing tricky libm functions in this header.\n"
318"\n"
319"// Now we've defined everything we promised we'd define in\n"
320"// __clang_cuda_math_forward_declares.h. We need to do two additional things to\n"
321"// fix up our math functions.\n"
322"//\n"
323"// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define\n"
324"// only sin(float) and sin(double), which means that e.g. sin(0) is\n"
325"// ambiguous.\n"
326"//\n"
327"// 2) Pull the __device__ overloads of \"foobarf\" math functions into namespace\n"
328"// std. These are defined in the CUDA headers in the global namespace,\n"
329"// independent of everything else we've done here.\n"
330"\n"
331"// We can't use std::enable_if, because we want to be pre-C++11 compatible. But\n"
332"// we go ahead and unconditionally define functions that are only available when\n"
333"// compiling for C++11 to match the behavior of the CUDA headers.\n"
334"template<bool __B, class __T = void>\n"
335"struct __clang_cuda_enable_if {};\n"
336"\n"
337"template <class __T> struct __clang_cuda_enable_if<true, __T> {\n"
338" typedef __T type;\n"
339"};\n"
340"\n"
341"// Defines an overload of __fn that accepts one integral argument, calls\n"
342"// __fn((double)x), and returns __retty.\n"
343"#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \\\n"
344" template <typename __T> \\\n"
345" __DEVICE__ \\\n"
346" typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \\\n"
347" __retty>::type \\\n"
348" __fn(__T __x) { \\\n"
349" return ::__fn((double)__x); \\\n"
350" }\n"
351"\n"
352"// Defines an overload of __fn that accepts one two arithmetic arguments, calls\n"
353"// __fn((double)x, (double)y), and returns a double.\n"
354"//\n"
355"// Note this is different from OVERLOAD_1, which generates an overload that\n"
356"// accepts only *integral* arguments.\n"
357"#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \\\n"
358" template <typename __T1, typename __T2> \\\n"
359" __DEVICE__ typename __clang_cuda_enable_if< \\\n"
360" std::numeric_limits<__T1>::is_specialized && \\\n"
361" std::numeric_limits<__T2>::is_specialized, \\\n"
362" __retty>::type \\\n"
363" __fn(__T1 __x, __T2 __y) { \\\n"
364" return __fn((double)__x, (double)__y); \\\n"
365" }\n"
366"\n"
367"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)\n"
368"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)\n"
369"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)\n"
370"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)\n"
371"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)\n"
372"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);\n"
373"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)\n"
374"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)\n"
375"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)\n"
376"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);\n"
377"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)\n"
378"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)\n"
379"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)\n"
380"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)\n"
381"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)\n"
382"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)\n"
383"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)\n"
384"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)\n"
385"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);\n"
386"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)\n"
387"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);\n"
388"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);\n"
389"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);\n"
390"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)\n"
391"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);\n"
392"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)\n"
393"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)\n"
394"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);\n"
395"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);\n"
396"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);\n"
397"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);\n"
398"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);\n"
399"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);\n"
400"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);\n"
401"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)\n"
402"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);\n"
403"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)\n"
404"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)\n"
405"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)\n"
406"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)\n"
407"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)\n"
408"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)\n"
409"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)\n"
410"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)\n"
411"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)\n"
412"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)\n"
413"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);\n"
414"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);\n"
415"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);\n"
416"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);\n"
417"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);\n"
418"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);\n"
419"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)\n"
420"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)\n"
421"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)\n"
422"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)\n"
423"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)\n"
424"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)\n"
425"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)\n"
426"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);\n"
427"\n"
428"#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1\n"
429"#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2\n"
430"\n"
431"// Overloads for functions that don't match the patterns expected by\n"
432"// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.\n"
433"template <typename __T1, typename __T2, typename __T3>\n"
434"__DEVICE__ typename __clang_cuda_enable_if<\n"
435" std::numeric_limits<__T1>::is_specialized &&\n"
436" std::numeric_limits<__T2>::is_specialized &&\n"
437" std::numeric_limits<__T3>::is_specialized,\n"
438" double>::type\n"
439"fma(__T1 __x, __T2 __y, __T3 __z) {\n"
440" return std::fma((double)__x, (double)__y, (double)__z);\n"
441"}\n"
442"\n"
443"template <typename __T>\n"
444"__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n"
445" double>::type\n"
446"frexp(__T __x, int *__exp) {\n"
447" return std::frexp((double)__x, __exp);\n"
448"}\n"
449"\n"
450"template <typename __T>\n"
451"__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n"
452" double>::type\n"
453"ldexp(__T __x, int __exp) {\n"
454" return std::ldexp((double)__x, __exp);\n"
455"}\n"
456"\n"
457"template <typename __T1, typename __T2>\n"
458"__DEVICE__ typename __clang_cuda_enable_if<\n"
459" std::numeric_limits<__T1>::is_specialized &&\n"
460" std::numeric_limits<__T2>::is_specialized,\n"
461" double>::type\n"
462"remquo(__T1 __x, __T2 __y, int *__quo) {\n"
463" return std::remquo((double)__x, (double)__y, __quo);\n"
464"}\n"
465"\n"
466"template <typename __T>\n"
467"__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n"
468" double>::type\n"
469"scalbln(__T __x, long __exp) {\n"
470" return std::scalbln((double)__x, __exp);\n"
471"}\n"
472"\n"
473"template <typename __T>\n"
474"__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n"
475" double>::type\n"
476"scalbn(__T __x, int __exp) {\n"
477" return std::scalbn((double)__x, __exp);\n"
478"}\n"
479"\n"
480"// We need to define these overloads in exactly the namespace our standard\n"
481"// library uses (including the right inline namespace), otherwise they won't be\n"
482"// picked up by other functions in the standard library (e.g. functions in\n"
483"// <complex>). Thus the ugliness below.\n"
484"#ifdef _LIBCPP_BEGIN_NAMESPACE_STD\n"
485"_LIBCPP_BEGIN_NAMESPACE_STD\n"
486"#else\n"
487"namespace std {\n"
488"#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
489"_GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
490"#endif\n"
491"#endif\n"
492"\n"
493"// Pull the new overloads we defined above into namespace std.\n"
494"using ::acos;\n"
495"using ::acosh;\n"
496"using ::asin;\n"
497"using ::asinh;\n"
498"using ::atan;\n"
499"using ::atan2;\n"
500"using ::atanh;\n"
501"using ::cbrt;\n"
502"using ::ceil;\n"
503"using ::copysign;\n"
504"using ::cos;\n"
505"using ::cosh;\n"
506"using ::erf;\n"
507"using ::erfc;\n"
508"using ::exp;\n"
509"using ::exp2;\n"
510"using ::expm1;\n"
511"using ::fabs;\n"
512"using ::fdim;\n"
513"using ::floor;\n"
514"using ::fma;\n"
515"using ::fmax;\n"
516"using ::fmin;\n"
517"using ::fmod;\n"
518"using ::fpclassify;\n"
519"using ::frexp;\n"
520"using ::hypot;\n"
521"using ::ilogb;\n"
522"using ::isfinite;\n"
523"using ::isgreater;\n"
524"using ::isgreaterequal;\n"
525"using ::isless;\n"
526"using ::islessequal;\n"
527"using ::islessgreater;\n"
528"using ::isnormal;\n"
529"using ::isunordered;\n"
530"using ::ldexp;\n"
531"using ::lgamma;\n"
532"using ::llrint;\n"
533"using ::llround;\n"
534"using ::log;\n"
535"using ::log10;\n"
536"using ::log1p;\n"
537"using ::log2;\n"
538"using ::logb;\n"
539"using ::lrint;\n"
540"using ::lround;\n"
541"using ::nearbyint;\n"
542"using ::nextafter;\n"
543"using ::pow;\n"
544"using ::remainder;\n"
545"using ::remquo;\n"
546"using ::rint;\n"
547"using ::round;\n"
548"using ::scalbln;\n"
549"using ::scalbn;\n"
550"using ::signbit;\n"
551"using ::sin;\n"
552"using ::sinh;\n"
553"using ::sqrt;\n"
554"using ::tan;\n"
555"using ::tanh;\n"
556"using ::tgamma;\n"
557"using ::trunc;\n"
558"\n"
559"// Well this is fun: We need to pull these symbols in for libc++, but we can't\n"
560"// pull them in with libstdc++, because its ::isinf and ::isnan are different\n"
561"// than its std::isinf and std::isnan.\n"
562"#ifndef __GLIBCXX__\n"
563"using ::isinf;\n"
564"using ::isnan;\n"
565"#endif\n"
566"\n"
567"// Finally, pull the \"foobarf\" functions that CUDA defines in its headers into\n"
568"// namespace std.\n"
569"using ::acosf;\n"
570"using ::acoshf;\n"
571"using ::asinf;\n"
572"using ::asinhf;\n"
573"using ::atan2f;\n"
574"using ::atanf;\n"
575"using ::atanhf;\n"
576"using ::cbrtf;\n"
577"using ::ceilf;\n"
578"using ::copysignf;\n"
579"using ::cosf;\n"
580"using ::coshf;\n"
581"using ::erfcf;\n"
582"using ::erff;\n"
583"using ::exp2f;\n"
584"using ::expf;\n"
585"using ::expm1f;\n"
586"using ::fabsf;\n"
587"using ::fdimf;\n"
588"using ::floorf;\n"
589"using ::fmaf;\n"
590"using ::fmaxf;\n"
591"using ::fminf;\n"
592"using ::fmodf;\n"
593"using ::frexpf;\n"
594"using ::hypotf;\n"
595"using ::ilogbf;\n"
596"using ::ldexpf;\n"
597"using ::lgammaf;\n"
598"using ::llrintf;\n"
599"using ::llroundf;\n"
600"using ::log10f;\n"
601"using ::log1pf;\n"
602"using ::log2f;\n"
603"using ::logbf;\n"
604"using ::logf;\n"
605"using ::lrintf;\n"
606"using ::lroundf;\n"
607"using ::modff;\n"
608"using ::nearbyintf;\n"
609"using ::nextafterf;\n"
610"using ::powf;\n"
611"using ::remainderf;\n"
612"using ::remquof;\n"
613"using ::rintf;\n"
614"using ::roundf;\n"
615"using ::scalblnf;\n"
616"using ::scalbnf;\n"
617"using ::sinf;\n"
618"using ::sinhf;\n"
619"using ::sqrtf;\n"
620"using ::tanf;\n"
621"using ::tanhf;\n"
622"using ::tgammaf;\n"
623"using ::truncf;\n"
624"\n"
625"#ifdef _LIBCPP_END_NAMESPACE_STD\n"
626"_LIBCPP_END_NAMESPACE_STD\n"
627"#else\n"
628"#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
629"_GLIBCXX_END_NAMESPACE_VERSION\n"
630"#endif\n"
631"} // namespace std\n"
632"#endif\n"
633"\n"
634"#undef __DEVICE__\n"
635"\n"
636"#endif\n"
637"" } ,
638 { "/builtins/__clang_cuda_complex_builtins.h" , "/*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---===\n"
639" *\n"
640" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
641" * of this software and associated documentation files (the \"Software\"), to deal\n"
642" * in the Software without restriction, including without limitation the rights\n"
643" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
644" * copies of the Software, and to permit persons to whom the Software is\n"
645" * furnished to do so, subject to the following conditions:\n"
646" *\n"
647" * The above copyright notice and this permission notice shall be included in\n"
648" * all copies or substantial portions of the Software.\n"
649" *\n"
650" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
651" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
652" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
653" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
654" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
655" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
656" * THE SOFTWARE.\n"
657" *\n"
658" *===-----------------------------------------------------------------------===\n"
659" */\n"
660"\n"
661"#ifndef __CLANG_CUDA_COMPLEX_BUILTINS\n"
662"#define __CLANG_CUDA_COMPLEX_BUILTINS\n"
663"\n"
664"// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are\n"
665"// libgcc functions that clang assumes are available when compiling c99 complex\n"
666"// operations. (These implementations come from libc++, and have been modified\n"
667"// to work with CUDA.)\n"
668"\n"
669"extern \"C\" inline __device__ double _Complex __muldc3(double __a, double __b,\n"
670" double __c, double __d) {\n"
671" double __ac = __a * __c;\n"
672" double __bd = __b * __d;\n"
673" double __ad = __a * __d;\n"
674" double __bc = __b * __c;\n"
675" double _Complex z;\n"
676" __real__(z) = __ac - __bd;\n"
677" __imag__(z) = __ad + __bc;\n"
678" if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n"
679" int __recalc = 0;\n"
680" if (std::isinf(__a) || std::isinf(__b)) {\n"
681" __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);\n"
682" __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);\n"
683" if (std::isnan(__c))\n"
684" __c = std::copysign(0, __c);\n"
685" if (std::isnan(__d))\n"
686" __d = std::copysign(0, __d);\n"
687" __recalc = 1;\n"
688" }\n"
689" if (std::isinf(__c) || std::isinf(__d)) {\n"
690" __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);\n"
691" __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);\n"
692" if (std::isnan(__a))\n"
693" __a = std::copysign(0, __a);\n"
694" if (std::isnan(__b))\n"
695" __b = std::copysign(0, __b);\n"
696" __recalc = 1;\n"
697" }\n"
698" if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||\n"
699" std::isinf(__ad) || std::isinf(__bc))) {\n"
700" if (std::isnan(__a))\n"
701" __a = std::copysign(0, __a);\n"
702" if (std::isnan(__b))\n"
703" __b = std::copysign(0, __b);\n"
704" if (std::isnan(__c))\n"
705" __c = std::copysign(0, __c);\n"
706" if (std::isnan(__d))\n"
707" __d = std::copysign(0, __d);\n"
708" __recalc = 1;\n"
709" }\n"
710" if (__recalc) {\n"
711" // Can't use std::numeric_limits<double>::infinity() -- that doesn't have\n"
712" // a device overload (and isn't constexpr before C++11, naturally).\n"
713" __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);\n"
714" __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);\n"
715" }\n"
716" }\n"
717" return z;\n"
718"}\n"
719"\n"
720"extern \"C\" inline __device__ float _Complex __mulsc3(float __a, float __b,\n"
721" float __c, float __d) {\n"
722" float __ac = __a * __c;\n"
723" float __bd = __b * __d;\n"
724" float __ad = __a * __d;\n"
725" float __bc = __b * __c;\n"
726" float _Complex z;\n"
727" __real__(z) = __ac - __bd;\n"
728" __imag__(z) = __ad + __bc;\n"
729" if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n"
730" int __recalc = 0;\n"
731" if (std::isinf(__a) || std::isinf(__b)) {\n"
732" __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);\n"
733" __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);\n"
734" if (std::isnan(__c))\n"
735" __c = std::copysign(0, __c);\n"
736" if (std::isnan(__d))\n"
737" __d = std::copysign(0, __d);\n"
738" __recalc = 1;\n"
739" }\n"
740" if (std::isinf(__c) || std::isinf(__d)) {\n"
741" __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);\n"
742" __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);\n"
743" if (std::isnan(__a))\n"
744" __a = std::copysign(0, __a);\n"
745" if (std::isnan(__b))\n"
746" __b = std::copysign(0, __b);\n"
747" __recalc = 1;\n"
748" }\n"
749" if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||\n"
750" std::isinf(__ad) || std::isinf(__bc))) {\n"
751" if (std::isnan(__a))\n"
752" __a = std::copysign(0, __a);\n"
753" if (std::isnan(__b))\n"
754" __b = std::copysign(0, __b);\n"
755" if (std::isnan(__c))\n"
756" __c = std::copysign(0, __c);\n"
757" if (std::isnan(__d))\n"
758" __d = std::copysign(0, __d);\n"
759" __recalc = 1;\n"
760" }\n"
761" if (__recalc) {\n"
762" __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);\n"
763" __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);\n"
764" }\n"
765" }\n"
766" return z;\n"
767"}\n"
768"\n"
769"extern \"C\" inline __device__ double _Complex __divdc3(double __a, double __b,\n"
770" double __c, double __d) {\n"
771" int __ilogbw = 0;\n"
772" // Can't use std::max, because that's defined in <algorithm>, and we don't\n"
773" // want to pull that in for every compile. The CUDA headers define\n"
774" // ::max(float, float) and ::max(double, double), which is sufficient for us.\n"
775" double __logbw = std::logb(max(std::abs(__c), std::abs(__d)));\n"
776" if (std::isfinite(__logbw)) {\n"
777" __ilogbw = (int)__logbw;\n"
778" __c = std::scalbn(__c, -__ilogbw);\n"
779" __d = std::scalbn(__d, -__ilogbw);\n"
780" }\n"
781" double __denom = __c * __c + __d * __d;\n"
782" double _Complex z;\n"
783" __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);\n"
784" __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);\n"
785" if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n"
786" if ((__denom == 0.0) && (!std::isnan(__a) || !std::isnan(__b))) {\n"
787" __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;\n"
788" __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;\n"
789" } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&\n"
790" std::isfinite(__d)) {\n"
791" __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a);\n"
792" __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b);\n"
793" __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);\n"
794" __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);\n"
795" } else if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) &&\n"
796" std::isfinite(__b)) {\n"
797" __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c);\n"
798" __d = std::copysign(std::isinf(__d) ? 1.0 : 0.0, __d);\n"
799" __real__(z) = 0.0 * (__a * __c + __b * __d);\n"
800" __imag__(z) = 0.0 * (__b * __c - __a * __d);\n"
801" }\n"
802" }\n"
803" return z;\n"
804"}\n"
805"\n"
806"extern \"C\" inline __device__ float _Complex __divsc3(float __a, float __b,\n"
807" float __c, float __d) {\n"
808" int __ilogbw = 0;\n"
809" float __logbw = std::logb(max(std::abs(__c), std::abs(__d)));\n"
810" if (std::isfinite(__logbw)) {\n"
811" __ilogbw = (int)__logbw;\n"
812" __c = std::scalbn(__c, -__ilogbw);\n"
813" __d = std::scalbn(__d, -__ilogbw);\n"
814" }\n"
815" float __denom = __c * __c + __d * __d;\n"
816" float _Complex z;\n"
817" __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);\n"
818" __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);\n"
819" if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n"
820" if ((__denom == 0) && (!std::isnan(__a) || !std::isnan(__b))) {\n"
821" __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;\n"
822" __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;\n"
823" } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&\n"
824" std::isfinite(__d)) {\n"
825" __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);\n"
826" __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);\n"
827" __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);\n"
828" __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);\n"
829" } else if (std::isinf(__logbw) && __logbw > 0 && std::isfinite(__a) &&\n"
830" std::isfinite(__b)) {\n"
831" __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);\n"
832" __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);\n"
833" __real__(z) = 0 * (__a * __c + __b * __d);\n"
834" __imag__(z) = 0 * (__b * __c - __a * __d);\n"
835" }\n"
836" }\n"
837" return z;\n"
838"}\n"
839"\n"
840"#endif // __CLANG_CUDA_COMPLEX_BUILTINS\n"
841"" } ,
842 { "/builtins/__clang_cuda_device_functions.h" , "/*===---- __clang_cuda_device_functions.h - CUDA runtime support -----------===\n"
843" *\n"
844" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
845" * of this software and associated documentation files (the \"Software\"), to deal\n"
846" * in the Software without restriction, including without limitation the rights\n"
847" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
848" * copies of the Software, and to permit persons to whom the Software is\n"
849" * furnished to do so, subject to the following conditions:\n"
850" *\n"
851" * The above copyright notice and this permission notice shall be included in\n"
852" * all copies or substantial portions of the Software.\n"
853" *\n"
854" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
855" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
856" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
857" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
858" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
859" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
860" * THE SOFTWARE.\n"
861" *\n"
862" *===-----------------------------------------------------------------------===\n"
863" */\n"
864"\n"
865"#ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__\n"
866"#define __CLANG_CUDA_DEVICE_FUNCTIONS_H__\n"
867"\n"
868"#if CUDA_VERSION < 9000\n"
869"#error This file is intended to be used with CUDA-9+ only.\n"
870"#endif\n"
871"\n"
872"// __DEVICE__ is a helper macro with common set of attributes for the wrappers\n"
873"// we implement in this file. We need static in order to avoid emitting unused\n"
874"// functions and __forceinline__ helps inlining these wrappers at -O1.\n"
875"#pragma push_macro(\"__DEVICE__\")\n"
876"#define __DEVICE__ static __device__ __forceinline__\n"
877"\n"
878"// libdevice provides fast low precision and slow full-recision implementations\n"
879"// for some functions. Which one gets selected depends on\n"
880"// __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if\n"
881"// -ffast-math or -fcuda-approx-transcendentals are in effect.\n"
882"#pragma push_macro(\"__FAST_OR_SLOW\")\n"
883"#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)\n"
884"#define __FAST_OR_SLOW(fast, slow) fast\n"
885"#else\n"
886"#define __FAST_OR_SLOW(fast, slow) slow\n"
887"#endif\n"
888"\n"
889"__DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); }\n"
890"__DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); }\n"
891"__DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); }\n"
892"__DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); }\n"
893"__DEVICE__ unsigned long long __brevll(unsigned long long __a) {\n"
894" return __nv_brevll(__a);\n"
895"}\n"
896"__DEVICE__ void __brkpt() { asm volatile(\"brkpt;\"); }\n"
897"__DEVICE__ void __brkpt(int __a) { __brkpt(); }\n"
898"__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,\n"
899" unsigned int __c) {\n"
900" return __nv_byte_perm(__a, __b, __c);\n"
901"}\n"
902"__DEVICE__ int __clz(int __a) { return __nv_clz(__a); }\n"
903"__DEVICE__ int __clzll(long long __a) { return __nv_clzll(__a); }\n"
904"__DEVICE__ float __cosf(float __a) { return __nv_fast_cosf(__a); }\n"
905"__DEVICE__ double __dAtomicAdd(double *__p, double __v) {\n"
906" return __nvvm_atom_add_gen_d(__p, __v);\n"
907"}\n"
908"__DEVICE__ double __dAtomicAdd_block(double *__p, double __v) {\n"
909" return __nvvm_atom_cta_add_gen_d(__p, __v);\n"
910"}\n"
911"__DEVICE__ double __dAtomicAdd_system(double *__p, double __v) {\n"
912" return __nvvm_atom_sys_add_gen_d(__p, __v);\n"
913"}\n"
914"__DEVICE__ double __dadd_rd(double __a, double __b) {\n"
915" return __nv_dadd_rd(__a, __b);\n"
916"}\n"
917"__DEVICE__ double __dadd_rn(double __a, double __b) {\n"
918" return __nv_dadd_rn(__a, __b);\n"
919"}\n"
920"__DEVICE__ double __dadd_ru(double __a, double __b) {\n"
921" return __nv_dadd_ru(__a, __b);\n"
922"}\n"
923"__DEVICE__ double __dadd_rz(double __a, double __b) {\n"
924" return __nv_dadd_rz(__a, __b);\n"
925"}\n"
926"__DEVICE__ double __ddiv_rd(double __a, double __b) {\n"
927" return __nv_ddiv_rd(__a, __b);\n"
928"}\n"
929"__DEVICE__ double __ddiv_rn(double __a, double __b) {\n"
930" return __nv_ddiv_rn(__a, __b);\n"
931"}\n"
932"__DEVICE__ double __ddiv_ru(double __a, double __b) {\n"
933" return __nv_ddiv_ru(__a, __b);\n"
934"}\n"
935"__DEVICE__ double __ddiv_rz(double __a, double __b) {\n"
936" return __nv_ddiv_rz(__a, __b);\n"
937"}\n"
938"__DEVICE__ double __dmul_rd(double __a, double __b) {\n"
939" return __nv_dmul_rd(__a, __b);\n"
940"}\n"
941"__DEVICE__ double __dmul_rn(double __a, double __b) {\n"
942" return __nv_dmul_rn(__a, __b);\n"
943"}\n"
944"__DEVICE__ double __dmul_ru(double __a, double __b) {\n"
945" return __nv_dmul_ru(__a, __b);\n"
946"}\n"
947"__DEVICE__ double __dmul_rz(double __a, double __b) {\n"
948" return __nv_dmul_rz(__a, __b);\n"
949"}\n"
950"__DEVICE__ float __double2float_rd(double __a) {\n"
951" return __nv_double2float_rd(__a);\n"
952"}\n"
953"__DEVICE__ float __double2float_rn(double __a) {\n"
954" return __nv_double2float_rn(__a);\n"
955"}\n"
956"__DEVICE__ float __double2float_ru(double __a) {\n"
957" return __nv_double2float_ru(__a);\n"
958"}\n"
959"__DEVICE__ float __double2float_rz(double __a) {\n"
960" return __nv_double2float_rz(__a);\n"
961"}\n"
962"__DEVICE__ int __double2hiint(double __a) { return __nv_double2hiint(__a); }\n"
963"__DEVICE__ int __double2int_rd(double __a) { return __nv_double2int_rd(__a); }\n"
964"__DEVICE__ int __double2int_rn(double __a) { return __nv_double2int_rn(__a); }\n"
965"__DEVICE__ int __double2int_ru(double __a) { return __nv_double2int_ru(__a); }\n"
966"__DEVICE__ int __double2int_rz(double __a) { return __nv_double2int_rz(__a); }\n"
967"__DEVICE__ long long __double2ll_rd(double __a) {\n"
968" return __nv_double2ll_rd(__a);\n"
969"}\n"
970"__DEVICE__ long long __double2ll_rn(double __a) {\n"
971" return __nv_double2ll_rn(__a);\n"
972"}\n"
973"__DEVICE__ long long __double2ll_ru(double __a) {\n"
974" return __nv_double2ll_ru(__a);\n"
975"}\n"
976"__DEVICE__ long long __double2ll_rz(double __a) {\n"
977" return __nv_double2ll_rz(__a);\n"
978"}\n"
979"__DEVICE__ int __double2loint(double __a) { return __nv_double2loint(__a); }\n"
980"__DEVICE__ unsigned int __double2uint_rd(double __a) {\n"
981" return __nv_double2uint_rd(__a);\n"
982"}\n"
983"__DEVICE__ unsigned int __double2uint_rn(double __a) {\n"
984" return __nv_double2uint_rn(__a);\n"
985"}\n"
986"__DEVICE__ unsigned int __double2uint_ru(double __a) {\n"
987" return __nv_double2uint_ru(__a);\n"
988"}\n"
989"__DEVICE__ unsigned int __double2uint_rz(double __a) {\n"
990" return __nv_double2uint_rz(__a);\n"
991"}\n"
992"__DEVICE__ unsigned long long __double2ull_rd(double __a) {\n"
993" return __nv_double2ull_rd(__a);\n"
994"}\n"
995"__DEVICE__ unsigned long long __double2ull_rn(double __a) {\n"
996" return __nv_double2ull_rn(__a);\n"
997"}\n"
998"__DEVICE__ unsigned long long __double2ull_ru(double __a) {\n"
999" return __nv_double2ull_ru(__a);\n"
1000"}\n"
1001"__DEVICE__ unsigned long long __double2ull_rz(double __a) {\n"
1002" return __nv_double2ull_rz(__a);\n"
1003"}\n"
1004"__DEVICE__ long long __double_as_longlong(double __a) {\n"
1005" return __nv_double_as_longlong(__a);\n"
1006"}\n"
1007"__DEVICE__ double __drcp_rd(double __a) { return __nv_drcp_rd(__a); }\n"
1008"__DEVICE__ double __drcp_rn(double __a) { return __nv_drcp_rn(__a); }\n"
1009"__DEVICE__ double __drcp_ru(double __a) { return __nv_drcp_ru(__a); }\n"
1010"__DEVICE__ double __drcp_rz(double __a) { return __nv_drcp_rz(__a); }\n"
1011"__DEVICE__ double __dsqrt_rd(double __a) { return __nv_dsqrt_rd(__a); }\n"
1012"__DEVICE__ double __dsqrt_rn(double __a) { return __nv_dsqrt_rn(__a); }\n"
1013"__DEVICE__ double __dsqrt_ru(double __a) { return __nv_dsqrt_ru(__a); }\n"
1014"__DEVICE__ double __dsqrt_rz(double __a) { return __nv_dsqrt_rz(__a); }\n"
1015"__DEVICE__ double __dsub_rd(double __a, double __b) {\n"
1016" return __nv_dsub_rd(__a, __b);\n"
1017"}\n"
1018"__DEVICE__ double __dsub_rn(double __a, double __b) {\n"
1019" return __nv_dsub_rn(__a, __b);\n"
1020"}\n"
1021"__DEVICE__ double __dsub_ru(double __a, double __b) {\n"
1022" return __nv_dsub_ru(__a, __b);\n"
1023"}\n"
1024"__DEVICE__ double __dsub_rz(double __a, double __b) {\n"
1025" return __nv_dsub_rz(__a, __b);\n"
1026"}\n"
1027"__DEVICE__ float __exp10f(float __a) { return __nv_fast_exp10f(__a); }\n"
1028"__DEVICE__ float __expf(float __a) { return __nv_fast_expf(__a); }\n"
1029"__DEVICE__ float __fAtomicAdd(float *__p, float __v) {\n"
1030" return __nvvm_atom_add_gen_f(__p, __v);\n"
1031"}\n"
1032"__DEVICE__ float __fAtomicAdd_block(float *__p, float __v) {\n"
1033" return __nvvm_atom_cta_add_gen_f(__p, __v);\n"
1034"}\n"
1035"__DEVICE__ float __fAtomicAdd_system(float *__p, float __v) {\n"
1036" return __nvvm_atom_sys_add_gen_f(__p, __v);\n"
1037"}\n"
1038"__DEVICE__ float __fAtomicExch(float *__p, float __v) {\n"
1039" return __nv_int_as_float(\n"
1040" __nvvm_atom_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));\n"
1041"}\n"
1042"__DEVICE__ float __fAtomicExch_block(float *__p, float __v) {\n"
1043" return __nv_int_as_float(\n"
1044" __nvvm_atom_cta_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));\n"
1045"}\n"
1046"__DEVICE__ float __fAtomicExch_system(float *__p, float __v) {\n"
1047" return __nv_int_as_float(\n"
1048" __nvvm_atom_sys_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));\n"
1049"}\n"
1050"__DEVICE__ float __fadd_rd(float __a, float __b) {\n"
1051" return __nv_fadd_rd(__a, __b);\n"
1052"}\n"
1053"__DEVICE__ float __fadd_rn(float __a, float __b) {\n"
1054" return __nv_fadd_rn(__a, __b);\n"
1055"}\n"
1056"__DEVICE__ float __fadd_ru(float __a, float __b) {\n"
1057" return __nv_fadd_ru(__a, __b);\n"
1058"}\n"
1059"__DEVICE__ float __fadd_rz(float __a, float __b) {\n"
1060" return __nv_fadd_rz(__a, __b);\n"
1061"}\n"
1062"__DEVICE__ float __fdiv_rd(float __a, float __b) {\n"
1063" return __nv_fdiv_rd(__a, __b);\n"
1064"}\n"
1065"__DEVICE__ float __fdiv_rn(float __a, float __b) {\n"
1066" return __nv_fdiv_rn(__a, __b);\n"
1067"}\n"
1068"__DEVICE__ float __fdiv_ru(float __a, float __b) {\n"
1069" return __nv_fdiv_ru(__a, __b);\n"
1070"}\n"
1071"__DEVICE__ float __fdiv_rz(float __a, float __b) {\n"
1072" return __nv_fdiv_rz(__a, __b);\n"
1073"}\n"
1074"__DEVICE__ float __fdividef(float __a, float __b) {\n"
1075" return __nv_fast_fdividef(__a, __b);\n"
1076"}\n"
1077"__DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); }\n"
1078"__DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }\n"
1079"__DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }\n"
1080"__DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }\n"
1081"__DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }\n"
1082"__DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }\n"
1083"__DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); }\n"
1084"__DEVICE__ int __float2int_rz(float __a) { return __nv_float2int_rz(__a); }\n"
1085"__DEVICE__ long long __float2ll_rd(float __a) { return __nv_float2ll_rd(__a); }\n"
1086"__DEVICE__ long long __float2ll_rn(float __a) { return __nv_float2ll_rn(__a); }\n"
1087"__DEVICE__ long long __float2ll_ru(float __a) { return __nv_float2ll_ru(__a); }\n"
1088"__DEVICE__ long long __float2ll_rz(float __a) { return __nv_float2ll_rz(__a); }\n"
1089"__DEVICE__ unsigned int __float2uint_rd(float __a) {\n"
1090" return __nv_float2uint_rd(__a);\n"
1091"}\n"
1092"__DEVICE__ unsigned int __float2uint_rn(float __a) {\n"
1093" return __nv_float2uint_rn(__a);\n"
1094"}\n"
1095"__DEVICE__ unsigned int __float2uint_ru(float __a) {\n"
1096" return __nv_float2uint_ru(__a);\n"
1097"}\n"
1098"__DEVICE__ unsigned int __float2uint_rz(float __a) {\n"
1099" return __nv_float2uint_rz(__a);\n"
1100"}\n"
1101"__DEVICE__ unsigned long long __float2ull_rd(float __a) {\n"
1102" return __nv_float2ull_rd(__a);\n"
1103"}\n"
1104"__DEVICE__ unsigned long long __float2ull_rn(float __a) {\n"
1105" return __nv_float2ull_rn(__a);\n"
1106"}\n"
1107"__DEVICE__ unsigned long long __float2ull_ru(float __a) {\n"
1108" return __nv_float2ull_ru(__a);\n"
1109"}\n"
1110"__DEVICE__ unsigned long long __float2ull_rz(float __a) {\n"
1111" return __nv_float2ull_rz(__a);\n"
1112"}\n"
1113"__DEVICE__ int __float_as_int(float __a) { return __nv_float_as_int(__a); }\n"
1114"__DEVICE__ unsigned int __float_as_uint(float __a) {\n"
1115" return __nv_float_as_uint(__a);\n"
1116"}\n"
1117"__DEVICE__ double __fma_rd(double __a, double __b, double __c) {\n"
1118" return __nv_fma_rd(__a, __b, __c);\n"
1119"}\n"
1120"__DEVICE__ double __fma_rn(double __a, double __b, double __c) {\n"
1121" return __nv_fma_rn(__a, __b, __c);\n"
1122"}\n"
1123"__DEVICE__ double __fma_ru(double __a, double __b, double __c) {\n"
1124" return __nv_fma_ru(__a, __b, __c);\n"
1125"}\n"
1126"__DEVICE__ double __fma_rz(double __a, double __b, double __c) {\n"
1127" return __nv_fma_rz(__a, __b, __c);\n"
1128"}\n"
1129"__DEVICE__ float __fmaf_ieee_rd(float __a, float __b, float __c) {\n"
1130" return __nv_fmaf_ieee_rd(__a, __b, __c);\n"
1131"}\n"
1132"__DEVICE__ float __fmaf_ieee_rn(float __a, float __b, float __c) {\n"
1133" return __nv_fmaf_ieee_rn(__a, __b, __c);\n"
1134"}\n"
1135"__DEVICE__ float __fmaf_ieee_ru(float __a, float __b, float __c) {\n"
1136" return __nv_fmaf_ieee_ru(__a, __b, __c);\n"
1137"}\n"
1138"__DEVICE__ float __fmaf_ieee_rz(float __a, float __b, float __c) {\n"
1139" return __nv_fmaf_ieee_rz(__a, __b, __c);\n"
1140"}\n"
1141"__DEVICE__ float __fmaf_rd(float __a, float __b, float __c) {\n"
1142" return __nv_fmaf_rd(__a, __b, __c);\n"
1143"}\n"
1144"__DEVICE__ float __fmaf_rn(float __a, float __b, float __c) {\n"
1145" return __nv_fmaf_rn(__a, __b, __c);\n"
1146"}\n"
1147"__DEVICE__ float __fmaf_ru(float __a, float __b, float __c) {\n"
1148" return __nv_fmaf_ru(__a, __b, __c);\n"
1149"}\n"
1150"__DEVICE__ float __fmaf_rz(float __a, float __b, float __c) {\n"
1151" return __nv_fmaf_rz(__a, __b, __c);\n"
1152"}\n"
1153"__DEVICE__ float __fmul_rd(float __a, float __b) {\n"
1154" return __nv_fmul_rd(__a, __b);\n"
1155"}\n"
1156"__DEVICE__ float __fmul_rn(float __a, float __b) {\n"
1157" return __nv_fmul_rn(__a, __b);\n"
1158"}\n"
1159"__DEVICE__ float __fmul_ru(float __a, float __b) {\n"
1160" return __nv_fmul_ru(__a, __b);\n"
1161"}\n"
1162"__DEVICE__ float __fmul_rz(float __a, float __b) {\n"
1163" return __nv_fmul_rz(__a, __b);\n"
1164"}\n"
1165"__DEVICE__ float __frcp_rd(float __a) { return __nv_frcp_rd(__a); }\n"
1166"__DEVICE__ float __frcp_rn(float __a) { return __nv_frcp_rn(__a); }\n"
1167"__DEVICE__ float __frcp_ru(float __a) { return __nv_frcp_ru(__a); }\n"
1168"__DEVICE__ float __frcp_rz(float __a) { return __nv_frcp_rz(__a); }\n"
1169"__DEVICE__ float __frsqrt_rn(float __a) { return __nv_frsqrt_rn(__a); }\n"
1170"__DEVICE__ float __fsqrt_rd(float __a) { return __nv_fsqrt_rd(__a); }\n"
1171"__DEVICE__ float __fsqrt_rn(float __a) { return __nv_fsqrt_rn(__a); }\n"
1172"__DEVICE__ float __fsqrt_ru(float __a) { return __nv_fsqrt_ru(__a); }\n"
1173"__DEVICE__ float __fsqrt_rz(float __a) { return __nv_fsqrt_rz(__a); }\n"
1174"__DEVICE__ float __fsub_rd(float __a, float __b) {\n"
1175" return __nv_fsub_rd(__a, __b);\n"
1176"}\n"
1177"__DEVICE__ float __fsub_rn(float __a, float __b) {\n"
1178" return __nv_fsub_rn(__a, __b);\n"
1179"}\n"
1180"__DEVICE__ float __fsub_ru(float __a, float __b) {\n"
1181" return __nv_fsub_ru(__a, __b);\n"
1182"}\n"
1183"__DEVICE__ float __fsub_rz(float __a, float __b) {\n"
1184" return __nv_fsub_rz(__a, __b);\n"
1185"}\n"
1186"__DEVICE__ int __hadd(int __a, int __b) { return __nv_hadd(__a, __b); }\n"
1187"__DEVICE__ double __hiloint2double(int __a, int __b) {\n"
1188" return __nv_hiloint2double(__a, __b);\n"
1189"}\n"
1190"__DEVICE__ int __iAtomicAdd(int *__p, int __v) {\n"
1191" return __nvvm_atom_add_gen_i(__p, __v);\n"
1192"}\n"
1193"__DEVICE__ int __iAtomicAdd_block(int *__p, int __v) {\n" // embedded wrapper: forwards to __nvvm_atom_cta_add_gen_i
1194" return __nvvm_atom_cta_add_gen_i(__p, __v);\n" // fix: propagate the builtin's result like __iAtomicAdd; the int function used to fall off its end
1195"}\n"
1196"__DEVICE__ int __iAtomicAdd_system(int *__p, int __v) {\n" // embedded wrapper: forwards to __nvvm_atom_sys_add_gen_i
1197" return __nvvm_atom_sys_add_gen_i(__p, __v);\n" // fix: propagate the builtin's result like __iAtomicAdd; the int function used to fall off its end
1198"}\n"
1199"__DEVICE__ int __iAtomicAnd(int *__p, int __v) {\n"
1200" return __nvvm_atom_and_gen_i(__p, __v);\n"
1201"}\n"
1202"__DEVICE__ int __iAtomicAnd_block(int *__p, int __v) {\n"
1203" return __nvvm_atom_cta_and_gen_i(__p, __v);\n"
1204"}\n"
1205"__DEVICE__ int __iAtomicAnd_system(int *__p, int __v) {\n"
1206" return __nvvm_atom_sys_and_gen_i(__p, __v);\n"
1207"}\n"
1208"__DEVICE__ int __iAtomicCAS(int *__p, int __cmp, int __v) {\n"
1209" return __nvvm_atom_cas_gen_i(__p, __cmp, __v);\n"
1210"}\n"
1211"__DEVICE__ int __iAtomicCAS_block(int *__p, int __cmp, int __v) {\n"
1212" return __nvvm_atom_cta_cas_gen_i(__p, __cmp, __v);\n"
1213"}\n"
1214"__DEVICE__ int __iAtomicCAS_system(int *__p, int __cmp, int __v) {\n"
1215" return __nvvm_atom_sys_cas_gen_i(__p, __cmp, __v);\n"
1216"}\n"
1217"__DEVICE__ int __iAtomicExch(int *__p, int __v) {\n"
1218" return __nvvm_atom_xchg_gen_i(__p, __v);\n"
1219"}\n"
1220"__DEVICE__ int __iAtomicExch_block(int *__p, int __v) {\n"
1221" return __nvvm_atom_cta_xchg_gen_i(__p, __v);\n"
1222"}\n"
1223"__DEVICE__ int __iAtomicExch_system(int *__p, int __v) {\n"
1224" return __nvvm_atom_sys_xchg_gen_i(__p, __v);\n"
1225"}\n"
1226"__DEVICE__ int __iAtomicMax(int *__p, int __v) {\n"
1227" return __nvvm_atom_max_gen_i(__p, __v);\n"
1228"}\n"
1229"__DEVICE__ int __iAtomicMax_block(int *__p, int __v) {\n"
1230" return __nvvm_atom_cta_max_gen_i(__p, __v);\n"
1231"}\n"
1232"__DEVICE__ int __iAtomicMax_system(int *__p, int __v) {\n"
1233" return __nvvm_atom_sys_max_gen_i(__p, __v);\n"
1234"}\n"
1235"__DEVICE__ int __iAtomicMin(int *__p, int __v) {\n"
1236" return __nvvm_atom_min_gen_i(__p, __v);\n"
1237"}\n"
1238"__DEVICE__ int __iAtomicMin_block(int *__p, int __v) {\n"
1239" return __nvvm_atom_cta_min_gen_i(__p, __v);\n"
1240"}\n"
1241"__DEVICE__ int __iAtomicMin_system(int *__p, int __v) {\n"
1242" return __nvvm_atom_sys_min_gen_i(__p, __v);\n"
1243"}\n"
1244"__DEVICE__ int __iAtomicOr(int *__p, int __v) {\n"
1245" return __nvvm_atom_or_gen_i(__p, __v);\n"
1246"}\n"
1247"__DEVICE__ int __iAtomicOr_block(int *__p, int __v) {\n"
1248" return __nvvm_atom_cta_or_gen_i(__p, __v);\n"
1249"}\n"
1250"__DEVICE__ int __iAtomicOr_system(int *__p, int __v) {\n"
1251" return __nvvm_atom_sys_or_gen_i(__p, __v);\n"
1252"}\n"
1253"__DEVICE__ int __iAtomicXor(int *__p, int __v) {\n"
1254" return __nvvm_atom_xor_gen_i(__p, __v);\n"
1255"}\n"
1256"__DEVICE__ int __iAtomicXor_block(int *__p, int __v) {\n"
1257" return __nvvm_atom_cta_xor_gen_i(__p, __v);\n"
1258"}\n"
1259"__DEVICE__ int __iAtomicXor_system(int *__p, int __v) {\n"
1260" return __nvvm_atom_sys_xor_gen_i(__p, __v);\n"
1261"}\n"
1262"__DEVICE__ long long __illAtomicMax(long long *__p, long long __v) {\n"
1263" return __nvvm_atom_max_gen_ll(__p, __v);\n"
1264"}\n"
1265"__DEVICE__ long long __illAtomicMax_block(long long *__p, long long __v) {\n"
1266" return __nvvm_atom_cta_max_gen_ll(__p, __v);\n"
1267"}\n"
1268"__DEVICE__ long long __illAtomicMax_system(long long *__p, long long __v) {\n"
1269" return __nvvm_atom_sys_max_gen_ll(__p, __v);\n"
1270"}\n"
1271"__DEVICE__ long long __illAtomicMin(long long *__p, long long __v) {\n"
1272" return __nvvm_atom_min_gen_ll(__p, __v);\n"
1273"}\n"
1274"__DEVICE__ long long __illAtomicMin_block(long long *__p, long long __v) {\n"
1275" return __nvvm_atom_cta_min_gen_ll(__p, __v);\n"
1276"}\n"
1277"__DEVICE__ long long __illAtomicMin_system(long long *__p, long long __v) {\n"
1278" return __nvvm_atom_sys_min_gen_ll(__p, __v);\n"
1279"}\n"
1280"__DEVICE__ double __int2double_rn(int __a) { return __nv_int2double_rn(__a); }\n"
1281"__DEVICE__ float __int2float_rd(int __a) { return __nv_int2float_rd(__a); }\n"
1282"__DEVICE__ float __int2float_rn(int __a) { return __nv_int2float_rn(__a); }\n"
1283"__DEVICE__ float __int2float_ru(int __a) { return __nv_int2float_ru(__a); }\n"
1284"__DEVICE__ float __int2float_rz(int __a) { return __nv_int2float_rz(__a); }\n"
1285"__DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); }\n"
1286"__DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); }\n"
1287"__DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); }\n"
1288"__DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); }\n"
1289"__DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); }\n"
1290"__DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); }\n"
1291"__DEVICE__ double __ll2double_rd(long long __a) {\n"
1292" return __nv_ll2double_rd(__a);\n"
1293"}\n"
1294"__DEVICE__ double __ll2double_rn(long long __a) {\n"
1295" return __nv_ll2double_rn(__a);\n"
1296"}\n"
1297"__DEVICE__ double __ll2double_ru(long long __a) {\n"
1298" return __nv_ll2double_ru(__a);\n"
1299"}\n"
1300"__DEVICE__ double __ll2double_rz(long long __a) {\n"
1301" return __nv_ll2double_rz(__a);\n"
1302"}\n"
1303"__DEVICE__ float __ll2float_rd(long long __a) { return __nv_ll2float_rd(__a); }\n"
1304"__DEVICE__ float __ll2float_rn(long long __a) { return __nv_ll2float_rn(__a); }\n"
1305"__DEVICE__ float __ll2float_ru(long long __a) { return __nv_ll2float_ru(__a); }\n"
1306"__DEVICE__ float __ll2float_rz(long long __a) { return __nv_ll2float_rz(__a); }\n"
1307"__DEVICE__ long long __llAtomicAnd(long long *__p, long long __v) {\n"
1308" return __nvvm_atom_and_gen_ll(__p, __v);\n"
1309"}\n"
1310"__DEVICE__ long long __llAtomicAnd_block(long long *__p, long long __v) {\n"
1311" return __nvvm_atom_cta_and_gen_ll(__p, __v);\n"
1312"}\n"
1313"__DEVICE__ long long __llAtomicAnd_system(long long *__p, long long __v) {\n"
1314" return __nvvm_atom_sys_and_gen_ll(__p, __v);\n"
1315"}\n"
1316"__DEVICE__ long long __llAtomicOr(long long *__p, long long __v) {\n"
1317" return __nvvm_atom_or_gen_ll(__p, __v);\n"
1318"}\n"
1319"__DEVICE__ long long __llAtomicOr_block(long long *__p, long long __v) {\n"
1320" return __nvvm_atom_cta_or_gen_ll(__p, __v);\n"
1321"}\n"
1322"__DEVICE__ long long __llAtomicOr_system(long long *__p, long long __v) {\n"
1323" return __nvvm_atom_sys_or_gen_ll(__p, __v);\n"
1324"}\n"
1325"__DEVICE__ long long __llAtomicXor(long long *__p, long long __v) {\n"
1326" return __nvvm_atom_xor_gen_ll(__p, __v);\n"
1327"}\n"
1328"__DEVICE__ long long __llAtomicXor_block(long long *__p, long long __v) {\n"
1329" return __nvvm_atom_cta_xor_gen_ll(__p, __v);\n"
1330"}\n"
1331"__DEVICE__ long long __llAtomicXor_system(long long *__p, long long __v) {\n"
1332" return __nvvm_atom_sys_xor_gen_ll(__p, __v);\n"
1333"}\n"
1334"__DEVICE__ float __log10f(float __a) { return __nv_fast_log10f(__a); }\n"
1335"__DEVICE__ float __log2f(float __a) { return __nv_fast_log2f(__a); }\n"
1336"__DEVICE__ float __logf(float __a) { return __nv_fast_logf(__a); }\n"
1337"__DEVICE__ double __longlong_as_double(long long __a) {\n"
1338" return __nv_longlong_as_double(__a);\n"
1339"}\n"
1340"__DEVICE__ int __mul24(int __a, int __b) { return __nv_mul24(__a, __b); }\n"
1341"__DEVICE__ long long __mul64hi(long long __a, long long __b) {\n"
1342" return __nv_mul64hi(__a, __b);\n"
1343"}\n"
1344"__DEVICE__ int __mulhi(int __a, int __b) { return __nv_mulhi(__a, __b); }\n"
1345"__DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); }\n"
1346"__DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); }\n"
1347"__DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); }\n"
1348"__DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); }\n"
1349"__DEVICE__ int __popc(int __a) { return __nv_popc(__a); }\n"
1350"__DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); }\n"
1351"__DEVICE__ float __powf(float __a, float __b) {\n"
1352" return __nv_fast_powf(__a, __b);\n"
1353"}\n"
1354"\n"
1355"// Parameter must have a known integer value.\n"
1356"#define __prof_trigger(__a) asm __volatile__(\"pmevent \\t%0;\" ::\"i\"(__a))\n"
1357"__DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); }\n"
1358"__DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {\n"
1359" return __nv_sad(__a, __b, __c);\n"
1360"}\n"
1361"__DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); }\n"
1362"__DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); }\n"
1363"__DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); }\n"
1364"__DEVICE__ void __sincosf(float __a, float *__sptr, float *__cptr) {\n"
1365" return __nv_fast_sincosf(__a, __sptr, __cptr);\n"
1366"}\n"
1367"__DEVICE__ float __sinf(float __a) { return __nv_fast_sinf(__a); }\n"
1368"__DEVICE__ int __syncthreads_and(int __a) { return __nvvm_bar0_and(__a); }\n"
1369"__DEVICE__ int __syncthreads_count(int __a) { return __nvvm_bar0_popc(__a); }\n"
1370"__DEVICE__ int __syncthreads_or(int __a) { return __nvvm_bar0_or(__a); }\n"
1371"__DEVICE__ float __tanf(float __a) { return __nv_fast_tanf(__a); }\n"
1372"__DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); }\n"
1373"__DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); };\n"
1374"__DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); };\n"
1375"__DEVICE__ void __trap(void) { asm volatile(\"trap;\"); }\n"
1376"__DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) {\n"
1377" return __nvvm_atom_add_gen_i((int *)__p, __v);\n"
1378"}\n"
1379"__DEVICE__ unsigned int __uAtomicAdd_block(unsigned int *__p,\n"
1380" unsigned int __v) {\n"
1381" return __nvvm_atom_cta_add_gen_i((int *)__p, __v);\n"
1382"}\n"
1383"__DEVICE__ unsigned int __uAtomicAdd_system(unsigned int *__p,\n"
1384" unsigned int __v) {\n"
1385" return __nvvm_atom_sys_add_gen_i((int *)__p, __v);\n"
1386"}\n"
1387"__DEVICE__ unsigned int __uAtomicAnd(unsigned int *__p, unsigned int __v) {\n"
1388" return __nvvm_atom_and_gen_i((int *)__p, __v);\n"
1389"}\n"
1390"__DEVICE__ unsigned int __uAtomicAnd_block(unsigned int *__p,\n"
1391" unsigned int __v) {\n"
1392" return __nvvm_atom_cta_and_gen_i((int *)__p, __v);\n"
1393"}\n"
1394"__DEVICE__ unsigned int __uAtomicAnd_system(unsigned int *__p,\n"
1395" unsigned int __v) {\n"
1396" return __nvvm_atom_sys_and_gen_i((int *)__p, __v);\n"
1397"}\n"
1398"__DEVICE__ unsigned int __uAtomicCAS(unsigned int *__p, unsigned int __cmp,\n"
1399" unsigned int __v) {\n"
1400" return __nvvm_atom_cas_gen_i((int *)__p, __cmp, __v);\n"
1401"}\n"
1402"__DEVICE__ unsigned int\n"
1403"__uAtomicCAS_block(unsigned int *__p, unsigned int __cmp, unsigned int __v) {\n"
1404" return __nvvm_atom_cta_cas_gen_i((int *)__p, __cmp, __v);\n"
1405"}\n"
1406"__DEVICE__ unsigned int\n"
1407"__uAtomicCAS_system(unsigned int *__p, unsigned int __cmp, unsigned int __v) {\n"
1408" return __nvvm_atom_sys_cas_gen_i((int *)__p, __cmp, __v);\n"
1409"}\n"
1410"__DEVICE__ unsigned int __uAtomicDec(unsigned int *__p, unsigned int __v) {\n"
1411" return __nvvm_atom_dec_gen_ui(__p, __v);\n"
1412"}\n"
1413"__DEVICE__ unsigned int __uAtomicDec_block(unsigned int *__p,\n"
1414" unsigned int __v) {\n"
1415" return __nvvm_atom_cta_dec_gen_ui(__p, __v);\n"
1416"}\n"
1417"__DEVICE__ unsigned int __uAtomicDec_system(unsigned int *__p,\n"
1418" unsigned int __v) {\n"
1419" return __nvvm_atom_sys_dec_gen_ui(__p, __v);\n"
1420"}\n"
1421"__DEVICE__ unsigned int __uAtomicExch(unsigned int *__p, unsigned int __v) {\n"
1422" return __nvvm_atom_xchg_gen_i((int *)__p, __v);\n"
1423"}\n"
1424"__DEVICE__ unsigned int __uAtomicExch_block(unsigned int *__p,\n"
1425" unsigned int __v) {\n"
1426" return __nvvm_atom_cta_xchg_gen_i((int *)__p, __v);\n"
1427"}\n"
1428"__DEVICE__ unsigned int __uAtomicExch_system(unsigned int *__p,\n"
1429" unsigned int __v) {\n"
1430" return __nvvm_atom_sys_xchg_gen_i((int *)__p, __v);\n"
1431"}\n"
1432"__DEVICE__ unsigned int __uAtomicInc(unsigned int *__p, unsigned int __v) {\n"
1433" return __nvvm_atom_inc_gen_ui(__p, __v);\n"
1434"}\n"
1435"__DEVICE__ unsigned int __uAtomicInc_block(unsigned int *__p,\n"
1436" unsigned int __v) {\n"
1437" return __nvvm_atom_cta_inc_gen_ui(__p, __v);\n"
1438"}\n"
1439"__DEVICE__ unsigned int __uAtomicInc_system(unsigned int *__p,\n"
1440" unsigned int __v) {\n"
1441" return __nvvm_atom_sys_inc_gen_ui(__p, __v);\n"
1442"}\n"
1443"__DEVICE__ unsigned int __uAtomicMax(unsigned int *__p, unsigned int __v) {\n"
1444" return __nvvm_atom_max_gen_ui(__p, __v);\n"
1445"}\n"
1446"__DEVICE__ unsigned int __uAtomicMax_block(unsigned int *__p,\n"
1447" unsigned int __v) {\n"
1448" return __nvvm_atom_cta_max_gen_ui(__p, __v);\n"
1449"}\n"
1450"__DEVICE__ unsigned int __uAtomicMax_system(unsigned int *__p,\n"
1451" unsigned int __v) {\n"
1452" return __nvvm_atom_sys_max_gen_ui(__p, __v);\n"
1453"}\n"
1454"__DEVICE__ unsigned int __uAtomicMin(unsigned int *__p, unsigned int __v) {\n"
1455" return __nvvm_atom_min_gen_ui(__p, __v);\n"
1456"}\n"
1457"__DEVICE__ unsigned int __uAtomicMin_block(unsigned int *__p,\n"
1458" unsigned int __v) {\n"
1459" return __nvvm_atom_cta_min_gen_ui(__p, __v);\n"
1460"}\n"
1461"__DEVICE__ unsigned int __uAtomicMin_system(unsigned int *__p,\n"
1462" unsigned int __v) {\n"
1463" return __nvvm_atom_sys_min_gen_ui(__p, __v);\n"
1464"}\n"
1465"__DEVICE__ unsigned int __uAtomicOr(unsigned int *__p, unsigned int __v) {\n"
1466" return __nvvm_atom_or_gen_i((int *)__p, __v);\n"
1467"}\n"
1468"__DEVICE__ unsigned int __uAtomicOr_block(unsigned int *__p, unsigned int __v) {\n"
1469" return __nvvm_atom_cta_or_gen_i((int *)__p, __v);\n"
1470"}\n"
1471"__DEVICE__ unsigned int __uAtomicOr_system(unsigned int *__p,\n"
1472" unsigned int __v) {\n"
1473" return __nvvm_atom_sys_or_gen_i((int *)__p, __v);\n"
1474"}\n"
1475"__DEVICE__ unsigned int __uAtomicXor(unsigned int *__p, unsigned int __v) {\n"
1476" return __nvvm_atom_xor_gen_i((int *)__p, __v);\n"
1477"}\n"
1478"__DEVICE__ unsigned int __uAtomicXor_block(unsigned int *__p,\n"
1479" unsigned int __v) {\n"
1480" return __nvvm_atom_cta_xor_gen_i((int *)__p, __v);\n"
1481"}\n"
1482"__DEVICE__ unsigned int __uAtomicXor_system(unsigned int *__p,\n"
1483" unsigned int __v) {\n"
1484" return __nvvm_atom_sys_xor_gen_i((int *)__p, __v);\n"
1485"}\n"
1486"__DEVICE__ unsigned int __uhadd(unsigned int __a, unsigned int __b) {\n"
1487" return __nv_uhadd(__a, __b);\n"
1488"}\n"
1489"__DEVICE__ double __uint2double_rn(unsigned int __a) {\n"
1490" return __nv_uint2double_rn(__a);\n"
1491"}\n"
1492"__DEVICE__ float __uint2float_rd(unsigned int __a) {\n"
1493" return __nv_uint2float_rd(__a);\n"
1494"}\n"
1495"__DEVICE__ float __uint2float_rn(unsigned int __a) {\n"
1496" return __nv_uint2float_rn(__a);\n"
1497"}\n"
1498"__DEVICE__ float __uint2float_ru(unsigned int __a) {\n"
1499" return __nv_uint2float_ru(__a);\n"
1500"}\n"
1501"__DEVICE__ float __uint2float_rz(unsigned int __a) {\n"
1502" return __nv_uint2float_rz(__a);\n"
1503"}\n"
1504"__DEVICE__ float __uint_as_float(unsigned int __a) {\n"
1505" return __nv_uint_as_float(__a);\n"
1506"} //\n"
1507"__DEVICE__ double __ull2double_rd(unsigned long long __a) {\n"
1508" return __nv_ull2double_rd(__a);\n"
1509"}\n"
1510"__DEVICE__ double __ull2double_rn(unsigned long long __a) {\n"
1511" return __nv_ull2double_rn(__a);\n"
1512"}\n"
1513"__DEVICE__ double __ull2double_ru(unsigned long long __a) {\n"
1514" return __nv_ull2double_ru(__a);\n"
1515"}\n"
1516"__DEVICE__ double __ull2double_rz(unsigned long long __a) {\n"
1517" return __nv_ull2double_rz(__a);\n"
1518"}\n"
1519"__DEVICE__ float __ull2float_rd(unsigned long long __a) {\n"
1520" return __nv_ull2float_rd(__a);\n"
1521"}\n"
1522"__DEVICE__ float __ull2float_rn(unsigned long long __a) {\n"
1523" return __nv_ull2float_rn(__a);\n"
1524"}\n"
1525"__DEVICE__ float __ull2float_ru(unsigned long long __a) {\n"
1526" return __nv_ull2float_ru(__a);\n"
1527"}\n"
1528"__DEVICE__ float __ull2float_rz(unsigned long long __a) {\n"
1529" return __nv_ull2float_rz(__a);\n"
1530"}\n"
1531"__DEVICE__ unsigned long long __ullAtomicAdd(unsigned long long *__p,\n"
1532" unsigned long long __v) {\n"
1533" return __nvvm_atom_add_gen_ll((long long *)__p, __v);\n"
1534"}\n"
1535"__DEVICE__ unsigned long long __ullAtomicAdd_block(unsigned long long *__p,\n"
1536" unsigned long long __v) {\n"
1537" return __nvvm_atom_cta_add_gen_ll((long long *)__p, __v);\n"
1538"}\n"
1539"__DEVICE__ unsigned long long __ullAtomicAdd_system(unsigned long long *__p,\n"
1540" unsigned long long __v) {\n"
1541" return __nvvm_atom_sys_add_gen_ll((long long *)__p, __v);\n"
1542"}\n"
1543"__DEVICE__ unsigned long long __ullAtomicAnd(unsigned long long *__p,\n"
1544" unsigned long long __v) {\n"
1545" return __nvvm_atom_and_gen_ll((long long *)__p, __v);\n"
1546"}\n"
1547"__DEVICE__ unsigned long long __ullAtomicAnd_block(unsigned long long *__p,\n"
1548" unsigned long long __v) {\n"
1549" return __nvvm_atom_cta_and_gen_ll((long long *)__p, __v);\n"
1550"}\n"
1551"__DEVICE__ unsigned long long __ullAtomicAnd_system(unsigned long long *__p,\n"
1552" unsigned long long __v) {\n"
1553" return __nvvm_atom_sys_and_gen_ll((long long *)__p, __v);\n"
1554"}\n"
1555"__DEVICE__ unsigned long long __ullAtomicCAS(unsigned long long *__p,\n"
1556" unsigned long long __cmp,\n"
1557" unsigned long long __v) {\n"
1558" return __nvvm_atom_cas_gen_ll((long long *)__p, __cmp, __v);\n"
1559"}\n"
1560"__DEVICE__ unsigned long long __ullAtomicCAS_block(unsigned long long *__p,\n"
1561" unsigned long long __cmp,\n"
1562" unsigned long long __v) {\n"
1563" return __nvvm_atom_cta_cas_gen_ll((long long *)__p, __cmp, __v);\n"
1564"}\n"
1565"__DEVICE__ unsigned long long __ullAtomicCAS_system(unsigned long long *__p,\n"
1566" unsigned long long __cmp,\n"
1567" unsigned long long __v) {\n"
1568" return __nvvm_atom_sys_cas_gen_ll((long long *)__p, __cmp, __v);\n"
1569"}\n"
1570"__DEVICE__ unsigned long long __ullAtomicExch(unsigned long long *__p,\n"
1571" unsigned long long __v) {\n"
1572" return __nvvm_atom_xchg_gen_ll((long long *)__p, __v);\n"
1573"}\n"
1574"__DEVICE__ unsigned long long __ullAtomicExch_block(unsigned long long *__p,\n"
1575" unsigned long long __v) {\n"
1576" return __nvvm_atom_cta_xchg_gen_ll((long long *)__p, __v);\n"
1577"}\n"
1578"__DEVICE__ unsigned long long __ullAtomicExch_system(unsigned long long *__p,\n"
1579" unsigned long long __v) {\n"
1580" return __nvvm_atom_sys_xchg_gen_ll((long long *)__p, __v);\n"
1581"}\n"
1582"__DEVICE__ unsigned long long __ullAtomicMax(unsigned long long *__p,\n"
1583" unsigned long long __v) {\n"
1584" return __nvvm_atom_max_gen_ull(__p, __v);\n"
1585"}\n"
1586"__DEVICE__ unsigned long long __ullAtomicMax_block(unsigned long long *__p,\n"
1587" unsigned long long __v) {\n"
1588" return __nvvm_atom_cta_max_gen_ull(__p, __v);\n"
1589"}\n"
1590"__DEVICE__ unsigned long long __ullAtomicMax_system(unsigned long long *__p,\n"
1591" unsigned long long __v) {\n"
1592" return __nvvm_atom_sys_max_gen_ull(__p, __v);\n"
1593"}\n"
1594"__DEVICE__ unsigned long long __ullAtomicMin(unsigned long long *__p,\n"
1595" unsigned long long __v) {\n"
1596" return __nvvm_atom_min_gen_ull(__p, __v);\n"
1597"}\n"
1598"__DEVICE__ unsigned long long __ullAtomicMin_block(unsigned long long *__p,\n"
1599" unsigned long long __v) {\n"
1600" return __nvvm_atom_cta_min_gen_ull(__p, __v);\n"
1601"}\n"
1602"__DEVICE__ unsigned long long __ullAtomicMin_system(unsigned long long *__p,\n"
1603" unsigned long long __v) {\n"
1604" return __nvvm_atom_sys_min_gen_ull(__p, __v);\n"
1605"}\n"
1606"__DEVICE__ unsigned long long __ullAtomicOr(unsigned long long *__p,\n"
1607" unsigned long long __v) {\n"
1608" return __nvvm_atom_or_gen_ll((long long *)__p, __v);\n"
1609"}\n"
1610"__DEVICE__ unsigned long long __ullAtomicOr_block(unsigned long long *__p,\n"
1611" unsigned long long __v) {\n"
1612" return __nvvm_atom_cta_or_gen_ll((long long *)__p, __v);\n"
1613"}\n"
1614"__DEVICE__ unsigned long long __ullAtomicOr_system(unsigned long long *__p,\n"
1615" unsigned long long __v) {\n"
1616" return __nvvm_atom_sys_or_gen_ll((long long *)__p, __v);\n"
1617"}\n"
1618"__DEVICE__ unsigned long long __ullAtomicXor(unsigned long long *__p,\n"
1619" unsigned long long __v) {\n"
1620" return __nvvm_atom_xor_gen_ll((long long *)__p, __v);\n"
1621"}\n"
1622"__DEVICE__ unsigned long long __ullAtomicXor_block(unsigned long long *__p,\n"
1623" unsigned long long __v) {\n"
1624" return __nvvm_atom_cta_xor_gen_ll((long long *)__p, __v);\n"
1625"}\n"
1626"__DEVICE__ unsigned long long __ullAtomicXor_system(unsigned long long *__p,\n"
1627" unsigned long long __v) {\n"
1628" return __nvvm_atom_sys_xor_gen_ll((long long *)__p, __v);\n"
1629"}\n"
1630"__DEVICE__ unsigned int __umul24(unsigned int __a, unsigned int __b) {\n"
1631" return __nv_umul24(__a, __b);\n"
1632"}\n"
1633"__DEVICE__ unsigned long long __umul64hi(unsigned long long __a,\n"
1634" unsigned long long __b) {\n"
1635" return __nv_umul64hi(__a, __b);\n"
1636"}\n"
1637"__DEVICE__ unsigned int __umulhi(unsigned int __a, unsigned int __b) {\n"
1638" return __nv_umulhi(__a, __b);\n"
1639"}\n"
1640"__DEVICE__ unsigned int __urhadd(unsigned int __a, unsigned int __b) {\n"
1641" return __nv_urhadd(__a, __b);\n"
1642"}\n"
1643"__DEVICE__ unsigned int __usad(unsigned int __a, unsigned int __b,\n"
1644" unsigned int __c) {\n"
1645" return __nv_usad(__a, __b, __c);\n"
1646"}\n"
1647"\n"
1648"#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020\n"
1649"__DEVICE__ unsigned int __vabs2(unsigned int __a) { return __nv_vabs2(__a); }\n"
1650"__DEVICE__ unsigned int __vabs4(unsigned int __a) { return __nv_vabs4(__a); }\n"
1651"__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {\n"
1652" return __nv_vabsdiffs2(__a, __b);\n"
1653"}\n"
1654"__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {\n"
1655" return __nv_vabsdiffs4(__a, __b);\n"
1656"}\n"
1657"__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {\n"
1658" return __nv_vabsdiffu2(__a, __b);\n"
1659"}\n"
1660"__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {\n"
1661" return __nv_vabsdiffu4(__a, __b);\n"
1662"}\n"
1663"__DEVICE__ unsigned int __vabsss2(unsigned int __a) {\n"
1664" return __nv_vabsss2(__a);\n"
1665"}\n"
1666"__DEVICE__ unsigned int __vabsss4(unsigned int __a) {\n"
1667" return __nv_vabsss4(__a);\n"
1668"}\n"
1669"__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {\n"
1670" return __nv_vadd2(__a, __b);\n"
1671"}\n"
1672"__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {\n"
1673" return __nv_vadd4(__a, __b);\n"
1674"}\n"
1675"__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {\n"
1676" return __nv_vaddss2(__a, __b);\n"
1677"}\n"
1678"__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {\n"
1679" return __nv_vaddss4(__a, __b);\n"
1680"}\n"
1681"__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {\n"
1682" return __nv_vaddus2(__a, __b);\n"
1683"}\n"
1684"__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {\n"
1685" return __nv_vaddus4(__a, __b);\n"
1686"}\n"
1687"__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {\n"
1688" return __nv_vavgs2(__a, __b);\n"
1689"}\n"
1690"__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {\n"
1691" return __nv_vavgs4(__a, __b);\n"
1692"}\n"
1693"__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {\n"
1694" return __nv_vavgu2(__a, __b);\n"
1695"}\n"
1696"__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {\n"
1697" return __nv_vavgu4(__a, __b);\n"
1698"}\n"
1699"__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {\n"
1700" return __nv_vcmpeq2(__a, __b);\n"
1701"}\n"
1702"__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {\n"
1703" return __nv_vcmpeq4(__a, __b);\n"
1704"}\n"
1705"__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {\n"
1706" return __nv_vcmpges2(__a, __b);\n"
1707"}\n"
1708"__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {\n"
1709" return __nv_vcmpges4(__a, __b);\n"
1710"}\n"
1711"__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {\n"
1712" return __nv_vcmpgeu2(__a, __b);\n"
1713"}\n"
1714"__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {\n"
1715" return __nv_vcmpgeu4(__a, __b);\n"
1716"}\n"
1717"__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {\n"
1718" return __nv_vcmpgts2(__a, __b);\n"
1719"}\n"
1720"__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {\n"
1721" return __nv_vcmpgts4(__a, __b);\n"
1722"}\n"
1723"__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {\n"
1724" return __nv_vcmpgtu2(__a, __b);\n"
1725"}\n"
1726"__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {\n"
1727" return __nv_vcmpgtu4(__a, __b);\n"
1728"}\n"
1729"__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {\n"
1730" return __nv_vcmples2(__a, __b);\n"
1731"}\n"
1732"__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {\n"
1733" return __nv_vcmples4(__a, __b);\n"
1734"}\n"
1735"__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {\n"
1736" return __nv_vcmpleu2(__a, __b);\n"
1737"}\n"
1738"__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {\n"
1739" return __nv_vcmpleu4(__a, __b);\n"
1740"}\n"
1741"__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {\n"
1742" return __nv_vcmplts2(__a, __b);\n"
1743"}\n"
1744"__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {\n"
1745" return __nv_vcmplts4(__a, __b);\n"
1746"}\n"
1747"__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {\n"
1748" return __nv_vcmpltu2(__a, __b);\n"
1749"}\n"
1750"__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {\n"
1751" return __nv_vcmpltu4(__a, __b);\n"
1752"}\n"
1753"__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {\n"
1754" return __nv_vcmpne2(__a, __b);\n"
1755"}\n"
1756"__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {\n"
1757" return __nv_vcmpne4(__a, __b);\n"
1758"}\n"
1759"__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {\n"
1760" return __nv_vhaddu2(__a, __b);\n"
1761"}\n"
1762"__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {\n"
1763" return __nv_vhaddu4(__a, __b);\n"
1764"}\n"
1765"__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {\n"
1766" return __nv_vmaxs2(__a, __b);\n"
1767"}\n"
1768"__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {\n"
1769" return __nv_vmaxs4(__a, __b);\n"
1770"}\n"
1771"__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {\n"
1772" return __nv_vmaxu2(__a, __b);\n"
1773"}\n"
1774"__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {\n"
1775" return __nv_vmaxu4(__a, __b);\n"
1776"}\n"
1777"__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {\n"
1778" return __nv_vmins2(__a, __b);\n"
1779"}\n"
1780"__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {\n"
1781" return __nv_vmins4(__a, __b);\n"
1782"}\n"
1783"__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {\n"
1784" return __nv_vminu2(__a, __b);\n"
1785"}\n"
1786"__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {\n"
1787" return __nv_vminu4(__a, __b);\n"
1788"}\n"
1789"__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __nv_vneg2(__a); }\n"
1790"__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __nv_vneg4(__a); }\n"
1791"__DEVICE__ unsigned int __vnegss2(unsigned int __a) {\n"
1792" return __nv_vnegss2(__a);\n"
1793"}\n"
1794"__DEVICE__ unsigned int __vnegss4(unsigned int __a) {\n"
1795" return __nv_vnegss4(__a);\n"
1796"}\n"
1797"__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {\n"
1798" return __nv_vsads2(__a, __b);\n"
1799"}\n"
1800"__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {\n"
1801" return __nv_vsads4(__a, __b);\n"
1802"}\n"
1803"__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {\n"
1804" return __nv_vsadu2(__a, __b);\n"
1805"}\n"
1806"__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {\n"
1807" return __nv_vsadu4(__a, __b);\n"
1808"}\n"
1809"__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {\n"
1810" return __nv_vseteq2(__a, __b);\n"
1811"}\n"
1812"__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {\n"
1813" return __nv_vseteq4(__a, __b);\n"
1814"}\n"
1815"__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {\n"
1816" return __nv_vsetges2(__a, __b);\n"
1817"}\n"
1818"__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {\n"
1819" return __nv_vsetges4(__a, __b);\n"
1820"}\n"
1821"__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {\n"
1822" return __nv_vsetgeu2(__a, __b);\n"
1823"}\n"
1824"__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {\n"
1825" return __nv_vsetgeu4(__a, __b);\n"
1826"}\n"
1827"__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {\n"
1828" return __nv_vsetgts2(__a, __b);\n"
1829"}\n"
1830"__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {\n"
1831" return __nv_vsetgts4(__a, __b);\n"
1832"}\n"
1833"__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {\n"
1834" return __nv_vsetgtu2(__a, __b);\n"
1835"}\n"
1836"__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {\n"
1837" return __nv_vsetgtu4(__a, __b);\n"
1838"}\n"
1839"__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {\n"
1840" return __nv_vsetles2(__a, __b);\n"
1841"}\n"
1842"__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {\n"
1843" return __nv_vsetles4(__a, __b);\n"
1844"}\n"
1845"__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {\n"
1846" return __nv_vsetleu2(__a, __b);\n"
1847"}\n"
1848"__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {\n"
1849" return __nv_vsetleu4(__a, __b);\n"
1850"}\n"
1851"__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {\n"
1852" return __nv_vsetlts2(__a, __b);\n"
1853"}\n"
1854"__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {\n"
1855" return __nv_vsetlts4(__a, __b);\n"
1856"}\n"
1857"__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {\n"
1858" return __nv_vsetltu2(__a, __b);\n"
1859"}\n"
1860"__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {\n"
1861" return __nv_vsetltu4(__a, __b);\n"
1862"}\n"
1863"__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {\n"
1864" return __nv_vsetne2(__a, __b);\n"
1865"}\n"
1866"__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {\n"
1867" return __nv_vsetne4(__a, __b);\n"
1868"}\n"
1869"__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {\n"
1870" return __nv_vsub2(__a, __b);\n"
1871"}\n"
1872"__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {\n"
1873" return __nv_vsub4(__a, __b);\n"
1874"}\n"
1875"__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {\n"
1876" return __nv_vsubss2(__a, __b);\n"
1877"}\n"
1878"__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {\n"
1879" return __nv_vsubss4(__a, __b);\n"
1880"}\n"
1881"__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {\n"
1882" return __nv_vsubus2(__a, __b);\n"
1883"}\n"
1884"__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {\n"
1885" return __nv_vsubus4(__a, __b);\n"
1886"}\n"
1887"#else // CUDA_VERSION >= 9020\n"
1888"// CUDA no longer provides inline assembly (or bitcode) implementation of these\n"
1889"// functions, so we have to reimplment them. The implementation is naive and is\n"
1890"// not optimized for performance.\n"
1891"\n"
1892"// Helper function to convert N-bit boolean subfields into all-0 or all-1.\n"
1893"// E.g. __bool2mask(0x01000100,8) -> 0xff00ff00\n"
1894"// __bool2mask(0x00010000,16) -> 0xffff0000\n"
1895"__DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift) {\n"
1896" return (__a << shift) - __a;\n"
1897"}\n"
1898"__DEVICE__ unsigned int __vabs2(unsigned int __a) {\n"
1899" unsigned int r;\n"
1900" asm(\"vabsdiff2.s32.s32.s32 %0,%1,%2,%3;\"\n"
1901" : \"=r\"(r)\n"
1902" : \"r\"(__a), \"r\"(0), \"r\"(0));\n"
1903" return r;\n"
1904"}\n"
1905"__DEVICE__ unsigned int __vabs4(unsigned int __a) {\n"
1906" unsigned int r;\n"
1907" asm(\"vabsdiff4.s32.s32.s32 %0,%1,%2,%3;\"\n"
1908" : \"=r\"(r)\n"
1909" : \"r\"(__a), \"r\"(0), \"r\"(0));\n"
1910" return r;\n"
1911"}\n"
1912"__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {\n"
1913" unsigned int r;\n"
1914" asm(\"vabsdiff2.s32.s32.s32 %0,%1,%2,%3;\"\n"
1915" : \"=r\"(r)\n"
1916" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1917" return r;\n"
1918"}\n"
1919"\n"
1920"__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {\n"
1921" unsigned int r;\n"
1922" asm(\"vabsdiff4.s32.s32.s32 %0,%1,%2,%3;\"\n"
1923" : \"=r\"(r)\n"
1924" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1925" return r;\n"
1926"}\n"
1927"__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {\n"
1928" unsigned int r;\n"
1929" asm(\"vabsdiff2.u32.u32.u32 %0,%1,%2,%3;\"\n"
1930" : \"=r\"(r)\n"
1931" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1932" return r;\n"
1933"}\n"
1934"__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {\n"
1935" unsigned int r;\n"
1936" asm(\"vabsdiff4.u32.u32.u32 %0,%1,%2,%3;\"\n"
1937" : \"=r\"(r)\n"
1938" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1939" return r;\n"
1940"}\n"
1941"__DEVICE__ unsigned int __vabsss2(unsigned int __a) {\n"
1942" unsigned int r;\n"
1943" asm(\"vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
1944" : \"=r\"(r)\n"
1945" : \"r\"(__a), \"r\"(0), \"r\"(0));\n"
1946" return r;\n"
1947"}\n"
1948"__DEVICE__ unsigned int __vabsss4(unsigned int __a) {\n"
1949" unsigned int r;\n"
1950" asm(\"vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
1951" : \"=r\"(r)\n"
1952" : \"r\"(__a), \"r\"(0), \"r\"(0));\n"
1953" return r;\n"
1954"}\n"
1955"__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {\n"
1956" unsigned int r;\n"
1957" asm(\"vadd2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1958" return r;\n"
1959"}\n"
1960"__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {\n"
1961" unsigned int r;\n"
1962" asm(\"vadd4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1963" return r;\n"
1964"}\n"
1965"__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {\n"
1966" unsigned int r;\n"
1967" asm(\"vadd2.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
1968" : \"=r\"(r)\n"
1969" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1970" return r;\n"
1971"}\n"
1972"__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {\n"
1973" unsigned int r;\n"
1974" asm(\"vadd4.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
1975" : \"=r\"(r)\n"
1976" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1977" return r;\n"
1978"}\n"
1979"__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {\n"
1980" unsigned int r;\n"
1981" asm(\"vadd2.u32.u32.u32.sat %0,%1,%2,%3;\"\n"
1982" : \"=r\"(r)\n"
1983" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1984" return r;\n"
1985"}\n"
1986"__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {\n"
1987" unsigned int r;\n"
1988" asm(\"vadd4.u32.u32.u32.sat %0,%1,%2,%3;\"\n"
1989" : \"=r\"(r)\n"
1990" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1991" return r;\n"
1992"}\n"
1993"__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {\n"
1994" unsigned int r;\n"
1995" asm(\"vavrg2.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1996" return r;\n"
1997"}\n"
1998"__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {\n"
1999" unsigned int r;\n"
2000" asm(\"vavrg4.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2001" return r;\n"
2002"}\n"
2003"__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {\n"
2004" unsigned int r;\n"
2005" asm(\"vavrg2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2006" return r;\n"
2007"}\n"
2008"__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {\n"
2009" unsigned int r;\n"
2010" asm(\"vavrg4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2011" return r;\n"
2012"}\n"
2013"__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {\n"
2014" unsigned int r;\n"
2015" asm(\"vset2.u32.u32.eq %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2016" return r;\n"
2017"}\n"
2018"__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {\n"
2019" return __bool2mask(__vseteq2(__a, __b), 16);\n"
2020"}\n"
2021"__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {\n"
2022" unsigned int r;\n"
2023" asm(\"vset4.u32.u32.eq %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2024" return r;\n"
2025"}\n"
2026"__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {\n"
2027" return __bool2mask(__vseteq4(__a, __b), 8);\n"
2028"}\n"
2029"__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {\n"
2030" unsigned int r;\n"
2031" asm(\"vset2.s32.s32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2032" return r;\n"
2033"}\n"
2034"__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {\n"
2035" return __bool2mask(__vsetges2(__a, __b), 16);\n"
2036"}\n"
2037"__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {\n"
2038" unsigned int r;\n"
2039" asm(\"vset4.s32.s32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2040" return r;\n"
2041"}\n"
2042"__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {\n"
2043" return __bool2mask(__vsetges4(__a, __b), 8);\n"
2044"}\n"
2045"__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {\n"
2046" unsigned int r;\n"
2047" asm(\"vset2.u32.u32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2048" return r;\n"
2049"}\n"
2050"__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {\n"
2051" return __bool2mask(__vsetgeu2(__a, __b), 16);\n"
2052"}\n"
2053"__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {\n"
2054" unsigned int r;\n"
2055" asm(\"vset4.u32.u32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2056" return r;\n"
2057"}\n"
2058"__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {\n"
2059" return __bool2mask(__vsetgeu4(__a, __b), 8);\n"
2060"}\n"
2061"__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {\n"
2062" unsigned int r;\n"
2063" asm(\"vset2.s32.s32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2064" return r;\n"
2065"}\n"
2066"__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {\n"
2067" return __bool2mask(__vsetgts2(__a, __b), 16);\n"
2068"}\n"
2069"__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {\n"
2070" unsigned int r;\n"
2071" asm(\"vset4.s32.s32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2072" return r;\n"
2073"}\n"
2074"__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {\n"
2075" return __bool2mask(__vsetgts4(__a, __b), 8);\n"
2076"}\n"
2077"__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {\n"
2078" unsigned int r;\n"
2079" asm(\"vset2.u32.u32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2080" return r;\n"
2081"}\n"
2082"__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {\n"
2083" return __bool2mask(__vsetgtu2(__a, __b), 16);\n"
2084"}\n"
2085"__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {\n"
2086" unsigned int r;\n"
2087" asm(\"vset4.u32.u32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2088" return r;\n"
2089"}\n"
2090"__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {\n"
2091" return __bool2mask(__vsetgtu4(__a, __b), 8);\n"
2092"}\n"
2093"__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {\n"
2094" unsigned int r;\n"
2095" asm(\"vset2.s32.s32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2096" return r;\n"
2097"}\n"
2098"__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {\n"
2099" return __bool2mask(__vsetles2(__a, __b), 16);\n"
2100"}\n"
2101"__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {\n"
2102" unsigned int r;\n"
2103" asm(\"vset4.s32.s32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2104" return r;\n"
2105"}\n"
2106"__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {\n"
2107" return __bool2mask(__vsetles4(__a, __b), 8);\n"
2108"}\n"
2109"__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {\n"
2110" unsigned int r;\n"
2111" asm(\"vset2.u32.u32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2112" return r;\n"
2113"}\n"
2114"__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {\n"
2115" return __bool2mask(__vsetleu2(__a, __b), 16);\n"
2116"}\n"
2117"__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {\n"
2118" unsigned int r;\n"
2119" asm(\"vset4.u32.u32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2120" return r;\n"
2121"}\n"
2122"__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {\n"
2123" return __bool2mask(__vsetleu4(__a, __b), 8);\n"
2124"}\n"
2125"__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {\n"
2126" unsigned int r;\n"
2127" asm(\"vset2.s32.s32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2128" return r;\n"
2129"}\n"
2130"__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {\n"
2131" return __bool2mask(__vsetlts2(__a, __b), 16);\n"
2132"}\n"
2133"__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {\n"
2134" unsigned int r;\n"
2135" asm(\"vset4.s32.s32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2136" return r;\n"
2137"}\n"
2138"__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {\n"
2139" return __bool2mask(__vsetlts4(__a, __b), 8);\n"
2140"}\n"
2141"__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {\n"
2142" unsigned int r;\n"
2143" asm(\"vset2.u32.u32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2144" return r;\n"
2145"}\n"
2146"__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {\n"
2147" return __bool2mask(__vsetltu2(__a, __b), 16);\n"
2148"}\n"
2149"__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {\n"
2150" unsigned int r;\n"
2151" asm(\"vset4.u32.u32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2152" return r;\n"
2153"}\n"
2154"__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {\n"
2155" return __bool2mask(__vsetltu4(__a, __b), 8);\n"
2156"}\n"
2157"__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {\n"
2158" unsigned int r;\n"
2159" asm(\"vset2.u32.u32.ne %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2160" return r;\n"
2161"}\n"
2162"__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {\n"
2163" return __bool2mask(__vsetne2(__a, __b), 16);\n"
2164"}\n"
2165"__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {\n"
2166" unsigned int r;\n"
2167" asm(\"vset4.u32.u32.ne %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2168" return r;\n"
2169"}\n"
2170"__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {\n"
2171" return __bool2mask(__vsetne4(__a, __b), 8);\n"
2172"}\n"
2173"\n"
2174"// Based on ITEM 23 in AIM-239: http://dspace.mit.edu/handle/1721.1/6086\n"
2175"// (a & b) + (a | b) = a + b = (a ^ b) + 2 * (a & b) =>\n"
2176"// (a + b) / 2 = ((a ^ b) >> 1) + (a & b)\n"
2177"// To operate on multiple sub-elements we need to make sure to mask out bits\n"
2178"// that crossed over into adjacent elements during the shift.\n"
2179"__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {\n"
2180" return (((__a ^ __b) >> 1) & ~0x80008000u) + (__a & __b);\n"
2181"}\n"
2182"__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {\n"
2183" return (((__a ^ __b) >> 1) & ~0x80808080u) + (__a & __b);\n"
2184"}\n"
2185"\n"
2186"__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {\n"
2187" unsigned int r;\n"
2188" if ((__a & 0x8000) && (__b & 0x8000)) {\n"
2189" // Work around a bug in ptxas which produces invalid result if low element\n"
2190" // is negative.\n"
2191" unsigned mask = __vcmpgts2(__a, __b);\n"
2192" r = (__a & mask) | (__b & ~mask);\n"
2193" } else {\n"
2194" asm(\"vmax2.s32.s32.s32 %0,%1,%2,%3;\"\n"
2195" : \"=r\"(r)\n"
2196" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2197" }\n"
2198" return r;\n"
2199"}\n"
2200"__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {\n"
2201" unsigned int r;\n"
2202" asm(\"vmax4.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2203" return r;\n"
2204"}\n"
2205"__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {\n"
2206" unsigned int r;\n"
2207" asm(\"vmax2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2208" return r;\n"
2209"}\n"
2210"__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {\n"
2211" unsigned int r;\n"
2212" asm(\"vmax4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2213" return r;\n"
2214"}\n"
2215"__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {\n"
2216" unsigned int r;\n"
2217" asm(\"vmin2.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2218" return r;\n"
2219"}\n"
2220"__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {\n"
2221" unsigned int r;\n"
2222" asm(\"vmin4.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2223" return r;\n"
2224"}\n"
2225"__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {\n"
2226" unsigned int r;\n"
2227" asm(\"vmin2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2228" return r;\n"
2229"}\n"
2230"__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {\n"
2231" unsigned int r;\n"
2232" asm(\"vmin4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2233" return r;\n"
2234"}\n"
2235"__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {\n"
2236" unsigned int r;\n"
2237" asm(\"vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;\"\n"
2238" : \"=r\"(r)\n"
2239" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2240" return r;\n"
2241"}\n"
2242"__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {\n"
2243" unsigned int r;\n"
2244" asm(\"vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;\"\n"
2245" : \"=r\"(r)\n"
2246" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2247" return r;\n"
2248"}\n"
2249"__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {\n"
2250" unsigned int r;\n"
2251" asm(\"vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;\"\n"
2252" : \"=r\"(r)\n"
2253" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2254" return r;\n"
2255"}\n"
2256"__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {\n"
2257" unsigned int r;\n"
2258" asm(\"vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;\"\n"
2259" : \"=r\"(r)\n"
2260" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2261" return r;\n"
2262"}\n"
2263"\n"
2264"__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {\n"
2265" unsigned int r;\n"
2266" asm(\"vsub2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2267" return r;\n"
2268"}\n"
2269"__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); }\n"
2270"\n"
2271"__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {\n"
2272" unsigned int r;\n"
2273" asm(\"vsub4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2274" return r;\n"
2275"}\n"
2276"__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); }\n"
2277"__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {\n"
2278" unsigned int r;\n"
2279" asm(\"vsub2.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
2280" : \"=r\"(r)\n"
2281" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2282" return r;\n"
2283"}\n"
2284"__DEVICE__ unsigned int __vnegss2(unsigned int __a) {\n"
2285" return __vsubss2(0, __a);\n"
2286"}\n"
2287"__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {\n"
2288" unsigned int r;\n"
2289" asm(\"vsub4.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
2290" : \"=r\"(r)\n"
2291" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2292" return r;\n"
2293"}\n"
2294"__DEVICE__ unsigned int __vnegss4(unsigned int __a) {\n"
2295" return __vsubss4(0, __a);\n"
2296"}\n"
2297"__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {\n"
2298" unsigned int r;\n"
2299" asm(\"vsub2.u32.u32.u32.sat %0,%1,%2,%3;\"\n"
2300" : \"=r\"(r)\n"
2301" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2302" return r;\n"
2303"}\n"
2304"__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {\n"
2305" unsigned int r;\n"
2306" asm(\"vsub4.u32.u32.u32.sat %0,%1,%2,%3;\"\n"
2307" : \"=r\"(r)\n"
2308" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2309" return r;\n"
2310"}\n"
2311"#endif // CUDA_VERSION >= 9020\n"
2312"__DEVICE__ int abs(int __a) { return __nv_abs(__a); }\n"
2313"__DEVICE__ double acos(double __a) { return __nv_acos(__a); }\n"
2314"__DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }\n"
2315"__DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }\n"
2316"__DEVICE__ float acoshf(float __a) { return __nv_acoshf(__a); }\n"
2317"__DEVICE__ double asin(double __a) { return __nv_asin(__a); }\n"
2318"__DEVICE__ float asinf(float __a) { return __nv_asinf(__a); }\n"
2319"__DEVICE__ double asinh(double __a) { return __nv_asinh(__a); }\n"
2320"__DEVICE__ float asinhf(float __a) { return __nv_asinhf(__a); }\n"
2321"__DEVICE__ double atan(double __a) { return __nv_atan(__a); }\n"
2322"__DEVICE__ double atan2(double __a, double __b) { return __nv_atan2(__a, __b); }\n"
2323"__DEVICE__ float atan2f(float __a, float __b) { return __nv_atan2f(__a, __b); }\n"
2324"__DEVICE__ float atanf(float __a) { return __nv_atanf(__a); }\n"
2325"__DEVICE__ double atanh(double __a) { return __nv_atanh(__a); }\n"
2326"__DEVICE__ float atanhf(float __a) { return __nv_atanhf(__a); }\n"
2327"__DEVICE__ double cbrt(double __a) { return __nv_cbrt(__a); }\n"
2328"__DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); }\n"
2329"__DEVICE__ double ceil(double __a) { return __nv_ceil(__a); }\n"
2330"__DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); }\n"
2331"__DEVICE__ int clock() { return __nvvm_read_ptx_sreg_clock(); }\n"
2332"__DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); }\n"
2333"__DEVICE__ double copysign(double __a, double __b) {\n"
2334" return __nv_copysign(__a, __b);\n"
2335"}\n"
2336"__DEVICE__ float copysignf(float __a, float __b) {\n"
2337" return __nv_copysignf(__a, __b);\n"
2338"}\n"
2339"__DEVICE__ double cos(double __a) { return __nv_cos(__a); }\n"
2340"__DEVICE__ float cosf(float __a) {\n"
2341" return __FAST_OR_SLOW(__nv_fast_cosf, __nv_cosf)(__a);\n"
2342"}\n"
2343"__DEVICE__ double cosh(double __a) { return __nv_cosh(__a); }\n"
2344"__DEVICE__ float coshf(float __a) { return __nv_coshf(__a); }\n"
2345"__DEVICE__ double cospi(double __a) { return __nv_cospi(__a); }\n"
2346"__DEVICE__ float cospif(float __a) { return __nv_cospif(__a); }\n"
2347"__DEVICE__ double cyl_bessel_i0(double __a) { return __nv_cyl_bessel_i0(__a); }\n"
2348"__DEVICE__ float cyl_bessel_i0f(float __a) { return __nv_cyl_bessel_i0f(__a); }\n"
2349"__DEVICE__ double cyl_bessel_i1(double __a) { return __nv_cyl_bessel_i1(__a); }\n"
2350"__DEVICE__ float cyl_bessel_i1f(float __a) { return __nv_cyl_bessel_i1f(__a); }\n"
2351"__DEVICE__ double erf(double __a) { return __nv_erf(__a); }\n"
2352"__DEVICE__ double erfc(double __a) { return __nv_erfc(__a); }\n"
2353"__DEVICE__ float erfcf(float __a) { return __nv_erfcf(__a); }\n"
2354"__DEVICE__ double erfcinv(double __a) { return __nv_erfcinv(__a); }\n"
2355"__DEVICE__ float erfcinvf(float __a) { return __nv_erfcinvf(__a); }\n"
2356"__DEVICE__ double erfcx(double __a) { return __nv_erfcx(__a); }\n"
2357"__DEVICE__ float erfcxf(float __a) { return __nv_erfcxf(__a); }\n"
2358"__DEVICE__ float erff(float __a) { return __nv_erff(__a); }\n"
2359"__DEVICE__ double erfinv(double __a) { return __nv_erfinv(__a); }\n"
2360"__DEVICE__ float erfinvf(float __a) { return __nv_erfinvf(__a); }\n"
2361"__DEVICE__ double exp(double __a) { return __nv_exp(__a); }\n"
2362"__DEVICE__ double exp10(double __a) { return __nv_exp10(__a); }\n"
2363"__DEVICE__ float exp10f(float __a) { return __nv_exp10f(__a); }\n"
2364"__DEVICE__ double exp2(double __a) { return __nv_exp2(__a); }\n"
2365"__DEVICE__ float exp2f(float __a) { return __nv_exp2f(__a); }\n"
2366"__DEVICE__ float expf(float __a) { return __nv_expf(__a); }\n"
2367"__DEVICE__ double expm1(double __a) { return __nv_expm1(__a); }\n"
2368"__DEVICE__ float expm1f(float __a) { return __nv_expm1f(__a); }\n"
2369"__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }\n"
2370"__DEVICE__ float fabsf(float __a) { return __nv_fabsf(__a); }\n"
2371"__DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); }\n"
2372"__DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); }\n"
2373"__DEVICE__ double fdivide(double __a, double __b) { return __a / __b; }\n"
2374"__DEVICE__ float fdividef(float __a, float __b) {\n"
2375"#if __FAST_MATH__ && !__CUDA_PREC_DIV\n"
2376" return __nv_fast_fdividef(__a, __b);\n"
2377"#else\n"
2378" return __a / __b;\n"
2379"#endif\n"
2380"}\n"
2381"__DEVICE__ double floor(double __f) { return __nv_floor(__f); }\n"
2382"__DEVICE__ float floorf(float __f) { return __nv_floorf(__f); }\n"
2383"__DEVICE__ double fma(double __a, double __b, double __c) {\n"
2384" return __nv_fma(__a, __b, __c);\n"
2385"}\n"
2386"__DEVICE__ float fmaf(float __a, float __b, float __c) {\n"
2387" return __nv_fmaf(__a, __b, __c);\n"
2388"}\n"
2389"__DEVICE__ double fmax(double __a, double __b) { return __nv_fmax(__a, __b); }\n"
2390"__DEVICE__ float fmaxf(float __a, float __b) { return __nv_fmaxf(__a, __b); }\n"
2391"__DEVICE__ double fmin(double __a, double __b) { return __nv_fmin(__a, __b); }\n"
2392"__DEVICE__ float fminf(float __a, float __b) { return __nv_fminf(__a, __b); }\n"
2393"__DEVICE__ double fmod(double __a, double __b) { return __nv_fmod(__a, __b); }\n"
2394"__DEVICE__ float fmodf(float __a, float __b) { return __nv_fmodf(__a, __b); }\n"
2395"__DEVICE__ double frexp(double __a, int *__b) { return __nv_frexp(__a, __b); }\n"
2396"__DEVICE__ float frexpf(float __a, int *__b) { return __nv_frexpf(__a, __b); }\n"
2397"__DEVICE__ double hypot(double __a, double __b) { return __nv_hypot(__a, __b); }\n"
2398"__DEVICE__ float hypotf(float __a, float __b) { return __nv_hypotf(__a, __b); }\n"
2399"__DEVICE__ int ilogb(double __a) { return __nv_ilogb(__a); }\n"
2400"__DEVICE__ int ilogbf(float __a) { return __nv_ilogbf(__a); }\n"
2401"__DEVICE__ double j0(double __a) { return __nv_j0(__a); }\n"
2402"__DEVICE__ float j0f(float __a) { return __nv_j0f(__a); }\n"
2403"__DEVICE__ double j1(double __a) { return __nv_j1(__a); }\n"
2404"__DEVICE__ float j1f(float __a) { return __nv_j1f(__a); }\n"
2405"__DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }\n"
2406"__DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }\n"
2407"#if defined(__LP64__)\n"
2408"__DEVICE__ long labs(long __a) { return llabs(__a); };\n"
2409"#else\n"
2410"__DEVICE__ long labs(long __a) { return __nv_abs(__a); };\n"
2411"#endif\n"
2412"__DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); }\n"
2413"__DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); }\n"
2414"__DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); }\n"
2415"__DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); }\n"
2416"__DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); }\n"
2417"__DEVICE__ long long llmax(long long __a, long long __b) {\n"
2418" return __nv_llmax(__a, __b);\n"
2419"}\n"
2420"__DEVICE__ long long llmin(long long __a, long long __b) {\n"
2421" return __nv_llmin(__a, __b);\n"
2422"}\n"
2423"__DEVICE__ long long llrint(double __a) { return __nv_llrint(__a); }\n"
2424"__DEVICE__ long long llrintf(float __a) { return __nv_llrintf(__a); }\n"
2425"__DEVICE__ long long llround(double __a) { return __nv_llround(__a); }\n"
2426"__DEVICE__ long long llroundf(float __a) { return __nv_llroundf(__a); }\n"
2427"__DEVICE__ double log(double __a) { return __nv_log(__a); }\n"
2428"__DEVICE__ double log10(double __a) { return __nv_log10(__a); }\n"
2429"__DEVICE__ float log10f(float __a) { return __nv_log10f(__a); }\n"
2430"__DEVICE__ double log1p(double __a) { return __nv_log1p(__a); }\n"
2431"__DEVICE__ float log1pf(float __a) { return __nv_log1pf(__a); }\n"
2432"__DEVICE__ double log2(double __a) { return __nv_log2(__a); }\n"
2433"__DEVICE__ float log2f(float __a) {\n"
2434" return __FAST_OR_SLOW(__nv_fast_log2f, __nv_log2f)(__a);\n"
2435"}\n"
2436"__DEVICE__ double logb(double __a) { return __nv_logb(__a); }\n"
2437"__DEVICE__ float logbf(float __a) { return __nv_logbf(__a); }\n"
2438"__DEVICE__ float logf(float __a) {\n"
2439" return __FAST_OR_SLOW(__nv_fast_logf, __nv_logf)(__a);\n"
2440"}\n"
2441"#if defined(__LP64__)\n"
2442"__DEVICE__ long lrint(double __a) { return llrint(__a); }\n"
2443"__DEVICE__ long lrintf(float __a) { return __float2ll_rn(__a); }\n"
2444"__DEVICE__ long lround(double __a) { return llround(__a); }\n"
2445"__DEVICE__ long lroundf(float __a) { return llroundf(__a); }\n"
2446"#else\n"
2447"__DEVICE__ long lrint(double __a) { return (long)rint(__a); }\n"
2448"__DEVICE__ long lrintf(float __a) { return __float2int_rn(__a); }\n"
2449"__DEVICE__ long lround(double __a) { return round(__a); }\n"
2450"__DEVICE__ long lroundf(float __a) { return roundf(__a); }\n"
2451"#endif\n"
2452"__DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); }\n"
2453"__DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {\n"
2454" return __builtin_memcpy(__a, __b, __c);\n"
2455"}\n"
2456"__DEVICE__ void *memset(void *__a, int __b, size_t __c) {\n"
2457" return __builtin_memset(__a, __b, __c);\n"
2458"}\n"
2459"__DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); }\n"
2460"__DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); }\n"
2461"__DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); }\n"
2462"__DEVICE__ double nearbyint(double __a) { return __nv_nearbyint(__a); }\n"
2463"__DEVICE__ float nearbyintf(float __a) { return __nv_nearbyintf(__a); }\n"
2464"__DEVICE__ double nextafter(double __a, double __b) {\n"
2465" return __nv_nextafter(__a, __b);\n"
2466"}\n"
2467"__DEVICE__ float nextafterf(float __a, float __b) {\n"
2468" return __nv_nextafterf(__a, __b);\n"
2469"}\n"
2470"__DEVICE__ double norm(int __dim, const double *__t) {\n"
2471" return __nv_norm(__dim, __t);\n"
2472"}\n"
2473"__DEVICE__ double norm3d(double __a, double __b, double __c) {\n"
2474" return __nv_norm3d(__a, __b, __c);\n"
2475"}\n"
2476"__DEVICE__ float norm3df(float __a, float __b, float __c) {\n"
2477" return __nv_norm3df(__a, __b, __c);\n"
2478"}\n"
2479"__DEVICE__ double norm4d(double __a, double __b, double __c, double __d) {\n"
2480" return __nv_norm4d(__a, __b, __c, __d);\n"
2481"}\n"
2482"__DEVICE__ float norm4df(float __a, float __b, float __c, float __d) {\n"
2483" return __nv_norm4df(__a, __b, __c, __d);\n"
2484"}\n"
2485"__DEVICE__ double normcdf(double __a) { return __nv_normcdf(__a); }\n"
2486"__DEVICE__ float normcdff(float __a) { return __nv_normcdff(__a); }\n"
2487"__DEVICE__ double normcdfinv(double __a) { return __nv_normcdfinv(__a); }\n"
2488"__DEVICE__ float normcdfinvf(float __a) { return __nv_normcdfinvf(__a); }\n"
2489"__DEVICE__ float normf(int __dim, const float *__t) {\n"
2490" return __nv_normf(__dim, __t);\n"
2491"}\n"
2492"__DEVICE__ double pow(double __a, double __b) { return __nv_pow(__a, __b); }\n"
2493"__DEVICE__ float powf(float __a, float __b) { return __nv_powf(__a, __b); }\n"
2494"__DEVICE__ double powi(double __a, int __b) { return __nv_powi(__a, __b); }\n"
2495"__DEVICE__ float powif(float __a, int __b) { return __nv_powif(__a, __b); }\n"
2496"__DEVICE__ double rcbrt(double __a) { return __nv_rcbrt(__a); }\n"
2497"__DEVICE__ float rcbrtf(float __a) { return __nv_rcbrtf(__a); }\n"
2498"__DEVICE__ double remainder(double __a, double __b) {\n"
2499" return __nv_remainder(__a, __b);\n"
2500"}\n"
2501"__DEVICE__ float remainderf(float __a, float __b) {\n"
2502" return __nv_remainderf(__a, __b);\n"
2503"}\n"
2504"__DEVICE__ double remquo(double __a, double __b, int *__c) {\n"
2505" return __nv_remquo(__a, __b, __c);\n"
2506"}\n"
2507"__DEVICE__ float remquof(float __a, float __b, int *__c) {\n"
2508" return __nv_remquof(__a, __b, __c);\n"
2509"}\n"
2510"__DEVICE__ double rhypot(double __a, double __b) {\n"
2511" return __nv_rhypot(__a, __b);\n"
2512"}\n"
2513"__DEVICE__ float rhypotf(float __a, float __b) {\n"
2514" return __nv_rhypotf(__a, __b);\n"
2515"}\n"
2516"__DEVICE__ double rint(double __a) { return __nv_rint(__a); }\n"
2517"__DEVICE__ float rintf(float __a) { return __nv_rintf(__a); }\n"
2518"__DEVICE__ double rnorm(int __a, const double *__b) {\n"
2519" return __nv_rnorm(__a, __b);\n"
2520"}\n"
2521"__DEVICE__ double rnorm3d(double __a, double __b, double __c) {\n"
2522" return __nv_rnorm3d(__a, __b, __c);\n"
2523"}\n"
2524"__DEVICE__ float rnorm3df(float __a, float __b, float __c) {\n"
2525" return __nv_rnorm3df(__a, __b, __c);\n"
2526"}\n"
2527"__DEVICE__ double rnorm4d(double __a, double __b, double __c, double __d) {\n"
2528" return __nv_rnorm4d(__a, __b, __c, __d);\n"
2529"}\n"
2530"__DEVICE__ float rnorm4df(float __a, float __b, float __c, float __d) {\n"
2531" return __nv_rnorm4df(__a, __b, __c, __d);\n"
2532"}\n"
2533"__DEVICE__ float rnormf(int __dim, const float *__t) {\n"
2534" return __nv_rnormf(__dim, __t);\n"
2535"}\n"
2536"__DEVICE__ double round(double __a) { return __nv_round(__a); }\n"
2537"__DEVICE__ float roundf(float __a) { return __nv_roundf(__a); }\n"
2538"__DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); }\n"
2539"__DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); }\n"
2540"__DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }\n"
2541"__DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }\n"
2542"__DEVICE__ double scalbln(double __a, long __b) {\n"
2543" if (__b > INT_MAX)\n"
2544" return __a > 0 ? HUGE_VAL : -HUGE_VAL;\n"
2545" if (__b < INT_MIN)\n"
2546" return __a > 0 ? 0.0 : -0.0;\n"
2547" return scalbn(__a, (int)__b);\n"
2548"}\n"
2549"__DEVICE__ float scalblnf(float __a, long __b) {\n"
2550" if (__b > INT_MAX)\n"
2551" return __a > 0 ? HUGE_VALF : -HUGE_VALF;\n"
2552" if (__b < INT_MIN)\n"
2553" return __a > 0 ? 0.f : -0.f;\n"
2554" return scalbnf(__a, (int)__b);\n"
2555"}\n"
2556"__DEVICE__ double sin(double __a) { return __nv_sin(__a); }\n"
2557"__DEVICE__ void sincos(double __a, double *__sptr, double *__cptr) {\n"
2558" return __nv_sincos(__a, __sptr, __cptr);\n"
2559"}\n"
2560"__DEVICE__ void sincosf(float __a, float *__sptr, float *__cptr) {\n"
2561" return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __sptr, __cptr);\n"
2562"}\n"
2563"__DEVICE__ void sincospi(double __a, double *__sptr, double *__cptr) {\n"
2564" return __nv_sincospi(__a, __sptr, __cptr);\n"
2565"}\n"
2566"__DEVICE__ void sincospif(float __a, float *__sptr, float *__cptr) {\n"
2567" return __nv_sincospif(__a, __sptr, __cptr);\n"
2568"}\n"
2569"__DEVICE__ float sinf(float __a) {\n"
2570" return __FAST_OR_SLOW(__nv_fast_sinf, __nv_sinf)(__a);\n"
2571"}\n"
2572"__DEVICE__ double sinh(double __a) { return __nv_sinh(__a); }\n"
2573"__DEVICE__ float sinhf(float __a) { return __nv_sinhf(__a); }\n"
2574"__DEVICE__ double sinpi(double __a) { return __nv_sinpi(__a); }\n"
2575"__DEVICE__ float sinpif(float __a) { return __nv_sinpif(__a); }\n"
2576"__DEVICE__ double sqrt(double __a) { return __nv_sqrt(__a); }\n"
2577"__DEVICE__ float sqrtf(float __a) { return __nv_sqrtf(__a); }\n"
2578"__DEVICE__ double tan(double __a) { return __nv_tan(__a); }\n"
2579"__DEVICE__ float tanf(float __a) { return __nv_tanf(__a); }\n"
2580"__DEVICE__ double tanh(double __a) { return __nv_tanh(__a); }\n"
2581"__DEVICE__ float tanhf(float __a) { return __nv_tanhf(__a); }\n"
2582"__DEVICE__ double tgamma(double __a) { return __nv_tgamma(__a); }\n"
2583"__DEVICE__ float tgammaf(float __a) { return __nv_tgammaf(__a); }\n"
2584"__DEVICE__ double trunc(double __a) { return __nv_trunc(__a); }\n"
2585"__DEVICE__ float truncf(float __a) { return __nv_truncf(__a); }\n"
2586"__DEVICE__ unsigned long long ullmax(unsigned long long __a,\n"
2587" unsigned long long __b) {\n"
2588" return __nv_ullmax(__a, __b);\n"
2589"}\n"
2590"__DEVICE__ unsigned long long ullmin(unsigned long long __a,\n"
2591" unsigned long long __b) {\n"
2592" return __nv_ullmin(__a, __b);\n"
2593"}\n"
2594"__DEVICE__ unsigned int umax(unsigned int __a, unsigned int __b) {\n"
2595" return __nv_umax(__a, __b);\n"
2596"}\n"
2597"__DEVICE__ unsigned int umin(unsigned int __a, unsigned int __b) {\n"
2598" return __nv_umin(__a, __b);\n"
2599"}\n"
2600"__DEVICE__ double y0(double __a) { return __nv_y0(__a); }\n"
2601"__DEVICE__ float y0f(float __a) { return __nv_y0f(__a); }\n"
2602"__DEVICE__ double y1(double __a) { return __nv_y1(__a); }\n"
2603"__DEVICE__ float y1f(float __a) { return __nv_y1f(__a); }\n"
2604"__DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }\n"
2605"__DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }\n"
2606"\n"
2607"#pragma pop_macro(\"__DEVICE__\")\n"
2608"#pragma pop_macro(\"__FAST_OR_SLOW\")\n"
2609"#endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__\n"
2610"" } ,
2611 { "/builtins/__clang_cuda_intrinsics.h" , "/*===--- __clang_cuda_intrinsics.h - Device-side CUDA intrinsic wrappers ---===\n"
2612" *\n"
2613" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
2614" * of this software and associated documentation files (the \"Software\"), to deal\n"
2615" * in the Software without restriction, including without limitation the rights\n"
2616" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
2617" * copies of the Software, and to permit persons to whom the Software is\n"
2618" * furnished to do so, subject to the following conditions:\n"
2619" *\n"
2620" * The above copyright notice and this permission notice shall be included in\n"
2621" * all copies or substantial portions of the Software.\n"
2622" *\n"
2623" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
2624" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
2625" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
2626" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
2627" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
2628" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
2629" * THE SOFTWARE.\n"
2630" *\n"
2631" *===-----------------------------------------------------------------------===\n"
2632" */\n"
2633"#ifndef __CLANG_CUDA_INTRINSICS_H__\n"
2634"#define __CLANG_CUDA_INTRINSICS_H__\n"
2635"#ifndef __CUDA__\n"
2636"#error \"This file is for CUDA compilation only.\"\n"
2637"#endif\n"
2638"\n"
2639"// sm_30 intrinsics: __shfl_{up,down,xor}.\n"
2640"\n"
2641"#define __SM_30_INTRINSICS_H__\n"
2642"#define __SM_30_INTRINSICS_HPP__\n"
2643"\n"
2644"#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300\n"
2645"\n"
2646"#pragma push_macro(\"__MAKE_SHUFFLES\")\n"
2647"#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask, \\\n"
2648" __Type) \\\n"
2649" inline __device__ int __FnName(int __val, __Type __offset, \\\n"
2650" int __width = warpSize) { \\\n"
2651" return __IntIntrinsic(__val, __offset, \\\n"
2652" ((warpSize - __width) << 8) | (__Mask)); \\\n"
2653" } \\\n"
2654" inline __device__ float __FnName(float __val, __Type __offset, \\\n"
2655" int __width = warpSize) { \\\n"
2656" return __FloatIntrinsic(__val, __offset, \\\n"
2657" ((warpSize - __width) << 8) | (__Mask)); \\\n"
2658" } \\\n"
2659" inline __device__ unsigned int __FnName(unsigned int __val, __Type __offset, \\\n"
2660" int __width = warpSize) { \\\n"
2661" return static_cast<unsigned int>( \\\n"
2662" ::__FnName(static_cast<int>(__val), __offset, __width)); \\\n"
2663" } \\\n"
2664" inline __device__ long long __FnName(long long __val, __Type __offset, \\\n"
2665" int __width = warpSize) { \\\n"
2666" struct __Bits { \\\n"
2667" int __a, __b; \\\n"
2668" }; \\\n"
2669" _Static_assert(sizeof(__val) == sizeof(__Bits)); \\\n"
2670" _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \\\n"
2671" __Bits __tmp; \\\n"
2672" memcpy(&__val, &__tmp, sizeof(__val)); \\\n"
2673" __tmp.__a = ::__FnName(__tmp.__a, __offset, __width); \\\n"
2674" __tmp.__b = ::__FnName(__tmp.__b, __offset, __width); \\\n"
2675" long long __ret; \\\n"
2676" memcpy(&__ret, &__tmp, sizeof(__tmp)); \\\n"
2677" return __ret; \\\n"
2678" } \\\n"
2679" inline __device__ long __FnName(long __val, __Type __offset, \\\n"
2680" int __width = warpSize) { \\\n"
2681" _Static_assert(sizeof(long) == sizeof(long long) || \\\n"
2682" sizeof(long) == sizeof(int)); \\\n"
2683" if (sizeof(long) == sizeof(long long)) { \\\n"
2684" return static_cast<long>( \\\n"
2685" ::__FnName(static_cast<long long>(__val), __offset, __width)); \\\n"
2686" } else if (sizeof(long) == sizeof(int)) { \\\n"
2687" return static_cast<long>( \\\n"
2688" ::__FnName(static_cast<int>(__val), __offset, __width)); \\\n"
2689" } \\\n"
2690" } \\\n"
2691" inline __device__ unsigned long __FnName( \\\n"
2692" unsigned long __val, __Type __offset, int __width = warpSize) { \\\n"
2693" return static_cast<unsigned long>( \\\n"
2694" ::__FnName(static_cast<long>(__val), __offset, __width)); \\\n"
2695" } \\\n"
2696" inline __device__ unsigned long long __FnName( \\\n"
2697" unsigned long long __val, __Type __offset, int __width = warpSize) { \\\n"
2698" return static_cast<unsigned long long>(::__FnName( \\\n"
2699" static_cast<unsigned long long>(__val), __offset, __width)); \\\n"
2700" } \\\n"
2701" inline __device__ double __FnName(double __val, __Type __offset, \\\n"
2702" int __width = warpSize) { \\\n"
2703" long long __tmp; \\\n"
2704" _Static_assert(sizeof(__tmp) == sizeof(__val)); \\\n"
2705" memcpy(&__tmp, &__val, sizeof(__val)); \\\n"
2706" __tmp = ::__FnName(__tmp, __offset, __width); \\\n"
2707" double __ret; \\\n"
2708" memcpy(&__ret, &__tmp, sizeof(__ret)); \\\n"
2709" return __ret; \\\n"
2710" }\n"
2711"\n"
2712"__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f, int);\n"
2713"// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=\n"
2714"// maxLane.\n"
2715"__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0,\n"
2716" unsigned int);\n"
2717"__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f,\n"
2718" unsigned int);\n"
2719"__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,\n"
2720" int);\n"
2721"#pragma pop_macro(\"__MAKE_SHUFFLES\")\n"
2722"\n"
2723"#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300\n"
2724"\n"
2725"#if CUDA_VERSION >= 9000\n"
2726"#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)\n"
2727"// __shfl_sync_* variants available in CUDA-9\n"
2728"#pragma push_macro(\"__MAKE_SYNC_SHUFFLES\")\n"
2729"#define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, \\\n"
2730" __Mask, __Type) \\\n"
2731" inline __device__ int __FnName(unsigned int __mask, int __val, \\\n"
2732" __Type __offset, int __width = warpSize) { \\\n"
2733" return __IntIntrinsic(__mask, __val, __offset, \\\n"
2734" ((warpSize - __width) << 8) | (__Mask)); \\\n"
2735" } \\\n"
2736" inline __device__ float __FnName(unsigned int __mask, float __val, \\\n"
2737" __Type __offset, int __width = warpSize) { \\\n"
2738" return __FloatIntrinsic(__mask, __val, __offset, \\\n"
2739" ((warpSize - __width) << 8) | (__Mask)); \\\n"
2740" } \\\n"
2741" inline __device__ unsigned int __FnName(unsigned int __mask, \\\n"
2742" unsigned int __val, __Type __offset, \\\n"
2743" int __width = warpSize) { \\\n"
2744" return static_cast<unsigned int>( \\\n"
2745" ::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \\\n"
2746" } \\\n"
2747" inline __device__ long long __FnName(unsigned int __mask, long long __val, \\\n"
2748" __Type __offset, \\\n"
2749" int __width = warpSize) { \\\n"
2750" struct __Bits { \\\n"
2751" int __a, __b; \\\n"
2752" }; \\\n"
2753" _Static_assert(sizeof(__val) == sizeof(__Bits)); \\\n"
2754" _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \\\n"
2755" __Bits __tmp; \\\n"
2756" memcpy(&__val, &__tmp, sizeof(__val)); \\\n"
2757" __tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width); \\\n"
2758" __tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width); \\\n"
2759" long long __ret; \\\n"
2760" memcpy(&__ret, &__tmp, sizeof(__tmp)); \\\n"
2761" return __ret; \\\n"
2762" } \\\n"
2763" inline __device__ unsigned long long __FnName( \\\n"
2764" unsigned int __mask, unsigned long long __val, __Type __offset, \\\n"
2765" int __width = warpSize) { \\\n"
2766" return static_cast<unsigned long long>(::__FnName( \\\n"
2767" __mask, static_cast<unsigned long long>(__val), __offset, __width)); \\\n"
2768" } \\\n"
2769" inline __device__ long __FnName(unsigned int __mask, long __val, \\\n"
2770" __Type __offset, int __width = warpSize) { \\\n"
2771" _Static_assert(sizeof(long) == sizeof(long long) || \\\n"
2772" sizeof(long) == sizeof(int)); \\\n"
2773" if (sizeof(long) == sizeof(long long)) { \\\n"
2774" return static_cast<long>(::__FnName( \\\n"
2775" __mask, static_cast<long long>(__val), __offset, __width)); \\\n"
2776" } else if (sizeof(long) == sizeof(int)) { \\\n"
2777" return static_cast<long>( \\\n"
2778" ::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \\\n"
2779" } \\\n"
2780" } \\\n"
2781" inline __device__ unsigned long __FnName( \\\n"
2782" unsigned int __mask, unsigned long __val, __Type __offset, \\\n"
2783" int __width = warpSize) { \\\n"
2784" return static_cast<unsigned long>( \\\n"
2785" ::__FnName(__mask, static_cast<long>(__val), __offset, __width)); \\\n"
2786" } \\\n"
2787" inline __device__ double __FnName(unsigned int __mask, double __val, \\\n"
2788" __Type __offset, int __width = warpSize) { \\\n"
2789" long long __tmp; \\\n"
2790" _Static_assert(sizeof(__tmp) == sizeof(__val)); \\\n"
2791" memcpy(&__tmp, &__val, sizeof(__val)); \\\n"
2792" __tmp = ::__FnName(__mask, __tmp, __offset, __width); \\\n"
2793" double __ret; \\\n"
2794" memcpy(&__ret, &__tmp, sizeof(__ret)); \\\n"
2795" return __ret; \\\n"
2796" }\n"
2797"__MAKE_SYNC_SHUFFLES(__shfl_sync, __nvvm_shfl_sync_idx_i32,\n"
2798" __nvvm_shfl_sync_idx_f32, 0x1f, int);\n"
2799"// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=\n"
2800"// maxLane.\n"
2801"__MAKE_SYNC_SHUFFLES(__shfl_up_sync, __nvvm_shfl_sync_up_i32,\n"
2802" __nvvm_shfl_sync_up_f32, 0, unsigned int);\n"
2803"__MAKE_SYNC_SHUFFLES(__shfl_down_sync, __nvvm_shfl_sync_down_i32,\n"
2804" __nvvm_shfl_sync_down_f32, 0x1f, unsigned int);\n"
2805"__MAKE_SYNC_SHUFFLES(__shfl_xor_sync, __nvvm_shfl_sync_bfly_i32,\n"
2806" __nvvm_shfl_sync_bfly_f32, 0x1f, int);\n"
2807"#pragma pop_macro(\"__MAKE_SYNC_SHUFFLES\")\n"
2808"\n"
2809"inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) {\n"
2810" return __nvvm_bar_warp_sync(mask);\n"
2811"}\n"
2812"\n"
2813"inline __device__ void __barrier_sync(unsigned int id) {\n"
2814" __nvvm_barrier_sync(id);\n"
2815"}\n"
2816"\n"
2817"inline __device__ void __barrier_sync_count(unsigned int id,\n"
2818" unsigned int count) {\n"
2819" __nvvm_barrier_sync_cnt(id, count);\n"
2820"}\n"
2821"\n"
2822"inline __device__ int __all_sync(unsigned int mask, int pred) {\n"
2823" return __nvvm_vote_all_sync(mask, pred);\n"
2824"}\n"
2825"\n"
2826"inline __device__ int __any_sync(unsigned int mask, int pred) {\n"
2827" return __nvvm_vote_any_sync(mask, pred);\n"
2828"}\n"
2829"\n"
2830"inline __device__ int __uni_sync(unsigned int mask, int pred) {\n"
2831" return __nvvm_vote_uni_sync(mask, pred);\n"
2832"}\n"
2833"\n"
2834"inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {\n"
2835" return __nvvm_vote_ballot_sync(mask, pred);\n"
2836"}\n"
2837"\n"
2838"inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); }\n"
2839"\n"
2840"inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) {\n"
2841" return __nvvm_fns(mask, base, offset);\n"
2842"}\n"
2843"\n"
2844"#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300\n"
2845"\n"
2846"// Define __match* builtins CUDA-9 headers expect to see.\n"
2847"#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700\n"
2848"inline __device__ unsigned int __match32_any_sync(unsigned int mask,\n"
2849" unsigned int value) {\n"
2850" return __nvvm_match_any_sync_i32(mask, value);\n"
2851"}\n"
2852"\n"
2853"inline __device__ unsigned long long\n"
2854"__match64_any_sync(unsigned int mask, unsigned long long value) {\n"
2855" return __nvvm_match_any_sync_i64(mask, value);\n"
2856"}\n"
2857"\n"
2858"inline __device__ unsigned int\n"
2859"__match32_all_sync(unsigned int mask, unsigned int value, int *pred) {\n"
2860" return __nvvm_match_all_sync_i32p(mask, value, pred);\n"
2861"}\n"
2862"\n"
2863"inline __device__ unsigned long long\n"
2864"__match64_all_sync(unsigned int mask, unsigned long long value, int *pred) {\n"
2865" return __nvvm_match_all_sync_i64p(mask, value, pred);\n"
2866"}\n"
2867"#include \"crt/sm_70_rt.hpp\"\n"
2868"\n"
2869"#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700\n"
2870"#endif // __CUDA_VERSION >= 9000\n"
2871"\n"
2872"// sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}.\n"
2873"\n"
2874"// Prevent the vanilla sm_32 intrinsics header from being included.\n"
2875"#define __SM_32_INTRINSICS_H__\n"
2876"#define __SM_32_INTRINSICS_HPP__\n"
2877"\n"
2878"#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320\n"
2879"\n"
2880"inline __device__ char __ldg(const char *ptr) { return __nvvm_ldg_c(ptr); }\n"
2881"inline __device__ short __ldg(const short *ptr) { return __nvvm_ldg_s(ptr); }\n"
2882"inline __device__ int __ldg(const int *ptr) { return __nvvm_ldg_i(ptr); }\n"
2883"inline __device__ long __ldg(const long *ptr) { return __nvvm_ldg_l(ptr); }\n"
2884"inline __device__ long long __ldg(const long long *ptr) {\n"
2885" return __nvvm_ldg_ll(ptr);\n"
2886"}\n"
2887"inline __device__ unsigned char __ldg(const unsigned char *ptr) {\n"
2888" return __nvvm_ldg_uc(ptr);\n"
2889"}\n"
2890"inline __device__ signed char __ldg(const signed char *ptr) {\n"
2891" return __nvvm_ldg_uc((const unsigned char *)ptr);\n"
2892"}\n"
2893"inline __device__ unsigned short __ldg(const unsigned short *ptr) {\n"
2894" return __nvvm_ldg_us(ptr);\n"
2895"}\n"
2896"inline __device__ unsigned int __ldg(const unsigned int *ptr) {\n"
2897" return __nvvm_ldg_ui(ptr);\n"
2898"}\n"
2899"inline __device__ unsigned long __ldg(const unsigned long *ptr) {\n"
2900" return __nvvm_ldg_ul(ptr);\n"
2901"}\n"
2902"inline __device__ unsigned long long __ldg(const unsigned long long *ptr) {\n"
2903" return __nvvm_ldg_ull(ptr);\n"
2904"}\n"
2905"inline __device__ float __ldg(const float *ptr) { return __nvvm_ldg_f(ptr); }\n"
2906"inline __device__ double __ldg(const double *ptr) { return __nvvm_ldg_d(ptr); }\n"
2907"\n"
2908"inline __device__ char2 __ldg(const char2 *ptr) {\n"
2909" typedef char c2 __attribute__((ext_vector_type(2)));\n"
2910" // We can assume that ptr is aligned at least to char2's alignment, but the\n"
2911" // load will assume that ptr is aligned to char2's alignment. This is only\n"
2912" // safe if alignof(c2) <= alignof(char2).\n"
2913" c2 rv = __nvvm_ldg_c2(reinterpret_cast<const c2 *>(ptr));\n"
2914" char2 ret;\n"
2915" ret.x = rv[0];\n"
2916" ret.y = rv[1];\n"
2917" return ret;\n"
2918"}\n"
2919"inline __device__ char4 __ldg(const char4 *ptr) {\n"
2920" typedef char c4 __attribute__((ext_vector_type(4)));\n"
2921" c4 rv = __nvvm_ldg_c4(reinterpret_cast<const c4 *>(ptr));\n"
2922" char4 ret;\n"
2923" ret.x = rv[0];\n"
2924" ret.y = rv[1];\n"
2925" ret.z = rv[2];\n"
2926" ret.w = rv[3];\n"
2927" return ret;\n"
2928"}\n"
2929"inline __device__ short2 __ldg(const short2 *ptr) {\n"
2930" typedef short s2 __attribute__((ext_vector_type(2)));\n"
2931" s2 rv = __nvvm_ldg_s2(reinterpret_cast<const s2 *>(ptr));\n"
2932" short2 ret;\n"
2933" ret.x = rv[0];\n"
2934" ret.y = rv[1];\n"
2935" return ret;\n"
2936"}\n"
2937"inline __device__ short4 __ldg(const short4 *ptr) {\n"
2938" typedef short s4 __attribute__((ext_vector_type(4)));\n"
2939" s4 rv = __nvvm_ldg_s4(reinterpret_cast<const s4 *>(ptr));\n"
2940" short4 ret;\n"
2941" ret.x = rv[0];\n"
2942" ret.y = rv[1];\n"
2943" ret.z = rv[2];\n"
2944" ret.w = rv[3];\n"
2945" return ret;\n"
2946"}\n"
2947"inline __device__ int2 __ldg(const int2 *ptr) {\n"
2948" typedef int i2 __attribute__((ext_vector_type(2)));\n"
2949" i2 rv = __nvvm_ldg_i2(reinterpret_cast<const i2 *>(ptr));\n"
2950" int2 ret;\n"
2951" ret.x = rv[0];\n"
2952" ret.y = rv[1];\n"
2953" return ret;\n"
2954"}\n"
2955"inline __device__ int4 __ldg(const int4 *ptr) {\n"
2956" typedef int i4 __attribute__((ext_vector_type(4)));\n"
2957" i4 rv = __nvvm_ldg_i4(reinterpret_cast<const i4 *>(ptr));\n"
2958" int4 ret;\n"
2959" ret.x = rv[0];\n"
2960" ret.y = rv[1];\n"
2961" ret.z = rv[2];\n"
2962" ret.w = rv[3];\n"
2963" return ret;\n"
2964"}\n"
2965"inline __device__ longlong2 __ldg(const longlong2 *ptr) {\n"
2966" typedef long long ll2 __attribute__((ext_vector_type(2)));\n"
2967" ll2 rv = __nvvm_ldg_ll2(reinterpret_cast<const ll2 *>(ptr));\n"
2968" longlong2 ret;\n"
2969" ret.x = rv[0];\n"
2970" ret.y = rv[1];\n"
2971" return ret;\n"
2972"}\n"
2973"\n"
2974"inline __device__ uchar2 __ldg(const uchar2 *ptr) {\n"
2975" typedef unsigned char uc2 __attribute__((ext_vector_type(2)));\n"
2976" uc2 rv = __nvvm_ldg_uc2(reinterpret_cast<const uc2 *>(ptr));\n"
2977" uchar2 ret;\n"
2978" ret.x = rv[0];\n"
2979" ret.y = rv[1];\n"
2980" return ret;\n"
2981"}\n"
2982"inline __device__ uchar4 __ldg(const uchar4 *ptr) {\n"
2983" typedef unsigned char uc4 __attribute__((ext_vector_type(4)));\n"
2984" uc4 rv = __nvvm_ldg_uc4(reinterpret_cast<const uc4 *>(ptr));\n"
2985" uchar4 ret;\n"
2986" ret.x = rv[0];\n"
2987" ret.y = rv[1];\n"
2988" ret.z = rv[2];\n"
2989" ret.w = rv[3];\n"
2990" return ret;\n"
2991"}\n"
2992"inline __device__ ushort2 __ldg(const ushort2 *ptr) {\n"
2993" typedef unsigned short us2 __attribute__((ext_vector_type(2)));\n"
2994" us2 rv = __nvvm_ldg_us2(reinterpret_cast<const us2 *>(ptr));\n"
2995" ushort2 ret;\n"
2996" ret.x = rv[0];\n"
2997" ret.y = rv[1];\n"
2998" return ret;\n"
2999"}\n"
3000"inline __device__ ushort4 __ldg(const ushort4 *ptr) {\n"
3001" typedef unsigned short us4 __attribute__((ext_vector_type(4)));\n"
3002" us4 rv = __nvvm_ldg_us4(reinterpret_cast<const us4 *>(ptr));\n"
3003" ushort4 ret;\n"
3004" ret.x = rv[0];\n"
3005" ret.y = rv[1];\n"
3006" ret.z = rv[2];\n"
3007" ret.w = rv[3];\n"
3008" return ret;\n"
3009"}\n"
3010"inline __device__ uint2 __ldg(const uint2 *ptr) {\n"
3011" typedef unsigned int ui2 __attribute__((ext_vector_type(2)));\n"
3012" ui2 rv = __nvvm_ldg_ui2(reinterpret_cast<const ui2 *>(ptr));\n"
3013" uint2 ret;\n"
3014" ret.x = rv[0];\n"
3015" ret.y = rv[1];\n"
3016" return ret;\n"
3017"}\n"
3018"inline __device__ uint4 __ldg(const uint4 *ptr) {\n"
3019" typedef unsigned int ui4 __attribute__((ext_vector_type(4)));\n"
3020" ui4 rv = __nvvm_ldg_ui4(reinterpret_cast<const ui4 *>(ptr));\n"
3021" uint4 ret;\n"
3022" ret.x = rv[0];\n"
3023" ret.y = rv[1];\n"
3024" ret.z = rv[2];\n"
3025" ret.w = rv[3];\n"
3026" return ret;\n"
3027"}\n"
3028"inline __device__ ulonglong2 __ldg(const ulonglong2 *ptr) {\n"
3029" typedef unsigned long long ull2 __attribute__((ext_vector_type(2)));\n"
3030" ull2 rv = __nvvm_ldg_ull2(reinterpret_cast<const ull2 *>(ptr));\n"
3031" ulonglong2 ret;\n"
3032" ret.x = rv[0];\n"
3033" ret.y = rv[1];\n"
3034" return ret;\n"
3035"}\n"
3036"\n"
3037"inline __device__ float2 __ldg(const float2 *ptr) {\n"
3038" typedef float f2 __attribute__((ext_vector_type(2)));\n"
3039" f2 rv = __nvvm_ldg_f2(reinterpret_cast<const f2 *>(ptr));\n"
3040" float2 ret;\n"
3041" ret.x = rv[0];\n"
3042" ret.y = rv[1];\n"
3043" return ret;\n"
3044"}\n"
3045"inline __device__ float4 __ldg(const float4 *ptr) {\n"
3046" typedef float f4 __attribute__((ext_vector_type(4)));\n"
3047" f4 rv = __nvvm_ldg_f4(reinterpret_cast<const f4 *>(ptr));\n"
3048" float4 ret;\n"
3049" ret.x = rv[0];\n"
3050" ret.y = rv[1];\n"
3051" ret.z = rv[2];\n"
3052" ret.w = rv[3];\n"
3053" return ret;\n"
3054"}\n"
3055"inline __device__ double2 __ldg(const double2 *ptr) {\n"
3056" typedef double d2 __attribute__((ext_vector_type(2)));\n"
3057" d2 rv = __nvvm_ldg_d2(reinterpret_cast<const d2 *>(ptr));\n"
3058" double2 ret;\n"
3059" ret.x = rv[0];\n"
3060" ret.y = rv[1];\n"
3061" return ret;\n"
3062"}\n"
3063"\n"
3064"// TODO: Implement these as intrinsics, so the backend can work its magic on\n"
3065"// these. Alternatively, we could implement these as plain C and try to get\n"
3066"// llvm to recognize the relevant patterns.\n"
3067"inline __device__ unsigned __funnelshift_l(unsigned low32, unsigned high32,\n"
3068" unsigned shiftWidth) {\n"
3069" unsigned result;\n"
3070" asm(\"shf.l.wrap.b32 %0, %1, %2, %3;\"\n"
3071" : \"=r\"(result)\n"
3072" : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n"
3073" return result;\n"
3074"}\n"
3075"inline __device__ unsigned __funnelshift_lc(unsigned low32, unsigned high32,\n"
3076" unsigned shiftWidth) {\n"
3077" unsigned result;\n"
3078" asm(\"shf.l.clamp.b32 %0, %1, %2, %3;\"\n"
3079" : \"=r\"(result)\n"
3080" : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n"
3081" return result;\n"
3082"}\n"
3083"inline __device__ unsigned __funnelshift_r(unsigned low32, unsigned high32,\n"
3084" unsigned shiftWidth) {\n"
3085" unsigned result;\n"
3086" asm(\"shf.r.wrap.b32 %0, %1, %2, %3;\"\n"
3087" : \"=r\"(result)\n"
3088" : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n"
3089" return result;\n"
3090"}\n"
3091"inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32,\n"
3092" unsigned shiftWidth) {\n"
3093" unsigned ret;\n"
3094" asm(\"shf.r.clamp.b32 %0, %1, %2, %3;\"\n"
3095" : \"=r\"(ret)\n"
3096" : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n"
3097" return ret;\n"
3098"}\n"
3099"\n"
3100"#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320\n"
3101"\n"
3102"#endif // defined(__CLANG_CUDA_INTRINSICS_H__)\n"
3103"" } ,
3104 { "/builtins/__clang_cuda_libdevice_declares.h" , "/*===-- __clang_cuda_libdevice_declares.h - decls for libdevice functions --===\n"
3105" *\n"
3106" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
3107" * of this software and associated documentation files (the \"Software\"), to deal\n"
3108" * in the Software without restriction, including without limitation the rights\n"
3109" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
3110" * copies of the Software, and to permit persons to whom the Software is\n"
3111" * furnished to do so, subject to the following conditions:\n"
3112" *\n"
3113" * The above copyright notice and this permission notice shall be included in\n"
3114" * all copies or substantial portions of the Software.\n"
3115" *\n"
3116" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
3117" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
3118" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
3119" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
3120" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
3121" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
3122" * THE SOFTWARE.\n"
3123" *\n"
3124" *===-----------------------------------------------------------------------===\n"
3125" */\n"
3126"\n"
3127"#ifndef __CLANG_CUDA_LIBDEVICE_DECLARES_H__\n"
3128"#define __CLANG_CUDA_LIBDEVICE_DECLARES_H__\n"
3129"\n"
3130"extern \"C\" {\n"
3131"\n"
3132"__device__ int __nv_abs(int __a);\n"
3133"__device__ double __nv_acos(double __a);\n"
3134"__device__ float __nv_acosf(float __a);\n"
3135"__device__ double __nv_acosh(double __a);\n"
3136"__device__ float __nv_acoshf(float __a);\n"
3137"__device__ double __nv_asin(double __a);\n"
3138"__device__ float __nv_asinf(float __a);\n"
3139"__device__ double __nv_asinh(double __a);\n"
3140"__device__ float __nv_asinhf(float __a);\n"
3141"__device__ double __nv_atan2(double __a, double __b);\n"
3142"__device__ float __nv_atan2f(float __a, float __b);\n"
3143"__device__ double __nv_atan(double __a);\n"
3144"__device__ float __nv_atanf(float __a);\n"
3145"__device__ double __nv_atanh(double __a);\n"
3146"__device__ float __nv_atanhf(float __a);\n"
3147"__device__ int __nv_brev(int __a);\n"
3148"__device__ long long __nv_brevll(long long __a);\n"
3149"__device__ int __nv_byte_perm(int __a, int __b, int __c);\n"
3150"__device__ double __nv_cbrt(double __a);\n"
3151"__device__ float __nv_cbrtf(float __a);\n"
3152"__device__ double __nv_ceil(double __a);\n"
3153"__device__ float __nv_ceilf(float __a);\n"
3154"__device__ int __nv_clz(int __a);\n"
3155"__device__ int __nv_clzll(long long __a);\n"
3156"__device__ double __nv_copysign(double __a, double __b);\n"
3157"__device__ float __nv_copysignf(float __a, float __b);\n"
3158"__device__ double __nv_cos(double __a);\n"
3159"__device__ float __nv_cosf(float __a);\n"
3160"__device__ double __nv_cosh(double __a);\n"
3161"__device__ float __nv_coshf(float __a);\n"
3162"__device__ double __nv_cospi(double __a);\n"
3163"__device__ float __nv_cospif(float __a);\n"
3164"__device__ double __nv_cyl_bessel_i0(double __a);\n"
3165"__device__ float __nv_cyl_bessel_i0f(float __a);\n"
3166"__device__ double __nv_cyl_bessel_i1(double __a);\n"
3167"__device__ float __nv_cyl_bessel_i1f(float __a);\n"
3168"__device__ double __nv_dadd_rd(double __a, double __b);\n"
3169"__device__ double __nv_dadd_rn(double __a, double __b);\n"
3170"__device__ double __nv_dadd_ru(double __a, double __b);\n"
3171"__device__ double __nv_dadd_rz(double __a, double __b);\n"
3172"__device__ double __nv_ddiv_rd(double __a, double __b);\n"
3173"__device__ double __nv_ddiv_rn(double __a, double __b);\n"
3174"__device__ double __nv_ddiv_ru(double __a, double __b);\n"
3175"__device__ double __nv_ddiv_rz(double __a, double __b);\n"
3176"__device__ double __nv_dmul_rd(double __a, double __b);\n"
3177"__device__ double __nv_dmul_rn(double __a, double __b);\n"
3178"__device__ double __nv_dmul_ru(double __a, double __b);\n"
3179"__device__ double __nv_dmul_rz(double __a, double __b);\n"
3180"__device__ float __nv_double2float_rd(double __a);\n"
3181"__device__ float __nv_double2float_rn(double __a);\n"
3182"__device__ float __nv_double2float_ru(double __a);\n"
3183"__device__ float __nv_double2float_rz(double __a);\n"
3184"__device__ int __nv_double2hiint(double __a);\n"
3185"__device__ int __nv_double2int_rd(double __a);\n"
3186"__device__ int __nv_double2int_rn(double __a);\n"
3187"__device__ int __nv_double2int_ru(double __a);\n"
3188"__device__ int __nv_double2int_rz(double __a);\n"
3189"__device__ long long __nv_double2ll_rd(double __a);\n"
3190"__device__ long long __nv_double2ll_rn(double __a);\n"
3191"__device__ long long __nv_double2ll_ru(double __a);\n"
3192"__device__ long long __nv_double2ll_rz(double __a);\n"
3193"__device__ int __nv_double2loint(double __a);\n"
3194"__device__ unsigned int __nv_double2uint_rd(double __a);\n"
3195"__device__ unsigned int __nv_double2uint_rn(double __a);\n"
3196"__device__ unsigned int __nv_double2uint_ru(double __a);\n"
3197"__device__ unsigned int __nv_double2uint_rz(double __a);\n"
3198"__device__ unsigned long long __nv_double2ull_rd(double __a);\n"
3199"__device__ unsigned long long __nv_double2ull_rn(double __a);\n"
3200"__device__ unsigned long long __nv_double2ull_ru(double __a);\n"
3201"__device__ unsigned long long __nv_double2ull_rz(double __a);\n"
3202"__device__ unsigned long long __nv_double_as_longlong(double __a);\n"
3203"__device__ double __nv_drcp_rd(double __a);\n"
3204"__device__ double __nv_drcp_rn(double __a);\n"
3205"__device__ double __nv_drcp_ru(double __a);\n"
3206"__device__ double __nv_drcp_rz(double __a);\n"
3207"__device__ double __nv_dsqrt_rd(double __a);\n"
3208"__device__ double __nv_dsqrt_rn(double __a);\n"
3209"__device__ double __nv_dsqrt_ru(double __a);\n"
3210"__device__ double __nv_dsqrt_rz(double __a);\n"
3211"__device__ double __nv_dsub_rd(double __a, double __b);\n"
3212"__device__ double __nv_dsub_rn(double __a, double __b);\n"
3213"__device__ double __nv_dsub_ru(double __a, double __b);\n"
3214"__device__ double __nv_dsub_rz(double __a, double __b);\n"
3215"__device__ double __nv_erfc(double __a);\n"
3216"__device__ float __nv_erfcf(float __a);\n"
3217"__device__ double __nv_erfcinv(double __a);\n"
3218"__device__ float __nv_erfcinvf(float __a);\n"
3219"__device__ double __nv_erfcx(double __a);\n"
3220"__device__ float __nv_erfcxf(float __a);\n"
3221"__device__ double __nv_erf(double __a);\n"
3222"__device__ float __nv_erff(float __a);\n"
3223"__device__ double __nv_erfinv(double __a);\n"
3224"__device__ float __nv_erfinvf(float __a);\n"
3225"__device__ double __nv_exp10(double __a);\n"
3226"__device__ float __nv_exp10f(float __a);\n"
3227"__device__ double __nv_exp2(double __a);\n"
3228"__device__ float __nv_exp2f(float __a);\n"
3229"__device__ double __nv_exp(double __a);\n"
3230"__device__ float __nv_expf(float __a);\n"
3231"__device__ double __nv_expm1(double __a);\n"
3232"__device__ float __nv_expm1f(float __a);\n"
3233"__device__ double __nv_fabs(double __a);\n"
3234"__device__ float __nv_fabsf(float __a);\n"
3235"__device__ float __nv_fadd_rd(float __a, float __b);\n"
3236"__device__ float __nv_fadd_rn(float __a, float __b);\n"
3237"__device__ float __nv_fadd_ru(float __a, float __b);\n"
3238"__device__ float __nv_fadd_rz(float __a, float __b);\n"
3239"__device__ float __nv_fast_cosf(float __a);\n"
3240"__device__ float __nv_fast_exp10f(float __a);\n"
3241"__device__ float __nv_fast_expf(float __a);\n"
3242"__device__ float __nv_fast_fdividef(float __a, float __b);\n"
3243"__device__ float __nv_fast_log10f(float __a);\n"
3244"__device__ float __nv_fast_log2f(float __a);\n"
3245"__device__ float __nv_fast_logf(float __a);\n"
3246"__device__ float __nv_fast_powf(float __a, float __b);\n"
3247"__device__ void __nv_fast_sincosf(float __a, float *__sptr, float *__cptr);\n"
3248"__device__ float __nv_fast_sinf(float __a);\n"
3249"__device__ float __nv_fast_tanf(float __a);\n"
3250"__device__ double __nv_fdim(double __a, double __b);\n"
3251"__device__ float __nv_fdimf(float __a, float __b);\n"
3252"__device__ float __nv_fdiv_rd(float __a, float __b);\n"
3253"__device__ float __nv_fdiv_rn(float __a, float __b);\n"
3254"__device__ float __nv_fdiv_ru(float __a, float __b);\n"
3255"__device__ float __nv_fdiv_rz(float __a, float __b);\n"
3256"__device__ int __nv_ffs(int __a);\n"
3257"__device__ int __nv_ffsll(long long __a);\n"
3258"__device__ int __nv_finitef(float __a);\n"
3259"__device__ unsigned short __nv_float2half_rn(float __a);\n"
3260"__device__ int __nv_float2int_rd(float __a);\n"
3261"__device__ int __nv_float2int_rn(float __a);\n"
3262"__device__ int __nv_float2int_ru(float __a);\n"
3263"__device__ int __nv_float2int_rz(float __a);\n"
3264"__device__ long long __nv_float2ll_rd(float __a);\n"
3265"__device__ long long __nv_float2ll_rn(float __a);\n"
3266"__device__ long long __nv_float2ll_ru(float __a);\n"
3267"__device__ long long __nv_float2ll_rz(float __a);\n"
3268"__device__ unsigned int __nv_float2uint_rd(float __a);\n"
3269"__device__ unsigned int __nv_float2uint_rn(float __a);\n"
3270"__device__ unsigned int __nv_float2uint_ru(float __a);\n"
3271"__device__ unsigned int __nv_float2uint_rz(float __a);\n"
3272"__device__ unsigned long long __nv_float2ull_rd(float __a);\n"
3273"__device__ unsigned long long __nv_float2ull_rn(float __a);\n"
3274"__device__ unsigned long long __nv_float2ull_ru(float __a);\n"
3275"__device__ unsigned long long __nv_float2ull_rz(float __a);\n"
3276"__device__ int __nv_float_as_int(float __a);\n"
3277"__device__ unsigned int __nv_float_as_uint(float __a);\n"
3278"__device__ double __nv_floor(double __a);\n"
3279"__device__ float __nv_floorf(float __a);\n"
3280"__device__ double __nv_fma(double __a, double __b, double __c);\n"
3281"__device__ float __nv_fmaf(float __a, float __b, float __c);\n"
3282"__device__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c);\n"
3283"__device__ float __nv_fmaf_ieee_rn(float __a, float __b, float __c);\n"
3284"__device__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c);\n"
3285"__device__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c);\n"
3286"__device__ float __nv_fmaf_rd(float __a, float __b, float __c);\n"
3287"__device__ float __nv_fmaf_rn(float __a, float __b, float __c);\n"
3288"__device__ float __nv_fmaf_ru(float __a, float __b, float __c);\n"
3289"__device__ float __nv_fmaf_rz(float __a, float __b, float __c);\n"
3290"__device__ double __nv_fma_rd(double __a, double __b, double __c);\n"
3291"__device__ double __nv_fma_rn(double __a, double __b, double __c);\n"
3292"__device__ double __nv_fma_ru(double __a, double __b, double __c);\n"
3293"__device__ double __nv_fma_rz(double __a, double __b, double __c);\n"
3294"__device__ double __nv_fmax(double __a, double __b);\n"
3295"__device__ float __nv_fmaxf(float __a, float __b);\n"
3296"__device__ double __nv_fmin(double __a, double __b);\n"
3297"__device__ float __nv_fminf(float __a, float __b);\n"
3298"__device__ double __nv_fmod(double __a, double __b);\n"
3299"__device__ float __nv_fmodf(float __a, float __b);\n"
3300"__device__ float __nv_fmul_rd(float __a, float __b);\n"
3301"__device__ float __nv_fmul_rn(float __a, float __b);\n"
3302"__device__ float __nv_fmul_ru(float __a, float __b);\n"
3303"__device__ float __nv_fmul_rz(float __a, float __b);\n"
3304"__device__ float __nv_frcp_rd(float __a);\n"
3305"__device__ float __nv_frcp_rn(float __a);\n"
3306"__device__ float __nv_frcp_ru(float __a);\n"
3307"__device__ float __nv_frcp_rz(float __a);\n"
3308"__device__ double __nv_frexp(double __a, int *__b);\n"
3309"__device__ float __nv_frexpf(float __a, int *__b);\n"
3310"__device__ float __nv_frsqrt_rn(float __a);\n"
3311"__device__ float __nv_fsqrt_rd(float __a);\n"
3312"__device__ float __nv_fsqrt_rn(float __a);\n"
3313"__device__ float __nv_fsqrt_ru(float __a);\n"
3314"__device__ float __nv_fsqrt_rz(float __a);\n"
3315"__device__ float __nv_fsub_rd(float __a, float __b);\n"
3316"__device__ float __nv_fsub_rn(float __a, float __b);\n"
3317"__device__ float __nv_fsub_ru(float __a, float __b);\n"
3318"__device__ float __nv_fsub_rz(float __a, float __b);\n"
3319"__device__ int __nv_hadd(int __a, int __b);\n"
3320"__device__ float __nv_half2float(unsigned short __h);\n"
3321"__device__ double __nv_hiloint2double(int __a, int __b);\n"
3322"__device__ double __nv_hypot(double __a, double __b);\n"
3323"__device__ float __nv_hypotf(float __a, float __b);\n"
3324"__device__ int __nv_ilogb(double __a);\n"
3325"__device__ int __nv_ilogbf(float __a);\n"
3326"__device__ double __nv_int2double_rn(int __a);\n"
3327"__device__ float __nv_int2float_rd(int __a);\n"
3328"__device__ float __nv_int2float_rn(int __a);\n"
3329"__device__ float __nv_int2float_ru(int __a);\n"
3330"__device__ float __nv_int2float_rz(int __a);\n"
3331"__device__ float __nv_int_as_float(int __a);\n"
3332"__device__ int __nv_isfinited(double __a);\n"
3333"__device__ int __nv_isinfd(double __a);\n"
3334"__device__ int __nv_isinff(float __a);\n"
3335"__device__ int __nv_isnand(double __a);\n"
3336"__device__ int __nv_isnanf(float __a);\n"
3337"__device__ double __nv_j0(double __a);\n"
3338"__device__ float __nv_j0f(float __a);\n"
3339"__device__ double __nv_j1(double __a);\n"
3340"__device__ float __nv_j1f(float __a);\n"
3341"__device__ float __nv_jnf(int __a, float __b);\n"
3342"__device__ double __nv_jn(int __a, double __b);\n"
3343"__device__ double __nv_ldexp(double __a, int __b);\n"
3344"__device__ float __nv_ldexpf(float __a, int __b);\n"
3345"__device__ double __nv_lgamma(double __a);\n"
3346"__device__ float __nv_lgammaf(float __a);\n"
3347"__device__ double __nv_ll2double_rd(long long __a);\n"
3348"__device__ double __nv_ll2double_rn(long long __a);\n"
3349"__device__ double __nv_ll2double_ru(long long __a);\n"
3350"__device__ double __nv_ll2double_rz(long long __a);\n"
3351"__device__ float __nv_ll2float_rd(long long __a);\n"
3352"__device__ float __nv_ll2float_rn(long long __a);\n"
3353"__device__ float __nv_ll2float_ru(long long __a);\n"
3354"__device__ float __nv_ll2float_rz(long long __a);\n"
3355"__device__ long long __nv_llabs(long long __a);\n"
3356"__device__ long long __nv_llmax(long long __a, long long __b);\n"
3357"__device__ long long __nv_llmin(long long __a, long long __b);\n"
3358"__device__ long long __nv_llrint(double __a);\n"
3359"__device__ long long __nv_llrintf(float __a);\n"
3360"__device__ long long __nv_llround(double __a);\n"
3361"__device__ long long __nv_llroundf(float __a);\n"
3362"__device__ double __nv_log10(double __a);\n"
3363"__device__ float __nv_log10f(float __a);\n"
3364"__device__ double __nv_log1p(double __a);\n"
3365"__device__ float __nv_log1pf(float __a);\n"
3366"__device__ double __nv_log2(double __a);\n"
3367"__device__ float __nv_log2f(float __a);\n"
3368"__device__ double __nv_logb(double __a);\n"
3369"__device__ float __nv_logbf(float __a);\n"
3370"__device__ double __nv_log(double __a);\n"
3371"__device__ float __nv_logf(float __a);\n"
3372"__device__ double __nv_longlong_as_double(long long __a);\n"
3373"__device__ int __nv_max(int __a, int __b);\n"
3374"__device__ int __nv_min(int __a, int __b);\n"
3375"__device__ double __nv_modf(double __a, double *__b);\n"
3376"__device__ float __nv_modff(float __a, float *__b);\n"
3377"__device__ int __nv_mul24(int __a, int __b);\n"
3378"__device__ long long __nv_mul64hi(long long __a, long long __b);\n"
3379"__device__ int __nv_mulhi(int __a, int __b);\n"
3380"__device__ double __nv_nan(const signed char *__a);\n"
3381"__device__ float __nv_nanf(const signed char *__a);\n"
3382"__device__ double __nv_nearbyint(double __a);\n"
3383"__device__ float __nv_nearbyintf(float __a);\n"
3384"__device__ double __nv_nextafter(double __a, double __b);\n"
3385"__device__ float __nv_nextafterf(float __a, float __b);\n"
3386"__device__ double __nv_norm3d(double __a, double __b, double __c);\n"
3387"__device__ float __nv_norm3df(float __a, float __b, float __c);\n"
3388"__device__ double __nv_norm4d(double __a, double __b, double __c, double __d);\n"
3389"__device__ float __nv_norm4df(float __a, float __b, float __c, float __d);\n"
3390"__device__ double __nv_normcdf(double __a);\n"
3391"__device__ float __nv_normcdff(float __a);\n"
3392"__device__ double __nv_normcdfinv(double __a);\n"
3393"__device__ float __nv_normcdfinvf(float __a);\n"
3394"__device__ float __nv_normf(int __a, const float *__b);\n"
3395"__device__ double __nv_norm(int __a, const double *__b);\n"
3396"__device__ int __nv_popc(int __a);\n"
3397"__device__ int __nv_popcll(long long __a);\n"
3398"__device__ double __nv_pow(double __a, double __b);\n"
3399"__device__ float __nv_powf(float __a, float __b);\n"
3400"__device__ double __nv_powi(double __a, int __b);\n"
3401"__device__ float __nv_powif(float __a, int __b);\n"
3402"__device__ double __nv_rcbrt(double __a);\n"
3403"__device__ float __nv_rcbrtf(float __a);\n"
3404"__device__ double __nv_rcp64h(double __a);\n"
3405"__device__ double __nv_remainder(double __a, double __b);\n"
3406"__device__ float __nv_remainderf(float __a, float __b);\n"
3407"__device__ double __nv_remquo(double __a, double __b, int *__c);\n"
3408"__device__ float __nv_remquof(float __a, float __b, int *__c);\n"
3409"__device__ int __nv_rhadd(int __a, int __b);\n"
3410"__device__ double __nv_rhypot(double __a, double __b);\n"
3411"__device__ float __nv_rhypotf(float __a, float __b);\n"
3412"__device__ double __nv_rint(double __a);\n"
3413"__device__ float __nv_rintf(float __a);\n"
3414"__device__ double __nv_rnorm3d(double __a, double __b, double __c);\n"
3415"__device__ float __nv_rnorm3df(float __a, float __b, float __c);\n"
3416"__device__ double __nv_rnorm4d(double __a, double __b, double __c, double __d);\n"
3417"__device__ float __nv_rnorm4df(float __a, float __b, float __c, float __d);\n"
3418"__device__ float __nv_rnormf(int __a, const float *__b);\n"
3419"__device__ double __nv_rnorm(int __a, const double *__b);\n"
3420"__device__ double __nv_round(double __a);\n"
3421"__device__ float __nv_roundf(float __a);\n"
3422"__device__ double __nv_rsqrt(double __a);\n"
3423"__device__ float __nv_rsqrtf(float __a);\n"
3424"__device__ int __nv_sad(int __a, int __b, int __c);\n"
3425"__device__ float __nv_saturatef(float __a);\n"
3426"__device__ double __nv_scalbn(double __a, int __b);\n"
3427"__device__ float __nv_scalbnf(float __a, int __b);\n"
3428"__device__ int __nv_signbitd(double __a);\n"
3429"__device__ int __nv_signbitf(float __a);\n"
3430"__device__ void __nv_sincos(double __a, double *__b, double *__c);\n"
3431"__device__ void __nv_sincosf(float __a, float *__b, float *__c);\n"
3432"__device__ void __nv_sincospi(double __a, double *__b, double *__c);\n"
3433"__device__ void __nv_sincospif(float __a, float *__b, float *__c);\n"
3434"__device__ double __nv_sin(double __a);\n"
3435"__device__ float __nv_sinf(float __a);\n"
3436"__device__ double __nv_sinh(double __a);\n"
3437"__device__ float __nv_sinhf(float __a);\n"
3438"__device__ double __nv_sinpi(double __a);\n"
3439"__device__ float __nv_sinpif(float __a);\n"
3440"__device__ double __nv_sqrt(double __a);\n"
3441"__device__ float __nv_sqrtf(float __a);\n"
3442"__device__ double __nv_tan(double __a);\n"
3443"__device__ float __nv_tanf(float __a);\n"
3444"__device__ double __nv_tanh(double __a);\n"
3445"__device__ float __nv_tanhf(float __a);\n"
3446"__device__ double __nv_tgamma(double __a);\n"
3447"__device__ float __nv_tgammaf(float __a);\n"
3448"__device__ double __nv_trunc(double __a);\n"
3449"__device__ float __nv_truncf(float __a);\n"
3450"__device__ int __nv_uhadd(unsigned int __a, unsigned int __b);\n"
3451"__device__ double __nv_uint2double_rn(unsigned int __i);\n"
3452"__device__ float __nv_uint2float_rd(unsigned int __a);\n"
3453"__device__ float __nv_uint2float_rn(unsigned int __a);\n"
3454"__device__ float __nv_uint2float_ru(unsigned int __a);\n"
3455"__device__ float __nv_uint2float_rz(unsigned int __a);\n"
3456"__device__ float __nv_uint_as_float(unsigned int __a);\n"
3457"__device__ double __nv_ull2double_rd(unsigned long long __a);\n"
3458"__device__ double __nv_ull2double_rn(unsigned long long __a);\n"
3459"__device__ double __nv_ull2double_ru(unsigned long long __a);\n"
3460"__device__ double __nv_ull2double_rz(unsigned long long __a);\n"
3461"__device__ float __nv_ull2float_rd(unsigned long long __a);\n"
3462"__device__ float __nv_ull2float_rn(unsigned long long __a);\n"
3463"__device__ float __nv_ull2float_ru(unsigned long long __a);\n"
3464"__device__ float __nv_ull2float_rz(unsigned long long __a);\n"
3465"__device__ unsigned long long __nv_ullmax(unsigned long long __a,\n"
3466" unsigned long long __b);\n"
3467"__device__ unsigned long long __nv_ullmin(unsigned long long __a,\n"
3468" unsigned long long __b);\n"
3469"__device__ unsigned int __nv_umax(unsigned int __a, unsigned int __b);\n"
3470"__device__ unsigned int __nv_umin(unsigned int __a, unsigned int __b);\n"
3471"__device__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b);\n"
3472"__device__ unsigned long long __nv_umul64hi(unsigned long long __a,\n"
3473" unsigned long long __b);\n"
3474"__device__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b);\n"
3475"__device__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b);\n"
3476"__device__ unsigned int __nv_usad(unsigned int __a, unsigned int __b,\n"
3477" unsigned int __c);\n"
3478"#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020\n"
3479"__device__ int __nv_vabs2(int __a);\n"
3480"__device__ int __nv_vabs4(int __a);\n"
3481"__device__ int __nv_vabsdiffs2(int __a, int __b);\n"
3482"__device__ int __nv_vabsdiffs4(int __a, int __b);\n"
3483"__device__ int __nv_vabsdiffu2(int __a, int __b);\n"
3484"__device__ int __nv_vabsdiffu4(int __a, int __b);\n"
3485"__device__ int __nv_vabsss2(int __a);\n"
3486"__device__ int __nv_vabsss4(int __a);\n"
3487"__device__ int __nv_vadd2(int __a, int __b);\n"
3488"__device__ int __nv_vadd4(int __a, int __b);\n"
3489"__device__ int __nv_vaddss2(int __a, int __b);\n"
3490"__device__ int __nv_vaddss4(int __a, int __b);\n"
3491"__device__ int __nv_vaddus2(int __a, int __b);\n"
3492"__device__ int __nv_vaddus4(int __a, int __b);\n"
3493"__device__ int __nv_vavgs2(int __a, int __b);\n"
3494"__device__ int __nv_vavgs4(int __a, int __b);\n"
3495"__device__ int __nv_vavgu2(int __a, int __b);\n"
3496"__device__ int __nv_vavgu4(int __a, int __b);\n"
3497"__device__ int __nv_vcmpeq2(int __a, int __b);\n"
3498"__device__ int __nv_vcmpeq4(int __a, int __b);\n"
3499"__device__ int __nv_vcmpges2(int __a, int __b);\n"
3500"__device__ int __nv_vcmpges4(int __a, int __b);\n"
3501"__device__ int __nv_vcmpgeu2(int __a, int __b);\n"
3502"__device__ int __nv_vcmpgeu4(int __a, int __b);\n"
3503"__device__ int __nv_vcmpgts2(int __a, int __b);\n"
3504"__device__ int __nv_vcmpgts4(int __a, int __b);\n"
3505"__device__ int __nv_vcmpgtu2(int __a, int __b);\n"
3506"__device__ int __nv_vcmpgtu4(int __a, int __b);\n"
3507"__device__ int __nv_vcmples2(int __a, int __b);\n"
3508"__device__ int __nv_vcmples4(int __a, int __b);\n"
3509"__device__ int __nv_vcmpleu2(int __a, int __b);\n"
3510"__device__ int __nv_vcmpleu4(int __a, int __b);\n"
3511"__device__ int __nv_vcmplts2(int __a, int __b);\n"
3512"__device__ int __nv_vcmplts4(int __a, int __b);\n"
3513"__device__ int __nv_vcmpltu2(int __a, int __b);\n"
3514"__device__ int __nv_vcmpltu4(int __a, int __b);\n"
3515"__device__ int __nv_vcmpne2(int __a, int __b);\n"
3516"__device__ int __nv_vcmpne4(int __a, int __b);\n"
3517"__device__ int __nv_vhaddu2(int __a, int __b);\n"
3518"__device__ int __nv_vhaddu4(int __a, int __b);\n"
3519"__device__ int __nv_vmaxs2(int __a, int __b);\n"
3520"__device__ int __nv_vmaxs4(int __a, int __b);\n"
3521"__device__ int __nv_vmaxu2(int __a, int __b);\n"
3522"__device__ int __nv_vmaxu4(int __a, int __b);\n"
3523"__device__ int __nv_vmins2(int __a, int __b);\n"
3524"__device__ int __nv_vmins4(int __a, int __b);\n"
3525"__device__ int __nv_vminu2(int __a, int __b);\n"
3526"__device__ int __nv_vminu4(int __a, int __b);\n"
3527"__device__ int __nv_vneg2(int __a);\n"
3528"__device__ int __nv_vneg4(int __a);\n"
3529"__device__ int __nv_vnegss2(int __a);\n"
3530"__device__ int __nv_vnegss4(int __a);\n"
3531"__device__ int __nv_vsads2(int __a, int __b);\n"
3532"__device__ int __nv_vsads4(int __a, int __b);\n"
3533"__device__ int __nv_vsadu2(int __a, int __b);\n"
3534"__device__ int __nv_vsadu4(int __a, int __b);\n"
3535"__device__ int __nv_vseteq2(int __a, int __b);\n"
3536"__device__ int __nv_vseteq4(int __a, int __b);\n"
3537"__device__ int __nv_vsetges2(int __a, int __b);\n"
3538"__device__ int __nv_vsetges4(int __a, int __b);\n"
3539"__device__ int __nv_vsetgeu2(int __a, int __b);\n"
3540"__device__ int __nv_vsetgeu4(int __a, int __b);\n"
3541"__device__ int __nv_vsetgts2(int __a, int __b);\n"
3542"__device__ int __nv_vsetgts4(int __a, int __b);\n"
3543"__device__ int __nv_vsetgtu2(int __a, int __b);\n"
3544"__device__ int __nv_vsetgtu4(int __a, int __b);\n"
3545"__device__ int __nv_vsetles2(int __a, int __b);\n"
3546"__device__ int __nv_vsetles4(int __a, int __b);\n"
3547"__device__ int __nv_vsetleu2(int __a, int __b);\n"
3548"__device__ int __nv_vsetleu4(int __a, int __b);\n"
3549"__device__ int __nv_vsetlts2(int __a, int __b);\n"
3550"__device__ int __nv_vsetlts4(int __a, int __b);\n"
3551"__device__ int __nv_vsetltu2(int __a, int __b);\n"
3552"__device__ int __nv_vsetltu4(int __a, int __b);\n"
3553"__device__ int __nv_vsetne2(int __a, int __b);\n"
3554"__device__ int __nv_vsetne4(int __a, int __b);\n"
3555"__device__ int __nv_vsub2(int __a, int __b);\n"
3556"__device__ int __nv_vsub4(int __a, int __b);\n"
3557"__device__ int __nv_vsubss2(int __a, int __b);\n"
3558"__device__ int __nv_vsubss4(int __a, int __b);\n"
3559"__device__ int __nv_vsubus2(int __a, int __b);\n"
3560"__device__ int __nv_vsubus4(int __a, int __b);\n"
3561"#endif // CUDA_VERSION\n"
3562"__device__ double __nv_y0(double __a);\n"
3563"__device__ float __nv_y0f(float __a);\n"
3564"__device__ double __nv_y1(double __a);\n"
3565"__device__ float __nv_y1f(float __a);\n"
3566"__device__ float __nv_ynf(int __a, float __b);\n"
3567"__device__ double __nv_yn(int __a, double __b);\n"
3568"} // extern \"C\"\n"
3569"#endif // __CLANG_CUDA_LIBDEVICE_DECLARES_H__\n"
3570"" } ,
3571 { "/builtins/__clang_cuda_math_forward_declares.h" , "/*===- __clang_math_forward_declares.h - Prototypes of __device__ math fns --===\n"
3572" *\n"
3573" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
3574" * of this software and associated documentation files (the \"Software\"), to deal\n"
3575" * in the Software without restriction, including without limitation the rights\n"
3576" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
3577" * copies of the Software, and to permit persons to whom the Software is\n"
3578" * furnished to do so, subject to the following conditions:\n"
3579" *\n"
3580" * The above copyright notice and this permission notice shall be included in\n"
3581" * all copies or substantial portions of the Software.\n"
3582" *\n"
3583" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
3584" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
3585" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
3586" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
3587" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
3588" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
3589" * THE SOFTWARE.\n"
3590" *\n"
3591" *===-----------------------------------------------------------------------===\n"
3592" */\n"
3593"#ifndef __CLANG__CUDA_MATH_FORWARD_DECLARES_H__\n"
3594"#define __CLANG__CUDA_MATH_FORWARD_DECLARES_H__\n"
3595"#ifndef __CUDA__\n"
3596"#error \"This file is for CUDA compilation only.\"\n"
3597"#endif\n"
3598"\n"
3599"// This file forward-declares of some math functions we (or the CUDA headers)\n"
3600"// will define later. We need to do this, and do it before cmath is included,\n"
3601"// because the standard library may have constexpr math functions. In the\n"
3602"// absence of a prior __device__ decl, those constexpr functions may become\n"
3603"// implicitly host+device. host+device functions can't be overloaded, so that\n"
3604"// would preclude the use of our own __device__ overloads for these functions.\n"
3605"\n"
3606"#pragma push_macro(\"__DEVICE__\")\n"
3607"#define __DEVICE__ \\\n"
3608" static __inline__ __attribute__((always_inline)) __attribute__((device))\n"
3609"\n"
3610"__DEVICE__ double abs(double);\n"
3611"__DEVICE__ float abs(float);\n"
3612"__DEVICE__ int abs(int);\n"
3613"__DEVICE__ long abs(long);\n"
3614"__DEVICE__ long long abs(long long);\n"
3615"__DEVICE__ double acos(double);\n"
3616"__DEVICE__ float acos(float);\n"
3617"__DEVICE__ double acosh(double);\n"
3618"__DEVICE__ float acosh(float);\n"
3619"__DEVICE__ double asin(double);\n"
3620"__DEVICE__ float asin(float);\n"
3621"__DEVICE__ double asinh(double);\n"
3622"__DEVICE__ float asinh(float);\n"
3623"__DEVICE__ double atan2(double, double);\n"
3624"__DEVICE__ float atan2(float, float);\n"
3625"__DEVICE__ double atan(double);\n"
3626"__DEVICE__ float atan(float);\n"
3627"__DEVICE__ double atanh(double);\n"
3628"__DEVICE__ float atanh(float);\n"
3629"__DEVICE__ double cbrt(double);\n"
3630"__DEVICE__ float cbrt(float);\n"
3631"__DEVICE__ double ceil(double);\n"
3632"__DEVICE__ float ceil(float);\n"
3633"__DEVICE__ double copysign(double, double);\n"
3634"__DEVICE__ float copysign(float, float);\n"
3635"__DEVICE__ double cos(double);\n"
3636"__DEVICE__ float cos(float);\n"
3637"__DEVICE__ double cosh(double);\n"
3638"__DEVICE__ float cosh(float);\n"
3639"__DEVICE__ double erfc(double);\n"
3640"__DEVICE__ float erfc(float);\n"
3641"__DEVICE__ double erf(double);\n"
3642"__DEVICE__ float erf(float);\n"
3643"__DEVICE__ double exp2(double);\n"
3644"__DEVICE__ float exp2(float);\n"
3645"__DEVICE__ double exp(double);\n"
3646"__DEVICE__ float exp(float);\n"
3647"__DEVICE__ double expm1(double);\n"
3648"__DEVICE__ float expm1(float);\n"
3649"__DEVICE__ double fabs(double);\n"
3650"__DEVICE__ float fabs(float);\n"
3651"__DEVICE__ double fdim(double, double);\n"
3652"__DEVICE__ float fdim(float, float);\n"
3653"__DEVICE__ double floor(double);\n"
3654"__DEVICE__ float floor(float);\n"
3655"__DEVICE__ double fma(double, double, double);\n"
3656"__DEVICE__ float fma(float, float, float);\n"
3657"__DEVICE__ double fmax(double, double);\n"
3658"__DEVICE__ float fmax(float, float);\n"
3659"__DEVICE__ double fmin(double, double);\n"
3660"__DEVICE__ float fmin(float, float);\n"
3661"__DEVICE__ double fmod(double, double);\n"
3662"__DEVICE__ float fmod(float, float);\n"
3663"__DEVICE__ int fpclassify(double);\n"
3664"__DEVICE__ int fpclassify(float);\n"
3665"__DEVICE__ double frexp(double, int *);\n"
3666"__DEVICE__ float frexp(float, int *);\n"
3667"__DEVICE__ double hypot(double, double);\n"
3668"__DEVICE__ float hypot(float, float);\n"
3669"__DEVICE__ int ilogb(double);\n"
3670"__DEVICE__ int ilogb(float);\n"
3671"__DEVICE__ bool isfinite(double);\n"
3672"__DEVICE__ bool isfinite(float);\n"
3673"__DEVICE__ bool isgreater(double, double);\n"
3674"__DEVICE__ bool isgreaterequal(double, double);\n"
3675"__DEVICE__ bool isgreaterequal(float, float);\n"
3676"__DEVICE__ bool isgreater(float, float);\n"
3677"__DEVICE__ bool isinf(double);\n"
3678"__DEVICE__ bool isinf(float);\n"
3679"__DEVICE__ bool isless(double, double);\n"
3680"__DEVICE__ bool islessequal(double, double);\n"
3681"__DEVICE__ bool islessequal(float, float);\n"
3682"__DEVICE__ bool isless(float, float);\n"
3683"__DEVICE__ bool islessgreater(double, double);\n"
3684"__DEVICE__ bool islessgreater(float, float);\n"
3685"__DEVICE__ bool isnan(double);\n"
3686"__DEVICE__ bool isnan(float);\n"
3687"__DEVICE__ bool isnormal(double);\n"
3688"__DEVICE__ bool isnormal(float);\n"
3689"__DEVICE__ bool isunordered(double, double);\n"
3690"__DEVICE__ bool isunordered(float, float);\n"
3691"__DEVICE__ long labs(long);\n"
3692"__DEVICE__ double ldexp(double, int);\n"
3693"__DEVICE__ float ldexp(float, int);\n"
3694"__DEVICE__ double lgamma(double);\n"
3695"__DEVICE__ float lgamma(float);\n"
3696"__DEVICE__ long long llabs(long long);\n"
3697"__DEVICE__ long long llrint(double);\n"
3698"__DEVICE__ long long llrint(float);\n"
3699"__DEVICE__ double log10(double);\n"
3700"__DEVICE__ float log10(float);\n"
3701"__DEVICE__ double log1p(double);\n"
3702"__DEVICE__ float log1p(float);\n"
3703"__DEVICE__ double log2(double);\n"
3704"__DEVICE__ float log2(float);\n"
3705"__DEVICE__ double logb(double);\n"
3706"__DEVICE__ float logb(float);\n"
3707"__DEVICE__ double log(double);\n"
3708"__DEVICE__ float log(float);\n"
3709"__DEVICE__ long lrint(double);\n"
3710"__DEVICE__ long lrint(float);\n"
3711"__DEVICE__ long lround(double);\n"
3712"__DEVICE__ long lround(float);\n"
3713"__DEVICE__ long long llround(float); // No llround(double).\n"
3714"__DEVICE__ double modf(double, double *);\n"
3715"__DEVICE__ float modf(float, float *);\n"
3716"__DEVICE__ double nan(const char *);\n"
3717"__DEVICE__ float nanf(const char *);\n"
3718"__DEVICE__ double nearbyint(double);\n"
3719"__DEVICE__ float nearbyint(float);\n"
3720"__DEVICE__ double nextafter(double, double);\n"
3721"__DEVICE__ float nextafter(float, float);\n"
3722"__DEVICE__ double pow(double, double);\n"
3723"__DEVICE__ double pow(double, int);\n"
3724"__DEVICE__ float pow(float, float);\n"
3725"__DEVICE__ float pow(float, int);\n"
3726"__DEVICE__ double remainder(double, double);\n"
3727"__DEVICE__ float remainder(float, float);\n"
3728"__DEVICE__ double remquo(double, double, int *);\n"
3729"__DEVICE__ float remquo(float, float, int *);\n"
3730"__DEVICE__ double rint(double);\n"
3731"__DEVICE__ float rint(float);\n"
3732"__DEVICE__ double round(double);\n"
3733"__DEVICE__ float round(float);\n"
3734"__DEVICE__ double scalbln(double, long);\n"
3735"__DEVICE__ float scalbln(float, long);\n"
3736"__DEVICE__ double scalbn(double, int);\n"
3737"__DEVICE__ float scalbn(float, int);\n"
3738"__DEVICE__ bool signbit(double);\n"
3739"__DEVICE__ bool signbit(float);\n"
3740"__DEVICE__ double sin(double);\n"
3741"__DEVICE__ float sin(float);\n"
3742"__DEVICE__ double sinh(double);\n"
3743"__DEVICE__ float sinh(float);\n"
3744"__DEVICE__ double sqrt(double);\n"
3745"__DEVICE__ float sqrt(float);\n"
3746"__DEVICE__ double tan(double);\n"
3747"__DEVICE__ float tan(float);\n"
3748"__DEVICE__ double tanh(double);\n"
3749"__DEVICE__ float tanh(float);\n"
3750"__DEVICE__ double tgamma(double);\n"
3751"__DEVICE__ float tgamma(float);\n"
3752"__DEVICE__ double trunc(double);\n"
3753"__DEVICE__ float trunc(float);\n"
3754"\n"
3755"// Notably missing above is nexttoward, which we don't define on\n"
3756"// the device side because libdevice doesn't give us an implementation, and we\n"
3757"// don't want to be in the business of writing one ourselves.\n"
3758"\n"
3759"// We need to define these overloads in exactly the namespace our standard\n"
3760"// library uses (including the right inline namespace), otherwise they won't be\n"
3761"// picked up by other functions in the standard library (e.g. functions in\n"
3762"// <complex>). Thus the ugliness below.\n"
3763"#ifdef _LIBCPP_BEGIN_NAMESPACE_STD\n"
3764"_LIBCPP_BEGIN_NAMESPACE_STD\n"
3765"#else\n"
3766"namespace std {\n"
3767"#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
3768"_GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
3769"#endif\n"
3770"#endif\n"
3771"\n"
3772"using ::abs;\n"
3773"using ::acos;\n"
3774"using ::acosh;\n"
3775"using ::asin;\n"
3776"using ::asinh;\n"
3777"using ::atan;\n"
3778"using ::atan2;\n"
3779"using ::atanh;\n"
3780"using ::cbrt;\n"
3781"using ::ceil;\n"
3782"using ::copysign;\n"
3783"using ::cos;\n"
3784"using ::cosh;\n"
3785"using ::erf;\n"
3786"using ::erfc;\n"
3787"using ::exp;\n"
3788"using ::exp2;\n"
3789"using ::expm1;\n"
3790"using ::fabs;\n"
3791"using ::fdim;\n"
3792"using ::floor;\n"
3793"using ::fma;\n"
3794"using ::fmax;\n"
3795"using ::fmin;\n"
3796"using ::fmod;\n"
3797"using ::fpclassify;\n"
3798"using ::frexp;\n"
3799"using ::hypot;\n"
3800"using ::ilogb;\n"
3801"using ::isfinite;\n"
3802"using ::isgreater;\n"
3803"using ::isgreaterequal;\n"
3804"using ::isinf;\n"
3805"using ::isless;\n"
3806"using ::islessequal;\n"
3807"using ::islessgreater;\n"
3808"using ::isnan;\n"
3809"using ::isnormal;\n"
3810"using ::isunordered;\n"
3811"using ::labs;\n"
3812"using ::ldexp;\n"
3813"using ::lgamma;\n"
3814"using ::llabs;\n"
3815"using ::llrint;\n"
3816"using ::log;\n"
3817"using ::log10;\n"
3818"using ::log1p;\n"
3819"using ::log2;\n"
3820"using ::logb;\n"
3821"using ::lrint;\n"
3822"using ::lround;\n"
3823"using ::llround;\n"
3824"using ::modf;\n"
3825"using ::nan;\n"
3826"using ::nanf;\n"
3827"using ::nearbyint;\n"
3828"using ::nextafter;\n"
3829"using ::pow;\n"
3830"using ::remainder;\n"
3831"using ::remquo;\n"
3832"using ::rint;\n"
3833"using ::round;\n"
3834"using ::scalbln;\n"
3835"using ::scalbn;\n"
3836"using ::signbit;\n"
3837"using ::sin;\n"
3838"using ::sinh;\n"
3839"using ::sqrt;\n"
3840"using ::tan;\n"
3841"using ::tanh;\n"
3842"using ::tgamma;\n"
3843"using ::trunc;\n"
3844"\n"
3845"#ifdef _LIBCPP_END_NAMESPACE_STD\n"
3846"_LIBCPP_END_NAMESPACE_STD\n"
3847"#else\n"
3848"#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
3849"_GLIBCXX_END_NAMESPACE_VERSION\n"
3850"#endif\n"
3851"} // namespace std\n"
3852"#endif\n"
3853"\n"
3854"#pragma pop_macro(\"__DEVICE__\")\n"
3855"\n"
3856"#endif\n"
3857"" } ,
3858 { "/builtins/__clang_cuda_runtime_wrapper.h" , "/*===---- __clang_cuda_runtime_wrapper.h - CUDA runtime support -------------===\n"
3859" *\n"
3860" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
3861" * of this software and associated documentation files (the \"Software\"), to deal\n"
3862" * in the Software without restriction, including without limitation the rights\n"
3863" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
3864" * copies of the Software, and to permit persons to whom the Software is\n"
3865" * furnished to do so, subject to the following conditions:\n"
3866" *\n"
3867" * The above copyright notice and this permission notice shall be included in\n"
3868" * all copies or substantial portions of the Software.\n"
3869" *\n"
3870" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
3871" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
3872" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
3873" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
3874" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
3875" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
3876" * THE SOFTWARE.\n"
3877" *\n"
3878" *===-----------------------------------------------------------------------===\n"
3879" */\n"
3880"\n"
3881"/*\n"
3882" * WARNING: This header is intended to be directly -include'd by\n"
3883" * the compiler and is not supposed to be included by users.\n"
3884" *\n"
3885" * CUDA headers are implemented in a way that currently makes it\n"
3886" * impossible for user code to #include directly when compiling with\n"
3887" * Clang. They present different view of CUDA-supplied functions\n"
3888" * depending on where in NVCC's compilation pipeline the headers are\n"
3889" * included. Neither of these modes provides function definitions with\n"
3890" * correct attributes, so we use preprocessor to force the headers\n"
3891" * into a form that Clang can use.\n"
3892" *\n"
3893" * Similarly to NVCC which -include's cuda_runtime.h, Clang -include's\n"
3894" * this file during every CUDA compilation.\n"
3895" */\n"
3896"\n"
3897"#ifndef __CLANG_CUDA_RUNTIME_WRAPPER_H__\n"
3898"#define __CLANG_CUDA_RUNTIME_WRAPPER_H__\n"
3899"\n"
3900"#if defined(__CUDA__) && defined(__clang__)\n"
3901"\n"
3902"// Include some forward declares that must come before cmath.\n"
3903"#include <__clang_cuda_math_forward_declares.h>\n"
3904"\n"
3905"// Include some standard headers to avoid CUDA headers including them\n"
3906"// while some required macros (like __THROW) are in a weird state.\n"
3907"#include <cmath>\n"
3908"#include <cstdlib>\n"
3909"#include <stdlib.h>\n"
3910"\n"
3911"// Preserve common macros that will be changed below by us or by CUDA\n"
3912"// headers.\n"
3913"#pragma push_macro(\"__THROW\")\n"
3914"#pragma push_macro(\"__CUDA_ARCH__\")\n"
3915"\n"
3916"// WARNING: Preprocessor hacks below are based on specific details of\n"
3917"// CUDA-7.x headers and are not expected to work with any other\n"
3918"// version of CUDA headers.\n"
3919"#include \"cuda.h\"\n"
3920"#if !defined(CUDA_VERSION)\n"
3921"#error \"cuda.h did not define CUDA_VERSION\"\n"
3922"#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10000\n"
3923"#error \"Unsupported CUDA version!\"\n"
3924"#endif\n"
3925"\n"
3926"#pragma push_macro(\"__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__\")\n"
3927"#if CUDA_VERSION >= 10000\n"
3928"#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__\n"
3929"#endif\n"
3930"\n"
3931"// Make largest subset of device functions available during host\n"
3932"// compilation -- SM_35 for the time being.\n"
3933"#ifndef __CUDA_ARCH__\n"
3934"#define __CUDA_ARCH__ 350\n"
3935"#endif\n"
3936"\n"
3937"#include \"__clang_cuda_builtin_vars.h\"\n"
3938"\n"
3939"// No need for device_launch_parameters.h as __clang_cuda_builtin_vars.h above\n"
3940"// has taken care of builtin variables declared in the file.\n"
3941"#define __DEVICE_LAUNCH_PARAMETERS_H__\n"
3942"\n"
3943"// {math,device}_functions.h only have declarations of the\n"
3944"// functions. We don't need them as we're going to pull in their\n"
3945"// definitions from .hpp files.\n"
3946"#define __DEVICE_FUNCTIONS_H__\n"
3947"#define __MATH_FUNCTIONS_H__\n"
3948"#define __COMMON_FUNCTIONS_H__\n"
3949"// device_functions_decls is replaced by __clang_cuda_device_functions.h\n"
3950"// included below.\n"
3951"#define __DEVICE_FUNCTIONS_DECLS_H__\n"
3952"\n"
3953"#undef __CUDACC__\n"
3954"#if CUDA_VERSION < 9000\n"
3955"#define __CUDABE__\n"
3956"#else\n"
3957"#define __CUDA_LIBDEVICE__\n"
3958"#endif\n"
3959"// Disables definitions of device-side runtime support stubs in\n"
3960"// cuda_device_runtime_api.h\n"
3961"#include \"driver_types.h\"\n"
3962"#include \"host_config.h\"\n"
3963"#include \"host_defines.h\"\n"
3964"\n"
3965"// Temporarily replace \"nv_weak\" with weak, so __attribute__((nv_weak)) in\n"
3966"// cuda_device_runtime_api.h ends up being __attribute__((weak)) which is the\n"
3967"// functional equivalent of what we need.\n"
3968"#pragma push_macro(\"nv_weak\")\n"
3969"#define nv_weak weak\n"
3970"#undef __CUDABE__\n"
3971"#undef __CUDA_LIBDEVICE__\n"
3972"#define __CUDACC__\n"
3973"#include \"cuda_runtime.h\"\n"
3974"\n"
3975"#pragma pop_macro(\"nv_weak\")\n"
3976"#undef __CUDACC__\n"
3977"#define __CUDABE__\n"
3978"\n"
3979"// CUDA headers use __nvvm_memcpy and __nvvm_memset which Clang does\n"
3980"// not have at the moment. Emulate them with a builtin memcpy/memset.\n"
3981"#define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n)\n"
3982"#define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n)\n"
3983"\n"
3984"#if CUDA_VERSION < 9000\n"
3985"#include \"crt/device_runtime.h\"\n"
3986"#endif\n"
3987"#include \"crt/host_runtime.h\"\n"
3988"// device_runtime.h defines __cxa_* macros that will conflict with\n"
3989"// cxxabi.h.\n"
3990"// FIXME: redefine these as __device__ functions.\n"
3991"#undef __cxa_vec_ctor\n"
3992"#undef __cxa_vec_cctor\n"
3993"#undef __cxa_vec_dtor\n"
3994"#undef __cxa_vec_new\n"
3995"#undef __cxa_vec_new2\n"
3996"#undef __cxa_vec_new3\n"
3997"#undef __cxa_vec_delete2\n"
3998"#undef __cxa_vec_delete\n"
3999"#undef __cxa_vec_delete3\n"
4000"#undef __cxa_pure_virtual\n"
4001"\n"
4002"// math_functions.hpp expects this host function be defined on MacOS, but it\n"
4003"// ends up not being there because of the games we play here. Just define it\n"
4004"// ourselves; it's simple enough.\n"
4005"#ifdef __APPLE__\n"
4006"inline __host__ double __signbitd(double x) {\n"
4007" return std::signbit(x);\n"
4008"}\n"
4009"#endif\n"
4010"\n"
4011"// CUDA 9.1 no longer provides declarations for libdevice functions, so we need\n"
4012"// to provide our own.\n"
4013"#include <__clang_cuda_libdevice_declares.h>\n"
4014"\n"
4015"// Wrappers for many device-side standard library functions became compiler\n"
4016"// builtins in CUDA-9 and have been removed from the CUDA headers. Clang now\n"
4017"// provides its own implementation of the wrappers.\n"
4018"#if CUDA_VERSION >= 9000\n"
4019"#include <__clang_cuda_device_functions.h>\n"
4020"#endif\n"
4021"\n"
4022"// __THROW is redefined to be empty by device_functions_decls.h in CUDA. Clang's\n"
4023"// counterpart does not do it, so we need to make it empty here to keep\n"
4024"// following CUDA includes happy.\n"
4025"#undef __THROW\n"
4026"#define __THROW\n"
4027"\n"
4028"// CUDA 8.0.41 relies on __USE_FAST_MATH__ and __CUDA_PREC_DIV's values.\n"
4029"// Previous versions used to check whether they are defined or not.\n"
4030"// CU_DEVICE_INVALID macro is only defined in 8.0.41, so we use it\n"
4031"// here to detect the switch.\n"
4032"\n"
4033"#if defined(CU_DEVICE_INVALID)\n"
4034"#if !defined(__USE_FAST_MATH__)\n"
4035"#define __USE_FAST_MATH__ 0\n"
4036"#endif\n"
4037"\n"
4038"#if !defined(__CUDA_PREC_DIV)\n"
4039"#define __CUDA_PREC_DIV 0\n"
4040"#endif\n"
4041"#endif\n"
4042"\n"
4043"// Temporarily poison __host__ macro to ensure it's not used by any of\n"
4044"// the headers we're about to include.\n"
4045"#pragma push_macro(\"__host__\")\n"
4046"#define __host__ UNEXPECTED_HOST_ATTRIBUTE\n"
4047"\n"
4048"// device_functions.hpp and math_functions*.hpp use 'static\n"
4049"// __forceinline__' (with no __device__) for definitions of device\n"
4050"// functions. Temporarily redefine __forceinline__ to include\n"
4051"// __device__.\n"
4052"#pragma push_macro(\"__forceinline__\")\n"
4053"#define __forceinline__ __device__ __inline__ __attribute__((always_inline))\n"
4054"#if CUDA_VERSION < 9000\n"
4055"#include \"device_functions.hpp\"\n"
4056"#endif\n"
4057"\n"
4058"// math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we\n"
4059"// get the slow-but-accurate or fast-but-inaccurate versions of functions like\n"
4060"// sin and exp. This is controlled in clang by -fcuda-approx-transcendentals.\n"
4061"//\n"
4062"// device_functions.hpp uses __USE_FAST_MATH__ for a different purpose (fast vs.\n"
4063"// slow divides), so we need to scope our define carefully here.\n"
4064"#pragma push_macro(\"__USE_FAST_MATH__\")\n"
4065"#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)\n"
4066"#define __USE_FAST_MATH__ 1\n"
4067"#endif\n"
4068"\n"
4069"#if CUDA_VERSION >= 9000\n"
4070"// CUDA-9.2 needs host-side memcpy for some host functions in\n"
4071"// device_functions.hpp\n"
4072"#if CUDA_VERSION >= 9020\n"
4073"#include <string.h>\n"
4074"#endif\n"
4075"#include \"crt/math_functions.hpp\"\n"
4076"#else\n"
4077"#include \"math_functions.hpp\"\n"
4078"#endif\n"
4079"\n"
4080"#pragma pop_macro(\"__USE_FAST_MATH__\")\n"
4081"\n"
4082"#if CUDA_VERSION < 9000\n"
4083"#include \"math_functions_dbl_ptx3.hpp\"\n"
4084"#endif\n"
4085"#pragma pop_macro(\"__forceinline__\")\n"
4086"\n"
4087"// Pull in host-only functions that are only available when neither\n"
4088"// __CUDACC__ nor __CUDABE__ are defined.\n"
4089"#undef __MATH_FUNCTIONS_HPP__\n"
4090"#undef __CUDABE__\n"
4091"#if CUDA_VERSION < 9000\n"
4092"#include \"math_functions.hpp\"\n"
4093"#endif\n"
4094"// Alas, additional overloads for these functions are hard to get to.\n"
4095"// Considering that we only need these overloads for a few functions,\n"
4096"// we can provide them here.\n"
4097"static inline float rsqrt(float __a) { return rsqrtf(__a); }\n"
4098"static inline float rcbrt(float __a) { return rcbrtf(__a); }\n"
4099"static inline float sinpi(float __a) { return sinpif(__a); }\n"
4100"static inline float cospi(float __a) { return cospif(__a); }\n"
4101"static inline void sincospi(float __a, float *__b, float *__c) {\n"
4102" return sincospif(__a, __b, __c);\n"
4103"}\n"
4104"static inline float erfcinv(float __a) { return erfcinvf(__a); }\n"
4105"static inline float normcdfinv(float __a) { return normcdfinvf(__a); }\n"
4106"static inline float normcdf(float __a) { return normcdff(__a); }\n"
4107"static inline float erfcx(float __a) { return erfcxf(__a); }\n"
4108"\n"
4109"#if CUDA_VERSION < 9000\n"
4110"// For some reason single-argument variant is not always declared by\n"
4111"// CUDA headers. Alas, device_functions.hpp included below needs it.\n"
4112"static inline __device__ void __brkpt(int __c) { __brkpt(); }\n"
4113"#endif\n"
4114"\n"
4115"// Now include *.hpp with definitions of various GPU functions. Alas,\n"
4116"// a lot of thins get declared/defined with __host__ attribute which\n"
4117"// we don't want and we have to define it out. We also have to include\n"
4118"// {device,math}_functions.hpp again in order to extract the other\n"
4119"// branch of #if/else inside.\n"
4120"#define __host__\n"
4121"#undef __CUDABE__\n"
4122"#define __CUDACC__\n"
4123"#if CUDA_VERSION >= 9000\n"
4124"// Some atomic functions became compiler builtins in CUDA-9 , so we need their\n"
4125"// declarations.\n"
4126"#include \"device_atomic_functions.h\"\n"
4127"#endif\n"
4128"#undef __DEVICE_FUNCTIONS_HPP__\n"
4129"#include \"device_atomic_functions.hpp\"\n"
4130"#if CUDA_VERSION >= 9000\n"
4131"#include \"crt/device_functions.hpp\"\n"
4132"#include \"crt/device_double_functions.hpp\"\n"
4133"#else\n"
4134"#include \"device_functions.hpp\"\n"
4135"#define __CUDABE__\n"
4136"#include \"device_double_functions.h\"\n"
4137"#undef __CUDABE__\n"
4138"#endif\n"
4139"#include \"sm_20_atomic_functions.hpp\"\n"
4140"#include \"sm_20_intrinsics.hpp\"\n"
4141"#include \"sm_32_atomic_functions.hpp\"\n"
4142"\n"
4143"// Don't include sm_30_intrinsics.h and sm_32_intrinsics.h. These define the\n"
4144"// __shfl and __ldg intrinsics using inline (volatile) asm, but we want to\n"
4145"// define them using builtins so that the optimizer can reason about and across\n"
4146"// these instructions. In particular, using intrinsics for ldg gets us the\n"
4147"// [addr+imm] addressing mode, which, although it doesn't actually exist in the\n"
4148"// hardware, seems to generate faster machine code because ptxas can more easily\n"
4149"// reason about our code.\n"
4150"\n"
4151"#if CUDA_VERSION >= 8000\n"
4152"#pragma push_macro(\"__CUDA_ARCH__\")\n"
4153"#undef __CUDA_ARCH__\n"
4154"#include \"sm_60_atomic_functions.hpp\"\n"
4155"#include \"sm_61_intrinsics.hpp\"\n"
4156"#pragma pop_macro(\"__CUDA_ARCH__\")\n"
4157"#endif\n"
4158"\n"
4159"#undef __MATH_FUNCTIONS_HPP__\n"
4160"\n"
4161"// math_functions.hpp defines ::signbit as a __host__ __device__ function. This\n"
4162"// conflicts with libstdc++'s constexpr ::signbit, so we have to rename\n"
4163"// math_function.hpp's ::signbit. It's guarded by #undef signbit, but that's\n"
4164"// conditional on __GNUC__. :)\n"
4165"#pragma push_macro(\"signbit\")\n"
4166"#pragma push_macro(\"__GNUC__\")\n"
4167"#undef __GNUC__\n"
4168"#define signbit __ignored_cuda_signbit\n"
4169"\n"
4170"// CUDA-9 omits device-side definitions of some math functions if it sees\n"
4171"// include guard from math.h wrapper from libstdc++. We have to undo the header\n"
4172"// guard temporarily to get the definitions we need.\n"
4173"#pragma push_macro(\"_GLIBCXX_MATH_H\")\n"
4174"#pragma push_macro(\"_LIBCPP_VERSION\")\n"
4175"#if CUDA_VERSION >= 9000\n"
4176"#undef _GLIBCXX_MATH_H\n"
4177"// We also need to undo another guard that checks for libc++ 3.8+\n"
4178"#ifdef _LIBCPP_VERSION\n"
4179"#define _LIBCPP_VERSION 3700\n"
4180"#endif\n"
4181"#endif\n"
4182"\n"
4183"#if CUDA_VERSION >= 9000\n"
4184"#include \"crt/math_functions.hpp\"\n"
4185"#else\n"
4186"#include \"math_functions.hpp\"\n"
4187"#endif\n"
4188"#pragma pop_macro(\"_GLIBCXX_MATH_H\")\n"
4189"#pragma pop_macro(\"_LIBCPP_VERSION\")\n"
4190"#pragma pop_macro(\"__GNUC__\")\n"
4191"#pragma pop_macro(\"signbit\")\n"
4192"\n"
4193"#pragma pop_macro(\"__host__\")\n"
4194"\n"
4195"#include \"texture_indirect_functions.h\"\n"
4196"\n"
4197"// Restore state of __CUDA_ARCH__ and __THROW we had on entry.\n"
4198"#pragma pop_macro(\"__CUDA_ARCH__\")\n"
4199"#pragma pop_macro(\"__THROW\")\n"
4200"\n"
4201"// Set up compiler macros expected to be seen during compilation.\n"
4202"#undef __CUDABE__\n"
4203"#define __CUDACC__\n"
4204"\n"
4205"extern \"C\" {\n"
4206"// Device-side CUDA system calls.\n"
4207"// http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls\n"
4208"// We need these declarations and wrappers for device-side\n"
4209"// malloc/free/printf calls to work without relying on\n"
4210"// -fcuda-disable-target-call-checks option.\n"
4211"__device__ int vprintf(const char *, const char *);\n"
4212"__device__ void free(void *) __attribute((nothrow));\n"
4213"__device__ void *malloc(size_t) __attribute((nothrow)) __attribute__((malloc));\n"
4214"__device__ void __assertfail(const char *__message, const char *__file,\n"
4215" unsigned __line, const char *__function,\n"
4216" size_t __charSize) __attribute__((noreturn));\n"
4217"\n"
4218"// In order for standard assert() macro on linux to work we need to\n"
4219"// provide device-side __assert_fail()\n"
4220"__device__ static inline void __assert_fail(const char *__message,\n"
4221" const char *__file, unsigned __line,\n"
4222" const char *__function) {\n"
4223" __assertfail(__message, __file, __line, __function, sizeof(char));\n"
4224"}\n"
4225"\n"
4226"// Clang will convert printf into vprintf, but we still need\n"
4227"// device-side declaration for it.\n"
4228"__device__ int printf(const char *, ...);\n"
4229"} // extern \"C\"\n"
4230"\n"
4231"// We also need device-side std::malloc and std::free.\n"
4232"namespace std {\n"
4233"__device__ static inline void free(void *__ptr) { ::free(__ptr); }\n"
4234"__device__ static inline void *malloc(size_t __size) {\n"
4235" return ::malloc(__size);\n"
4236"}\n"
4237"} // namespace std\n"
4238"\n"
4239"// Out-of-line implementations from __clang_cuda_builtin_vars.h. These need to\n"
4240"// come after we've pulled in the definition of uint3 and dim3.\n"
4241"\n"
4242"__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {\n"
4243" uint3 ret;\n"
4244" ret.x = x;\n"
4245" ret.y = y;\n"
4246" ret.z = z;\n"
4247" return ret;\n"
4248"}\n"
4249"\n"
4250"__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {\n"
4251" uint3 ret;\n"
4252" ret.x = x;\n"
4253" ret.y = y;\n"
4254" ret.z = z;\n"
4255" return ret;\n"
4256"}\n"
4257"\n"
4258"__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {\n"
4259" return dim3(x, y, z);\n"
4260"}\n"
4261"\n"
4262"__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {\n"
4263" return dim3(x, y, z);\n"
4264"}\n"
4265"\n"
4266"#include <__clang_cuda_cmath.h>\n"
4267"#include <__clang_cuda_intrinsics.h>\n"
4268"#include <__clang_cuda_complex_builtins.h>\n"
4269"\n"
4270"// curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host\n"
4271"// mode, giving them their \"proper\" types of dim3 and uint3. This is\n"
4272"// incompatible with the types we give in __clang_cuda_builtin_vars.h. As as\n"
4273"// hack, force-include the header (nvcc doesn't include it by default) but\n"
4274"// redefine dim3 and uint3 to our builtin types. (Thankfully dim3 and uint3 are\n"
4275"// only used here for the redeclarations of blockDim and threadIdx.)\n"
4276"#pragma push_macro(\"dim3\")\n"
4277"#pragma push_macro(\"uint3\")\n"
4278"#define dim3 __cuda_builtin_blockDim_t\n"
4279"#define uint3 __cuda_builtin_threadIdx_t\n"
4280"#include \"curand_mtgp32_kernel.h\"\n"
4281"#pragma pop_macro(\"dim3\")\n"
4282"#pragma pop_macro(\"uint3\")\n"
4283"#pragma pop_macro(\"__USE_FAST_MATH__\")\n"
4284"#pragma pop_macro(\"__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__\")\n"
4285"\n"
4286"#endif // __CUDA__\n"
4287"#endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__\n"
4288"" } ,
4289 { "/builtins/__stddef_max_align_t.h" , "/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---===\n"
4290" *\n"
4291" * Copyright (c) 2014 Chandler Carruth\n"
4292" *\n"
4293" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4294" * of this software and associated documentation files (the \"Software\"), to deal\n"
4295" * in the Software without restriction, including without limitation the rights\n"
4296" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4297" * copies of the Software, and to permit persons to whom the Software is\n"
4298" * furnished to do so, subject to the following conditions:\n"
4299" *\n"
4300" * The above copyright notice and this permission notice shall be included in\n"
4301" * all copies or substantial portions of the Software.\n"
4302" *\n"
4303" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4304" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4305" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4306" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4307" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4308" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4309" * THE SOFTWARE.\n"
4310" *\n"
4311" *===-----------------------------------------------------------------------===\n"
4312" */\n"
4313"\n"
4314"#ifndef __CLANG_MAX_ALIGN_T_DEFINED\n"
4315"#define __CLANG_MAX_ALIGN_T_DEFINED\n"
4316"\n"
4317"#if defined(_MSC_VER)\n"
4318"typedef double max_align_t;\n"
4319"#elif defined(__APPLE__)\n"
4320"typedef long double max_align_t;\n"
4321"#else\n"
4322"// Define 'max_align_t' to match the GCC definition.\n"
4323"typedef struct {\n"
4324" long long __clang_max_align_nonce1\n"
4325" __attribute__((__aligned__(__alignof__(long long))));\n"
4326" long double __clang_max_align_nonce2\n"
4327" __attribute__((__aligned__(__alignof__(long double))));\n"
4328"} max_align_t;\n"
4329"#endif\n"
4330"\n"
4331"#endif\n"
4332"" } ,
4333 { "/builtins/__wmmintrin_aes.h" , "/*===---- __wmmintrin_aes.h - AES intrinsics -------------------------------===\n"
4334" *\n"
4335" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4336" * of this software and associated documentation files (the \"Software\"), to deal\n"
4337" * in the Software without restriction, including without limitation the rights\n"
4338" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4339" * copies of the Software, and to permit persons to whom the Software is\n"
4340" * furnished to do so, subject to the following conditions:\n"
4341" *\n"
4342" * The above copyright notice and this permission notice shall be included in\n"
4343" * all copies or substantial portions of the Software.\n"
4344" *\n"
4345" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4346" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4347" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4348" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4349" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4350" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4351" * THE SOFTWARE.\n"
4352" *\n"
4353" *===-----------------------------------------------------------------------===\n"
4354" */\n"
4355"\n"
4356"#ifndef __WMMINTRIN_H\n"
4357"#error \"Never use <__wmmintrin_aes.h> directly; include <wmmintrin.h> instead.\"\n"
4358"#endif\n"
4359"\n"
4360"#ifndef __WMMINTRIN_AES_H\n"
4361"#define __WMMINTRIN_AES_H\n"
4362"\n"
4363"/* Define the default attributes for the functions in this file. */\n"
4364"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"aes\"), __min_vector_width__(128)))\n"
4365"\n"
4366"/// Performs a single round of AES encryption using the Equivalent\n"
4367"/// Inverse Cipher, transforming the state value from the first source\n"
4368"/// operand using a 128-bit round key value contained in the second source\n"
4369"/// operand, and writes the result to the destination.\n"
4370"///\n"
4371"/// \\headerfile <x86intrin.h>\n"
4372"///\n"
4373"/// This intrinsic corresponds to the <c> VAESENC </c> instruction.\n"
4374"///\n"
4375"/// \\param __V\n"
4376"/// A 128-bit integer vector containing the state value.\n"
4377"/// \\param __R\n"
4378"/// A 128-bit integer vector containing the round key value.\n"
4379"/// \\returns A 128-bit integer vector containing the encrypted value.\n"
4380"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4381"_mm_aesenc_si128(__m128i __V, __m128i __R)\n"
4382"{\n"
4383" return (__m128i)__builtin_ia32_aesenc128((__v2di)__V, (__v2di)__R);\n"
4384"}\n"
4385"\n"
4386"/// Performs the final round of AES encryption using the Equivalent\n"
4387"/// Inverse Cipher, transforming the state value from the first source\n"
4388"/// operand using a 128-bit round key value contained in the second source\n"
4389"/// operand, and writes the result to the destination.\n"
4390"///\n"
4391"/// \\headerfile <x86intrin.h>\n"
4392"///\n"
4393"/// This intrinsic corresponds to the <c> VAESENCLAST </c> instruction.\n"
4394"///\n"
4395"/// \\param __V\n"
4396"/// A 128-bit integer vector containing the state value.\n"
4397"/// \\param __R\n"
4398"/// A 128-bit integer vector containing the round key value.\n"
4399"/// \\returns A 128-bit integer vector containing the encrypted value.\n"
4400"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4401"_mm_aesenclast_si128(__m128i __V, __m128i __R)\n"
4402"{\n"
4403" return (__m128i)__builtin_ia32_aesenclast128((__v2di)__V, (__v2di)__R);\n"
4404"}\n"
4405"\n"
4406"/// Performs a single round of AES decryption using the Equivalent\n"
4407"/// Inverse Cipher, transforming the state value from the first source\n"
4408"/// operand using a 128-bit round key value contained in the second source\n"
4409"/// operand, and writes the result to the destination.\n"
4410"///\n"
4411"/// \\headerfile <x86intrin.h>\n"
4412"///\n"
4413"/// This intrinsic corresponds to the <c> VAESDEC </c> instruction.\n"
4414"///\n"
4415"/// \\param __V\n"
4416"/// A 128-bit integer vector containing the state value.\n"
4417"/// \\param __R\n"
4418"/// A 128-bit integer vector containing the round key value.\n"
4419"/// \\returns A 128-bit integer vector containing the decrypted value.\n"
4420"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4421"_mm_aesdec_si128(__m128i __V, __m128i __R)\n"
4422"{\n"
4423" return (__m128i)__builtin_ia32_aesdec128((__v2di)__V, (__v2di)__R);\n"
4424"}\n"
4425"\n"
4426"/// Performs the final round of AES decryption using the Equivalent\n"
4427"/// Inverse Cipher, transforming the state value from the first source\n"
4428"/// operand using a 128-bit round key value contained in the second source\n"
4429"/// operand, and writes the result to the destination.\n"
4430"///\n"
4431"/// \\headerfile <x86intrin.h>\n"
4432"///\n"
4433"/// This intrinsic corresponds to the <c> VAESDECLAST </c> instruction.\n"
4434"///\n"
4435"/// \\param __V\n"
4436"/// A 128-bit integer vector containing the state value.\n"
4437"/// \\param __R\n"
4438"/// A 128-bit integer vector containing the round key value.\n"
4439"/// \\returns A 128-bit integer vector containing the decrypted value.\n"
4440"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4441"_mm_aesdeclast_si128(__m128i __V, __m128i __R)\n"
4442"{\n"
4443" return (__m128i)__builtin_ia32_aesdeclast128((__v2di)__V, (__v2di)__R);\n"
4444"}\n"
4445"\n"
4446"/// Applies the AES InvMixColumns() transformation to an expanded key\n"
4447"/// contained in the source operand, and writes the result to the\n"
4448"/// destination.\n"
4449"///\n"
4450"/// \\headerfile <x86intrin.h>\n"
4451"///\n"
4452"/// This intrinsic corresponds to the <c> VAESIMC </c> instruction.\n"
4453"///\n"
4454"/// \\param __V\n"
4455"/// A 128-bit integer vector containing the expanded key.\n"
4456"/// \\returns A 128-bit integer vector containing the transformed value.\n"
4457"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4458"_mm_aesimc_si128(__m128i __V)\n"
4459"{\n"
4460" return (__m128i)__builtin_ia32_aesimc128((__v2di)__V);\n"
4461"}\n"
4462"\n"
4463"/// Generates a round key for AES encryption, operating on 128-bit data\n"
4464"/// specified in the first source operand and using an 8-bit round constant\n"
4465"/// specified by the second source operand, and writes the result to the\n"
4466"/// destination.\n"
4467"///\n"
4468"/// \\headerfile <x86intrin.h>\n"
4469"///\n"
4470"/// \\code\n"
4471"/// __m128i _mm_aeskeygenassist_si128(__m128i C, const int R);\n"
4472"/// \\endcode\n"
4473"///\n"
4474"/// This intrinsic corresponds to the <c> AESKEYGENASSIST </c> instruction.\n"
4475"///\n"
4476"/// \\param C\n"
4477"/// A 128-bit integer vector that is used to generate the AES encryption key.\n"
4478"/// \\param R\n"
4479"/// An 8-bit round constant used to generate the AES encryption key.\n"
4480"/// \\returns A 128-bit round key for AES encryption.\n"
4481"#define _mm_aeskeygenassist_si128(C, R) \\\n"
4482" (__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R))\n"
4483"\n"
4484"#undef __DEFAULT_FN_ATTRS\n"
4485"\n"
4486"#endif /* __WMMINTRIN_AES_H */\n"
4487"" } ,
4488 { "/builtins/__wmmintrin_pclmul.h" , "/*===---- __wmmintrin_pclmul.h - PCMUL intrinsics ---------------------------===\n"
4489" *\n"
4490" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4491" * of this software and associated documentation files (the \"Software\"), to deal\n"
4492" * in the Software without restriction, including without limitation the rights\n"
4493" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4494" * copies of the Software, and to permit persons to whom the Software is\n"
4495" * furnished to do so, subject to the following conditions:\n"
4496" *\n"
4497" * The above copyright notice and this permission notice shall be included in\n"
4498" * all copies or substantial portions of the Software.\n"
4499" *\n"
4500" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4501" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4502" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4503" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4504" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4505" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4506" * THE SOFTWARE.\n"
4507" *\n"
4508" *===-----------------------------------------------------------------------===\n"
4509" */\n"
4510"\n"
4511"#ifndef __WMMINTRIN_H\n"
4512"#error \"Never use <__wmmintrin_pclmul.h> directly; include <wmmintrin.h> instead.\"\n"
4513"#endif\n"
4514"\n"
4515"#ifndef __WMMINTRIN_PCLMUL_H\n"
4516"#define __WMMINTRIN_PCLMUL_H\n"
4517"\n"
4518"/// Multiplies two 64-bit integer values, which are selected from source\n"
4519"/// operands using the immediate-value operand. The multiplication is a\n"
4520"/// carry-less multiplication, and the 128-bit integer product is stored in\n"
4521"/// the destination.\n"
4522"///\n"
4523"/// \\headerfile <x86intrin.h>\n"
4524"///\n"
4525"/// \\code\n"
4526"/// __m128i _mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I);\n"
4527"/// \\endcode\n"
4528"///\n"
4529"/// This intrinsic corresponds to the <c> VPCLMULQDQ </c> instruction.\n"
4530"///\n"
4531"/// \\param __X\n"
4532"/// A 128-bit vector of [2 x i64] containing one of the source operands.\n"
4533"/// \\param __Y\n"
4534"/// A 128-bit vector of [2 x i64] containing one of the source operands.\n"
4535"/// \\param __I\n"
4536"/// An immediate value specifying which 64-bit values to select from the\n"
4537"/// operands. Bit 0 is used to select a value from operand \\a __X, and bit\n"
4538"/// 4 is used to select a value from operand \\a __Y: \\n\n"
4539"/// Bit[0]=0 indicates that bits[63:0] of operand \\a __X are used. \\n\n"
4540"/// Bit[0]=1 indicates that bits[127:64] of operand \\a __X are used. \\n\n"
4541"/// Bit[4]=0 indicates that bits[63:0] of operand \\a __Y are used. \\n\n"
4542"/// Bit[4]=1 indicates that bits[127:64] of operand \\a __Y are used.\n"
4543"/// \\returns The 128-bit integer vector containing the result of the carry-less\n"
4544"/// multiplication of the selected 64-bit values.\n"
4545"#define _mm_clmulepi64_si128(X, Y, I) \\\n"
4546" ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \\\n"
4547" (__v2di)(__m128i)(Y), (char)(I)))\n"
4548"\n"
4549"#endif /* __WMMINTRIN_PCLMUL_H */\n"
4550"" } ,
4551 { "/builtins/adxintrin.h" , "/*===---- adxintrin.h - ADX intrinsics -------------------------------------===\n"
4552" *\n"
4553" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4554" * of this software and associated documentation files (the \"Software\"), to deal\n"
4555" * in the Software without restriction, including without limitation the rights\n"
4556" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4557" * copies of the Software, and to permit persons to whom the Software is\n"
4558" * furnished to do so, subject to the following conditions:\n"
4559" *\n"
4560" * The above copyright notice and this permission notice shall be included in\n"
4561" * all copies or substantial portions of the Software.\n"
4562" *\n"
4563" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4564" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4565" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4566" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4567" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4568" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4569" * THE SOFTWARE.\n"
4570" *\n"
4571" *===-----------------------------------------------------------------------===\n"
4572" */\n"
4573"\n"
4574"#ifndef __IMMINTRIN_H\n"
4575"#error \"Never use <adxintrin.h> directly; include <immintrin.h> instead.\"\n"
4576"#endif\n"
4577"\n"
4578"#ifndef __ADXINTRIN_H\n"
4579"#define __ADXINTRIN_H\n"
4580"\n"
4581"/* Define the default attributes for the functions in this file. */\n"
4582"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n"
4583"\n"
4584"/* Intrinsics that are available only if __ADX__ defined */\n"
4585"static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__(\"adx\")))\n"
4586"_addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,\n"
4587" unsigned int *__p)\n"
4588"{\n"
4589" return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);\n"
4590"}\n"
4591"\n"
4592"#ifdef __x86_64__\n"
4593"static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__(\"adx\")))\n"
4594"_addcarryx_u64(unsigned char __cf, unsigned long long __x,\n"
4595" unsigned long long __y, unsigned long long *__p)\n"
4596"{\n"
4597" return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);\n"
4598"}\n"
4599"#endif\n"
4600"\n"
4601"/* Intrinsics that are also available if __ADX__ undefined */\n"
4602"static __inline unsigned char __DEFAULT_FN_ATTRS\n"
4603"_addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y,\n"
4604" unsigned int *__p)\n"
4605"{\n"
4606" return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);\n"
4607"}\n"
4608"\n"
4609"#ifdef __x86_64__\n"
4610"static __inline unsigned char __DEFAULT_FN_ATTRS\n"
4611"_addcarry_u64(unsigned char __cf, unsigned long long __x,\n"
4612" unsigned long long __y, unsigned long long *__p)\n"
4613"{\n"
4614" return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);\n"
4615"}\n"
4616"#endif\n"
4617"\n"
4618"static __inline unsigned char __DEFAULT_FN_ATTRS\n"
4619"_subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y,\n"
4620" unsigned int *__p)\n"
4621"{\n"
4622" return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);\n"
4623"}\n"
4624"\n"
4625"#ifdef __x86_64__\n"
4626"static __inline unsigned char __DEFAULT_FN_ATTRS\n"
4627"_subborrow_u64(unsigned char __cf, unsigned long long __x,\n"
4628" unsigned long long __y, unsigned long long *__p)\n"
4629"{\n"
4630" return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);\n"
4631"}\n"
4632"#endif\n"
4633"\n"
4634"#undef __DEFAULT_FN_ATTRS\n"
4635"\n"
4636"#endif /* __ADXINTRIN_H */\n"
4637"" } ,
4638 { "/builtins/ammintrin.h" , "/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===\n"
4639" *\n"
4640" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4641" * of this software and associated documentation files (the \"Software\"), to deal\n"
4642" * in the Software without restriction, including without limitation the rights\n"
4643" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4644" * copies of the Software, and to permit persons to whom the Software is\n"
4645" * furnished to do so, subject to the following conditions:\n"
4646" *\n"
4647" * The above copyright notice and this permission notice shall be included in\n"
4648" * all copies or substantial portions of the Software.\n"
4649" *\n"
4650" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4651" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4652" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4653" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4654" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4655" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4656" * THE SOFTWARE.\n"
4657" *\n"
4658" *===-----------------------------------------------------------------------===\n"
4659" */\n"
4660"\n"
4661"#ifndef __AMMINTRIN_H\n"
4662"#define __AMMINTRIN_H\n"
4663"\n"
4664"#include <pmmintrin.h>\n"
4665"\n"
4666"/* Define the default attributes for the functions in this file. */\n"
4667"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse4a\"), __min_vector_width__(128)))\n"
4668"\n"
4669"/// Extracts the specified bits from the lower 64 bits of the 128-bit\n"
4670"/// integer vector operand at the index \\a idx and of the length \\a len.\n"
4671"///\n"
4672"/// \\headerfile <x86intrin.h>\n"
4673"///\n"
4674"/// \\code\n"
4675"/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);\n"
4676"/// \\endcode\n"
4677"///\n"
4678"/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.\n"
4679"///\n"
4680"/// \\param x\n"
4681"/// The value from which bits are extracted.\n"
4682"/// \\param len\n"
4683"/// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]\n"
4684"/// are zero, the length is interpreted as 64.\n"
4685"/// \\param idx\n"
4686"/// Bits [5:0] specify the index of the least significant bit; the other\n"
4687"/// bits are ignored. If the sum of the index and length is greater than 64,\n"
4688"/// the result is undefined. If the length and index are both zero, bits\n"
4689"/// [63:0] of parameter \\a x are extracted. If the length is zero but the\n"
4690"/// index is non-zero, the result is undefined.\n"
4691"/// \\returns A 128-bit integer vector whose lower 64 bits contain the bits\n"
4692"/// extracted from the source operand.\n"
4693"#define _mm_extracti_si64(x, len, idx) \\\n"
4694" ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \\\n"
4695" (char)(len), (char)(idx)))\n"
4696"\n"
4697"/// Extracts the specified bits from the lower 64 bits of the 128-bit\n"
4698"/// integer vector operand at the index and of the length specified by\n"
4699"/// \\a __y.\n"
4700"///\n"
4701"/// \\headerfile <x86intrin.h>\n"
4702"///\n"
4703"/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.\n"
4704"///\n"
4705"/// \\param __x\n"
4706"/// The value from which bits are extracted.\n"
4707"/// \\param __y\n"
4708"/// Specifies the index of the least significant bit at [13:8] and the\n"
4709"/// length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the\n"
4710"/// length is interpreted as 64. If the sum of the index and length is\n"
4711"/// greater than 64, the result is undefined. If the length and index are\n"
4712"/// both zero, bits [63:0] of parameter \\a __x are extracted. If the length\n"
4713"/// is zero but the index is non-zero, the result is undefined.\n"
4714"/// \\returns A 128-bit vector whose lower 64 bits contain the bits extracted\n"
4715"/// from the source operand.\n"
4716"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4717"_mm_extract_si64(__m128i __x, __m128i __y)\n"
4718"{\n"
4719" return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);\n"
4720"}\n"
4721"\n"
4722"/// Inserts bits of a specified length from the source integer vector\n"
4723"/// \\a y into the lower 64 bits of the destination integer vector \\a x at\n"
4724"/// the index \\a idx and of the length \\a len.\n"
4725"///\n"
4726"/// \\headerfile <x86intrin.h>\n"
4727"///\n"
4728"/// \\code\n"
4729"/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,\n"
4730"/// const int idx);\n"
4731"/// \\endcode\n"
4732"///\n"
4733"/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.\n"
4734"///\n"
4735"/// \\param x\n"
4736"/// The destination operand where bits will be inserted. The inserted bits\n"
4737"/// are defined by the length \\a len and by the index \\a idx specifying the\n"
4738"/// least significant bit.\n"
4739"/// \\param y\n"
4740"/// The source operand containing the bits to be extracted. The extracted\n"
4741"/// bits are the least significant bits of operand \\a y of length \\a len.\n"
4742"/// \\param len\n"
4743"/// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]\n"
4744"/// are zero, the length is interpreted as 64.\n"
4745"/// \\param idx\n"
4746"/// Bits [5:0] specify the index of the least significant bit; the other\n"
4747"/// bits are ignored. If the sum of the index and length is greater than 64,\n"
4748"/// the result is undefined. If the length and index are both zero, bits\n"
4749"/// [63:0] of parameter \\a y are inserted into parameter \\a x. If the length\n"
4750"/// is zero but the index is non-zero, the result is undefined.\n"
4751"/// \\returns A 128-bit integer vector containing the original lower 64-bits of\n"
4752"/// destination operand \\a x with the specified bitfields replaced by the\n"
4753"/// lower bits of source operand \\a y. The upper 64 bits of the return value\n"
4754"/// are undefined.\n"
4755"#define _mm_inserti_si64(x, y, len, idx) \\\n"
4756" ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \\\n"
4757" (__v2di)(__m128i)(y), \\\n"
4758" (char)(len), (char)(idx)))\n"
4759"\n"
4760"/// Inserts bits of a specified length from the source integer vector\n"
4761"/// \\a __y into the lower 64 bits of the destination integer vector \\a __x\n"
4762"/// at the index and of the length specified by \\a __y.\n"
4763"///\n"
4764"/// \\headerfile <x86intrin.h>\n"
4765"///\n"
4766"/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.\n"
4767"///\n"
4768"/// \\param __x\n"
4769"/// The destination operand where bits will be inserted. The inserted bits\n"
4770"/// are defined by the length and by the index of the least significant bit\n"
4771"/// specified by operand \\a __y.\n"
4772"/// \\param __y\n"
4773"/// The source operand containing the bits to be extracted. The extracted\n"
4774"/// bits are the least significant bits of operand \\a __y with length\n"
4775"/// specified by bits [69:64]. These are inserted into the destination at the\n"
4776"/// index specified by bits [77:72]; all other bits are ignored. If bits\n"
4777"/// [69:64] are zero, the length is interpreted as 64. If the sum of the\n"
4778"/// index and length is greater than 64, the result is undefined. If the\n"
4779"/// length and index are both zero, bits [63:0] of parameter \\a __y are\n"
4780"/// inserted into parameter \\a __x. If the length is zero but the index is\n"
4781"/// non-zero, the result is undefined.\n"
4782"/// \\returns A 128-bit integer vector containing the original lower 64-bits of\n"
4783"/// destination operand \\a __x with the specified bitfields replaced by the\n"
4784"/// lower bits of source operand \\a __y. The upper 64 bits of the return\n"
4785"/// value are undefined.\n"
4786"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4787"_mm_insert_si64(__m128i __x, __m128i __y)\n"
4788"{\n"
4789" return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);\n"
4790"}\n"
4791"\n"
4792"/// Stores a 64-bit double-precision value in a 64-bit memory location.\n"
4793"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
4794"/// used again soon).\n"
4795"///\n"
4796"/// \\headerfile <x86intrin.h>\n"
4797"///\n"
4798"/// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.\n"
4799"///\n"
4800"/// \\param __p\n"
4801"/// The 64-bit memory location used to store the register value.\n"
4802"/// \\param __a\n"
4803"/// The 64-bit double-precision floating-point register value to be stored.\n"
4804"static __inline__ void __DEFAULT_FN_ATTRS\n"
4805"_mm_stream_sd(double *__p, __m128d __a)\n"
4806"{\n"
4807" __builtin_ia32_movntsd(__p, (__v2df)__a);\n"
4808"}\n"
4809"\n"
4810"/// Stores a 32-bit single-precision floating-point value in a 32-bit\n"
4811"/// memory location. To minimize caching, the data is flagged as\n"
4812"/// non-temporal (unlikely to be used again soon).\n"
4813"///\n"
4814"/// \\headerfile <x86intrin.h>\n"
4815"///\n"
4816"/// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.\n"
4817"///\n"
4818"/// \\param __p\n"
4819"/// The 32-bit memory location used to store the register value.\n"
4820"/// \\param __a\n"
4821"/// The 32-bit single-precision floating-point register value to be stored.\n"
4822"static __inline__ void __DEFAULT_FN_ATTRS\n"
4823"_mm_stream_ss(float *__p, __m128 __a)\n"
4824"{\n"
4825" __builtin_ia32_movntss(__p, (__v4sf)__a);\n"
4826"}\n"
4827"\n"
4828"#undef __DEFAULT_FN_ATTRS\n"
4829"\n"
4830"#endif /* __AMMINTRIN_H */\n"
4831"" } ,
4832 { "/builtins/arm64intr.h" , "/*===---- arm64intr.h - ARM64 Windows intrinsics -------------------------------===\n"
4833" *\n"
4834" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4835" * of this software and associated documentation files (the \"Software\"), to deal\n"
4836" * in the Software without restriction, including without limitation the rights\n"
4837" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4838" * copies of the Software, and to permit persons to whom the Software is\n"
4839" * furnished to do so, subject to the following conditions:\n"
4840" *\n"
4841" * The above copyright notice and this permission notice shall be included in\n"
4842" * all copies or substantial portions of the Software.\n"
4843" *\n"
4844" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4845" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4846" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4847" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4848" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4849" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4850" * THE SOFTWARE.\n"
4851" *\n"
4852" *===-----------------------------------------------------------------------===\n"
4853" */\n"
4854"\n"
4855"/* Only include this if we're compiling for the windows platform. */\n"
4856"#ifndef _MSC_VER\n"
4857"#include_next <arm64intr.h>\n"
4858"#else\n"
4859"\n"
4860"#ifndef __ARM64INTR_H\n"
4861"#define __ARM64INTR_H\n"
4862"\n"
4863"typedef enum\n"
4864"{\n"
4865" _ARM64_BARRIER_SY = 0xF,\n"
4866" _ARM64_BARRIER_ST = 0xE,\n"
4867" _ARM64_BARRIER_LD = 0xD,\n"
4868" _ARM64_BARRIER_ISH = 0xB,\n"
4869" _ARM64_BARRIER_ISHST = 0xA,\n"
4870" _ARM64_BARRIER_ISHLD = 0x9,\n"
4871" _ARM64_BARRIER_NSH = 0x7,\n"
4872" _ARM64_BARRIER_NSHST = 0x6,\n"
4873" _ARM64_BARRIER_NSHLD = 0x5,\n"
4874" _ARM64_BARRIER_OSH = 0x3,\n"
4875" _ARM64_BARRIER_OSHST = 0x2,\n"
4876" _ARM64_BARRIER_OSHLD = 0x1\n"
4877"} _ARM64INTR_BARRIER_TYPE;\n"
4878"\n"
4879"#endif /* __ARM64INTR_H */\n"
4880"#endif /* _MSC_VER */\n"
4881"" } ,
4882 { "/builtins/arm_acle.h" , "/*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===\n"
4883" *\n"
4884" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4885" * of this software and associated documentation files (the \"Software\"), to deal\n"
4886" * in the Software without restriction, including without limitation the rights\n"
4887" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4888" * copies of the Software, and to permit persons to whom the Software is\n"
4889" * furnished to do so, subject to the following conditions:\n"
4890" *\n"
4891" * The above copyright notice and this permission notice shall be included in\n"
4892" * all copies or substantial portions of the Software.\n"
4893" *\n"
4894" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4895" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4896" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4897" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4898" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4899" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4900" * THE SOFTWARE.\n"
4901" *\n"
4902" *===-----------------------------------------------------------------------===\n"
4903" */\n"
4904"\n"
4905"#ifndef __ARM_ACLE_H\n"
4906"#define __ARM_ACLE_H\n"
4907"\n"
4908"#ifndef __ARM_ACLE\n"
4909"#error \"ACLE intrinsics support not enabled.\"\n"
4910"#endif\n"
4911"\n"
4912"#include <stdint.h>\n"
4913"\n"
4914"#if defined(__cplusplus)\n"
4915"extern \"C\" {\n"
4916"#endif\n"
4917"\n"
4918"/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */\n"
4919"/* 8.3 Memory barriers */\n"
4920"#if !defined(_MSC_VER)\n"
4921"#define __dmb(i) __builtin_arm_dmb(i)\n"
4922"#define __dsb(i) __builtin_arm_dsb(i)\n"
4923"#define __isb(i) __builtin_arm_isb(i)\n"
4924"#endif\n"
4925"\n"
4926"/* 8.4 Hints */\n"
4927"\n"
4928"#if !defined(_MSC_VER)\n"
4929"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {\n"
4930" __builtin_arm_wfi();\n"
4931"}\n"
4932"\n"
4933"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) {\n"
4934" __builtin_arm_wfe();\n"
4935"}\n"
4936"\n"
4937"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) {\n"
4938" __builtin_arm_sev();\n"
4939"}\n"
4940"\n"
4941"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) {\n"
4942" __builtin_arm_sevl();\n"
4943"}\n"
4944"\n"
4945"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) {\n"
4946" __builtin_arm_yield();\n"
4947"}\n"
4948"#endif\n"
4949"\n"
4950"#if __ARM_32BIT_STATE\n"
4951"#define __dbg(t) __builtin_arm_dbg(t)\n"
4952"#endif\n"
4953"\n"
4954"/* 8.5 Swap */\n"
4955"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
4956"__swp(uint32_t __x, volatile uint32_t *__p) {\n"
4957" uint32_t v;\n"
4958" do\n"
4959" v = __builtin_arm_ldrex(__p);\n"
4960" while (__builtin_arm_strex(__x, __p));\n"
4961" return v;\n"
4962"}\n"
4963"\n"
4964"/* 8.6 Memory prefetch intrinsics */\n"
4965"/* 8.6.1 Data prefetch */\n"
4966"#define __pld(addr) __pldx(0, 0, 0, addr)\n"
4967"\n"
4968"#if __ARM_32BIT_STATE\n"
4969"#define __pldx(access_kind, cache_level, retention_policy, addr) \\\n"
4970" __builtin_arm_prefetch(addr, access_kind, 1)\n"
4971"#else\n"
4972"#define __pldx(access_kind, cache_level, retention_policy, addr) \\\n"
4973" __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)\n"
4974"#endif\n"
4975"\n"
4976"/* 8.6.2 Instruction prefetch */\n"
4977"#define __pli(addr) __plix(0, 0, addr)\n"
4978"\n"
4979"#if __ARM_32BIT_STATE\n"
4980"#define __plix(cache_level, retention_policy, addr) \\\n"
4981" __builtin_arm_prefetch(addr, 0, 0)\n"
4982"#else\n"
4983"#define __plix(cache_level, retention_policy, addr) \\\n"
4984" __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)\n"
4985"#endif\n"
4986"\n"
4987"/* 8.7 NOP */\n"
4988"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {\n"
4989" __builtin_arm_nop();\n"
4990"}\n"
4991"\n"
4992"/* 9 DATA-PROCESSING INTRINSICS */\n"
4993"/* 9.2 Miscellaneous data-processing intrinsics */\n"
4994"/* ROR */\n"
4995"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
4996"__ror(uint32_t __x, uint32_t __y) {\n"
4997" __y %= 32;\n"
4998" if (__y == 0)\n"
4999" return __x;\n"
5000" return (__x >> __y) | (__x << (32 - __y));\n"
5001"}\n"
5002"\n"
5003"static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n"
5004"__rorll(uint64_t __x, uint32_t __y) {\n"
5005" __y %= 64;\n"
5006" if (__y == 0)\n"
5007" return __x;\n"
5008" return (__x >> __y) | (__x << (64 - __y));\n"
5009"}\n"
5010"\n"
5011"static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n"
5012"__rorl(unsigned long __x, uint32_t __y) {\n"
5013"#if __SIZEOF_LONG__ == 4\n"
5014" return __ror(__x, __y);\n"
5015"#else\n"
5016" return __rorll(__x, __y);\n"
5017"#endif\n"
5018"}\n"
5019"\n"
5020"\n"
5021"/* CLZ */\n"
5022"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5023"__clz(uint32_t __t) {\n"
5024" return __builtin_clz(__t);\n"
5025"}\n"
5026"\n"
5027"static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n"
5028"__clzl(unsigned long __t) {\n"
5029" return __builtin_clzl(__t);\n"
5030"}\n"
5031"\n"
5032"static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n"
5033"__clzll(uint64_t __t) {\n"
5034" return __builtin_clzll(__t);\n"
5035"}\n"
5036"\n"
5037"/* REV */\n"
5038"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5039"__rev(uint32_t __t) {\n"
5040" return __builtin_bswap32(__t);\n"
5041"}\n"
5042"\n"
5043"static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n"
5044"__revl(unsigned long __t) {\n"
5045"#if __SIZEOF_LONG__ == 4\n"
5046" return __builtin_bswap32(__t);\n"
5047"#else\n"
5048" return __builtin_bswap64(__t);\n"
5049"#endif\n"
5050"}\n"
5051"\n"
5052"static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n"
5053"__revll(uint64_t __t) {\n"
5054" return __builtin_bswap64(__t);\n"
5055"}\n"
5056"\n"
5057"/* REV16 */\n"
5058"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5059"__rev16(uint32_t __t) {\n"
5060" return __ror(__rev(__t), 16);\n"
5061"}\n"
5062"\n"
5063"static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n"
5064"__rev16ll(uint64_t __t) {\n"
5065" return (((uint64_t)__rev16(__t >> 32)) << 32) | __rev16(__t);\n"
5066"}\n"
5067"\n"
5068"static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n"
5069"__rev16l(unsigned long __t) {\n"
5070"#if __SIZEOF_LONG__ == 4\n"
5071" return __rev16(__t);\n"
5072"#else\n"
5073" return __rev16ll(__t);\n"
5074"#endif\n"
5075"}\n"
5076"\n"
5077"/* REVSH */\n"
5078"static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))\n"
5079"__revsh(int16_t __t) {\n"
5080" return __builtin_bswap16(__t);\n"
5081"}\n"
5082"\n"
5083"/* RBIT */\n"
5084"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5085"__rbit(uint32_t __t) {\n"
5086" return __builtin_arm_rbit(__t);\n"
5087"}\n"
5088"\n"
5089"static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n"
5090"__rbitll(uint64_t __t) {\n"
5091"#if __ARM_32BIT_STATE\n"
5092" return (((uint64_t)__builtin_arm_rbit(__t)) << 32) |\n"
5093" __builtin_arm_rbit(__t >> 32);\n"
5094"#else\n"
5095" return __builtin_arm_rbit64(__t);\n"
5096"#endif\n"
5097"}\n"
5098"\n"
5099"static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n"
5100"__rbitl(unsigned long __t) {\n"
5101"#if __SIZEOF_LONG__ == 4\n"
5102" return __rbit(__t);\n"
5103"#else\n"
5104" return __rbitll(__t);\n"
5105"#endif\n"
5106"}\n"
5107"\n"
5108"/*\n"
5109" * 9.3 16-bit multiplications\n"
5110" */\n"
5111"#if __ARM_FEATURE_DSP\n"
5112"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5113"__smulbb(int32_t __a, int32_t __b) {\n"
5114" return __builtin_arm_smulbb(__a, __b);\n"
5115"}\n"
5116"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5117"__smulbt(int32_t __a, int32_t __b) {\n"
5118" return __builtin_arm_smulbt(__a, __b);\n"
5119"}\n"
5120"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5121"__smultb(int32_t __a, int32_t __b) {\n"
5122" return __builtin_arm_smultb(__a, __b);\n"
5123"}\n"
5124"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5125"__smultt(int32_t __a, int32_t __b) {\n"
5126" return __builtin_arm_smultt(__a, __b);\n"
5127"}\n"
5128"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5129"__smulwb(int32_t __a, int32_t __b) {\n"
5130" return __builtin_arm_smulwb(__a, __b);\n"
5131"}\n"
5132"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5133"__smulwt(int32_t __a, int32_t __b) {\n"
5134" return __builtin_arm_smulwt(__a, __b);\n"
5135"}\n"
5136"#endif\n"
5137"\n"
5138"/*\n"
5139" * 9.4 Saturating intrinsics\n"
5140" *\n"
5141" * FIXME: Change guard to their corrosponding __ARM_FEATURE flag when Q flag\n"
5142" * intrinsics are implemented and the flag is enabled.\n"
5143" */\n"
5144"/* 9.4.1 Width-specified saturation intrinsics */\n"
5145"#if __ARM_FEATURE_SAT\n"
5146"#define __ssat(x, y) __builtin_arm_ssat(x, y)\n"
5147"#define __usat(x, y) __builtin_arm_usat(x, y)\n"
5148"#endif\n"
5149"\n"
5150"/* 9.4.2 Saturating addition and subtraction intrinsics */\n"
5151"#if __ARM_FEATURE_DSP\n"
5152"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5153"__qadd(int32_t __t, int32_t __v) {\n"
5154" return __builtin_arm_qadd(__t, __v);\n"
5155"}\n"
5156"\n"
5157"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5158"__qsub(int32_t __t, int32_t __v) {\n"
5159" return __builtin_arm_qsub(__t, __v);\n"
5160"}\n"
5161"\n"
5162"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5163"__qdbl(int32_t __t) {\n"
5164" return __builtin_arm_qadd(__t, __t);\n"
5165"}\n"
5166"#endif\n"
5167"\n"
5168"/* 9.4.3 Accumultating multiplications */\n"
5169"#if __ARM_FEATURE_DSP\n"
5170"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5171"__smlabb(int32_t __a, int32_t __b, int32_t __c) {\n"
5172" return __builtin_arm_smlabb(__a, __b, __c);\n"
5173"}\n"
5174"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5175"__smlabt(int32_t __a, int32_t __b, int32_t __c) {\n"
5176" return __builtin_arm_smlabt(__a, __b, __c);\n"
5177"}\n"
5178"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5179"__smlatb(int32_t __a, int32_t __b, int32_t __c) {\n"
5180" return __builtin_arm_smlatb(__a, __b, __c);\n"
5181"}\n"
5182"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5183"__smlatt(int32_t __a, int32_t __b, int32_t __c) {\n"
5184" return __builtin_arm_smlatt(__a, __b, __c);\n"
5185"}\n"
5186"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5187"__smlawb(int32_t __a, int32_t __b, int32_t __c) {\n"
5188" return __builtin_arm_smlawb(__a, __b, __c);\n"
5189"}\n"
5190"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5191"__smlawt(int32_t __a, int32_t __b, int32_t __c) {\n"
5192" return __builtin_arm_smlawt(__a, __b, __c);\n"
5193"}\n"
5194"#endif\n"
5195"\n"
5196"\n"
5197"/* 9.5.4 Parallel 16-bit saturation */\n"
5198"#if __ARM_FEATURE_SIMD32\n"
5199"#define __ssat16(x, y) __builtin_arm_ssat16(x, y)\n"
5200"#define __usat16(x, y) __builtin_arm_usat16(x, y)\n"
5201"#endif\n"
5202"\n"
5203"/* 9.5.5 Packing and unpacking */\n"
5204"#if __ARM_FEATURE_SIMD32\n"
5205"typedef int32_t int8x4_t;\n"
5206"typedef int32_t int16x2_t;\n"
5207"typedef uint32_t uint8x4_t;\n"
5208"typedef uint32_t uint16x2_t;\n"
5209"\n"
5210"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5211"__sxtab16(int16x2_t __a, int8x4_t __b) {\n"
5212" return __builtin_arm_sxtab16(__a, __b);\n"
5213"}\n"
5214"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5215"__sxtb16(int8x4_t __a) {\n"
5216" return __builtin_arm_sxtb16(__a);\n"
5217"}\n"
5218"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5219"__uxtab16(int16x2_t __a, int8x4_t __b) {\n"
5220" return __builtin_arm_uxtab16(__a, __b);\n"
5221"}\n"
5222"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5223"__uxtb16(int8x4_t __a) {\n"
5224" return __builtin_arm_uxtb16(__a);\n"
5225"}\n"
5226"#endif\n"
5227"\n"
5228"/* 9.5.6 Parallel selection */\n"
5229"#if __ARM_FEATURE_SIMD32\n"
5230"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5231"__sel(uint8x4_t __a, uint8x4_t __b) {\n"
5232" return __builtin_arm_sel(__a, __b);\n"
5233"}\n"
5234"#endif\n"
5235"\n"
5236"/* 9.5.7 Parallel 8-bit addition and subtraction */\n"
5237"#if __ARM_FEATURE_SIMD32\n"
5238"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5239"__qadd8(int8x4_t __a, int8x4_t __b) {\n"
5240" return __builtin_arm_qadd8(__a, __b);\n"
5241"}\n"
5242"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5243"__qsub8(int8x4_t __a, int8x4_t __b) {\n"
5244" return __builtin_arm_qsub8(__a, __b);\n"
5245"}\n"
5246"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5247"__sadd8(int8x4_t __a, int8x4_t __b) {\n"
5248" return __builtin_arm_sadd8(__a, __b);\n"
5249"}\n"
5250"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5251"__shadd8(int8x4_t __a, int8x4_t __b) {\n"
5252" return __builtin_arm_shadd8(__a, __b);\n"
5253"}\n"
5254"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5255"__shsub8(int8x4_t __a, int8x4_t __b) {\n"
5256" return __builtin_arm_shsub8(__a, __b);\n"
5257"}\n"
5258"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5259"__ssub8(int8x4_t __a, int8x4_t __b) {\n"
5260" return __builtin_arm_ssub8(__a, __b);\n"
5261"}\n"
5262"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5263"__uadd8(uint8x4_t __a, uint8x4_t __b) {\n"
5264" return __builtin_arm_uadd8(__a, __b);\n"
5265"}\n"
5266"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5267"__uhadd8(uint8x4_t __a, uint8x4_t __b) {\n"
5268" return __builtin_arm_uhadd8(__a, __b);\n"
5269"}\n"
5270"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5271"__uhsub8(uint8x4_t __a, uint8x4_t __b) {\n"
5272" return __builtin_arm_uhsub8(__a, __b);\n"
5273"}\n"
5274"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5275"__uqadd8(uint8x4_t __a, uint8x4_t __b) {\n"
5276" return __builtin_arm_uqadd8(__a, __b);\n"
5277"}\n"
5278"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5279"__uqsub8(uint8x4_t __a, uint8x4_t __b) {\n"
5280" return __builtin_arm_uqsub8(__a, __b);\n"
5281"}\n"
5282"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5283"__usub8(uint8x4_t __a, uint8x4_t __b) {\n"
5284" return __builtin_arm_usub8(__a, __b);\n"
5285"}\n"
5286"#endif\n"
5287"\n"
5288"/* 9.5.8 Sum of 8-bit absolute differences */\n"
5289"#if __ARM_FEATURE_SIMD32\n"
5290"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5291"__usad8(uint8x4_t __a, uint8x4_t __b) {\n"
5292" return __builtin_arm_usad8(__a, __b);\n"
5293"}\n"
5294"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5295"__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {\n"
5296" return __builtin_arm_usada8(__a, __b, __c);\n"
5297"}\n"
5298"#endif\n"
5299"\n"
5300"/* 9.5.9 Parallel 16-bit addition and subtraction */\n"
5301"#if __ARM_FEATURE_SIMD32\n"
5302"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5303"__qadd16(int16x2_t __a, int16x2_t __b) {\n"
5304" return __builtin_arm_qadd16(__a, __b);\n"
5305"}\n"
5306"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5307"__qasx(int16x2_t __a, int16x2_t __b) {\n"
5308" return __builtin_arm_qasx(__a, __b);\n"
5309"}\n"
5310"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5311"__qsax(int16x2_t __a, int16x2_t __b) {\n"
5312" return __builtin_arm_qsax(__a, __b);\n"
5313"}\n"
5314"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5315"__qsub16(int16x2_t __a, int16x2_t __b) {\n"
5316" return __builtin_arm_qsub16(__a, __b);\n"
5317"}\n"
5318"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5319"__sadd16(int16x2_t __a, int16x2_t __b) {\n"
5320" return __builtin_arm_sadd16(__a, __b);\n"
5321"}\n"
5322"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5323"__sasx(int16x2_t __a, int16x2_t __b) {\n"
5324" return __builtin_arm_sasx(__a, __b);\n"
5325"}\n"
5326"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5327"__shadd16(int16x2_t __a, int16x2_t __b) {\n"
5328" return __builtin_arm_shadd16(__a, __b);\n"
5329"}\n"
5330"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5331"__shasx(int16x2_t __a, int16x2_t __b) {\n"
5332" return __builtin_arm_shasx(__a, __b);\n"
5333"}\n"
5334"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5335"__shsax(int16x2_t __a, int16x2_t __b) {\n"
5336" return __builtin_arm_shsax(__a, __b);\n"
5337"}\n"
5338"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5339"__shsub16(int16x2_t __a, int16x2_t __b) {\n"
5340" return __builtin_arm_shsub16(__a, __b);\n"
5341"}\n"
5342"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5343"__ssax(int16x2_t __a, int16x2_t __b) {\n"
5344" return __builtin_arm_ssax(__a, __b);\n"
5345"}\n"
5346"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5347"__ssub16(int16x2_t __a, int16x2_t __b) {\n"
5348" return __builtin_arm_ssub16(__a, __b);\n"
5349"}\n"
5350"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5351"__uadd16(uint16x2_t __a, uint16x2_t __b) {\n"
5352" return __builtin_arm_uadd16(__a, __b);\n"
5353"}\n"
5354"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5355"__uasx(uint16x2_t __a, uint16x2_t __b) {\n"
5356" return __builtin_arm_uasx(__a, __b);\n"
5357"}\n"
5358"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5359"__uhadd16(uint16x2_t __a, uint16x2_t __b) {\n"
5360" return __builtin_arm_uhadd16(__a, __b);\n"
5361"}\n"
5362"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5363"__uhasx(uint16x2_t __a, uint16x2_t __b) {\n"
5364" return __builtin_arm_uhasx(__a, __b);\n"
5365"}\n"
5366"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5367"__uhsax(uint16x2_t __a, uint16x2_t __b) {\n"
5368" return __builtin_arm_uhsax(__a, __b);\n"
5369"}\n"
5370"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5371"__uhsub16(uint16x2_t __a, uint16x2_t __b) {\n"
5372" return __builtin_arm_uhsub16(__a, __b);\n"
5373"}\n"
5374"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5375"__uqadd16(uint16x2_t __a, uint16x2_t __b) {\n"
5376" return __builtin_arm_uqadd16(__a, __b);\n"
5377"}\n"
5378"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5379"__uqasx(uint16x2_t __a, uint16x2_t __b) {\n"
5380" return __builtin_arm_uqasx(__a, __b);\n"
5381"}\n"
5382"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5383"__uqsax(uint16x2_t __a, uint16x2_t __b) {\n"
5384" return __builtin_arm_uqsax(__a, __b);\n"
5385"}\n"
5386"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5387"__uqsub16(uint16x2_t __a, uint16x2_t __b) {\n"
5388" return __builtin_arm_uqsub16(__a, __b);\n"
5389"}\n"
5390"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5391"__usax(uint16x2_t __a, uint16x2_t __b) {\n"
5392" return __builtin_arm_usax(__a, __b);\n"
5393"}\n"
5394"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5395"__usub16(uint16x2_t __a, uint16x2_t __b) {\n"
5396" return __builtin_arm_usub16(__a, __b);\n"
5397"}\n"
5398"#endif\n"
5399"\n"
5400"/* 9.5.10 Parallel 16-bit multiplications */\n"
5401"#if __ARM_FEATURE_SIMD32\n"
5402"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5403"__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {\n"
5404" return __builtin_arm_smlad(__a, __b, __c);\n"
5405"}\n"
5406"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5407"__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) {\n"
5408" return __builtin_arm_smladx(__a, __b, __c);\n"
5409"}\n"
5410"static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n"
5411"__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) {\n"
5412" return __builtin_arm_smlald(__a, __b, __c);\n"
5413"}\n"
5414"static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n"
5415"__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) {\n"
5416" return __builtin_arm_smlaldx(__a, __b, __c);\n"
5417"}\n"
5418"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5419"__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) {\n"
5420" return __builtin_arm_smlsd(__a, __b, __c);\n"
5421"}\n"
5422"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5423"__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) {\n"
5424" return __builtin_arm_smlsdx(__a, __b, __c);\n"
5425"}\n"
5426"static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n"
5427"__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) {\n"
5428" return __builtin_arm_smlsld(__a, __b, __c);\n"
5429"}\n"
5430"static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n"
5431"__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) {\n"
5432" return __builtin_arm_smlsldx(__a, __b, __c);\n"
5433"}\n"
5434"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5435"__smuad(int16x2_t __a, int16x2_t __b) {\n"
5436" return __builtin_arm_smuad(__a, __b);\n"
5437"}\n"
5438"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5439"__smuadx(int16x2_t __a, int16x2_t __b) {\n"
5440" return __builtin_arm_smuadx(__a, __b);\n"
5441"}\n"
5442"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5443"__smusd(int16x2_t __a, int16x2_t __b) {\n"
5444" return __builtin_arm_smusd(__a, __b);\n"
5445"}\n"
5446"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5447"__smusdx(int16x2_t __a, int16x2_t __b) {\n"
5448" return __builtin_arm_smusdx(__a, __b);\n"
5449"}\n"
5450"#endif\n"
5451"\n"
5452"/* 9.7 CRC32 intrinsics */\n"
5453"#if __ARM_FEATURE_CRC32\n"
5454"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5455"__crc32b(uint32_t __a, uint8_t __b) {\n"
5456" return __builtin_arm_crc32b(__a, __b);\n"
5457"}\n"
5458"\n"
5459"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5460"__crc32h(uint32_t __a, uint16_t __b) {\n"
5461" return __builtin_arm_crc32h(__a, __b);\n"
5462"}\n"
5463"\n"
5464"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5465"__crc32w(uint32_t __a, uint32_t __b) {\n"
5466" return __builtin_arm_crc32w(__a, __b);\n"
5467"}\n"
5468"\n"
5469"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5470"__crc32d(uint32_t __a, uint64_t __b) {\n"
5471" return __builtin_arm_crc32d(__a, __b);\n"
5472"}\n"
5473"\n"
5474"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5475"__crc32cb(uint32_t __a, uint8_t __b) {\n"
5476" return __builtin_arm_crc32cb(__a, __b);\n"
5477"}\n"
5478"\n"
5479"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5480"__crc32ch(uint32_t __a, uint16_t __b) {\n"
5481" return __builtin_arm_crc32ch(__a, __b);\n"
5482"}\n"
5483"\n"
5484"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5485"__crc32cw(uint32_t __a, uint32_t __b) {\n"
5486" return __builtin_arm_crc32cw(__a, __b);\n"
5487"}\n"
5488"\n"
5489"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5490"__crc32cd(uint32_t __a, uint64_t __b) {\n"
5491" return __builtin_arm_crc32cd(__a, __b);\n"
5492"}\n"
5493"#endif\n"
5494"\n"
5495"/* 10.1 Special register intrinsics */\n"
5496"#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)\n"
5497"#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)\n"
5498"#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg)\n"
5499"#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v)\n"
5500"#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)\n"
5501"#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)\n"
5502"\n"
5503"#if defined(__cplusplus)\n"
5504"}\n"
5505"#endif\n"
5506"\n"
5507"#endif /* __ARM_ACLE_H */\n"
5508"" } ,
5509 { "/builtins/arm_fp16.h" , "/*===---- arm_fp16.h - ARM FP16 intrinsics ---------------------------------===\n"
5510" *\n"
5511" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
5512" * of this software and associated documentation files (the \"Software\"), to deal\n"
5513" * in the Software without restriction, including without limitation the rights\n"
5514" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
5515" * copies of the Software, and to permit persons to whom the Software is\n"
5516" * furnished to do so, subject to the following conditions:\n"
5517" *\n"
5518" * The above copyright notice and this permission notice shall be included in\n"
5519" * all copies or substantial portions of the Software.\n"
5520" *\n"
5521" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
5522" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
5523" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
5524" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
5525" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
5526" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
5527" * THE SOFTWARE.\n"
5528" *\n"
5529" *===-----------------------------------------------------------------------===\n"
5530" */\n"
5531"\n"
5532"#ifndef __ARM_FP16_H\n"
5533"#define __ARM_FP16_H\n"
5534"\n"
5535"#include <stdint.h>\n"
5536"\n"
5537"typedef __fp16 float16_t;\n"
5538"#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))\n"
5539"\n"
5540"#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)\n"
5541"#ifdef __LITTLE_ENDIAN__\n"
5542"#define vabdh_f16(__p0, __p1) __extension__ ({ \\\n"
5543" float16_t __s0 = __p0; \\\n"
5544" float16_t __s1 = __p1; \\\n"
5545" float16_t __ret; \\\n"
5546" __ret = (float16_t) __builtin_neon_vabdh_f16(__s0, __s1); \\\n"
5547" __ret; \\\n"
5548"})\n"
5549"#else\n"
5550"#define vabdh_f16(__p0, __p1) __extension__ ({ \\\n"
5551" float16_t __s0 = __p0; \\\n"
5552" float16_t __s1 = __p1; \\\n"
5553" float16_t __ret; \\\n"
5554" __ret = (float16_t) __builtin_neon_vabdh_f16(__s0, __s1); \\\n"
5555" __ret; \\\n"
5556"})\n"
5557"#endif\n"
5558"\n"
5559"#ifdef __LITTLE_ENDIAN__\n"
5560"#define vabsh_f16(__p0) __extension__ ({ \\\n"
5561" float16_t __s0 = __p0; \\\n"
5562" float16_t __ret; \\\n"
5563" __ret = (float16_t) __builtin_neon_vabsh_f16(__s0); \\\n"
5564" __ret; \\\n"
5565"})\n"
5566"#else\n"
5567"#define vabsh_f16(__p0) __extension__ ({ \\\n"
5568" float16_t __s0 = __p0; \\\n"
5569" float16_t __ret; \\\n"
5570" __ret = (float16_t) __builtin_neon_vabsh_f16(__s0); \\\n"
5571" __ret; \\\n"
5572"})\n"
5573"#endif\n"
5574"\n"
5575"#ifdef __LITTLE_ENDIAN__\n"
5576"#define vaddh_f16(__p0, __p1) __extension__ ({ \\\n"
5577" float16_t __s0 = __p0; \\\n"
5578" float16_t __s1 = __p1; \\\n"
5579" float16_t __ret; \\\n"
5580" __ret = (float16_t) __builtin_neon_vaddh_f16(__s0, __s1); \\\n"
5581" __ret; \\\n"
5582"})\n"
5583"#else\n"
5584"#define vaddh_f16(__p0, __p1) __extension__ ({ \\\n"
5585" float16_t __s0 = __p0; \\\n"
5586" float16_t __s1 = __p1; \\\n"
5587" float16_t __ret; \\\n"
5588" __ret = (float16_t) __builtin_neon_vaddh_f16(__s0, __s1); \\\n"
5589" __ret; \\\n"
5590"})\n"
5591"#endif\n"
5592"\n"
5593"#ifdef __LITTLE_ENDIAN__\n"
5594"#define vcageh_f16(__p0, __p1) __extension__ ({ \\\n"
5595" float16_t __s0 = __p0; \\\n"
5596" float16_t __s1 = __p1; \\\n"
5597" uint16_t __ret; \\\n"
5598" __ret = (uint16_t) __builtin_neon_vcageh_f16(__s0, __s1); \\\n"
5599" __ret; \\\n"
5600"})\n"
5601"#else\n"
5602"#define vcageh_f16(__p0, __p1) __extension__ ({ \\\n"
5603" float16_t __s0 = __p0; \\\n"
5604" float16_t __s1 = __p1; \\\n"
5605" uint16_t __ret; \\\n"
5606" __ret = (uint16_t) __builtin_neon_vcageh_f16(__s0, __s1); \\\n"
5607" __ret; \\\n"
5608"})\n"
5609"#endif\n"
5610"\n"
5611"#ifdef __LITTLE_ENDIAN__\n"
5612"#define vcagth_f16(__p0, __p1) __extension__ ({ \\\n"
5613" float16_t __s0 = __p0; \\\n"
5614" float16_t __s1 = __p1; \\\n"
5615" uint16_t __ret; \\\n"
5616" __ret = (uint16_t) __builtin_neon_vcagth_f16(__s0, __s1); \\\n"
5617" __ret; \\\n"
5618"})\n"
5619"#else\n"
5620"#define vcagth_f16(__p0, __p1) __extension__ ({ \\\n"
5621" float16_t __s0 = __p0; \\\n"
5622" float16_t __s1 = __p1; \\\n"
5623" uint16_t __ret; \\\n"
5624" __ret = (uint16_t) __builtin_neon_vcagth_f16(__s0, __s1); \\\n"
5625" __ret; \\\n"
5626"})\n"
5627"#endif\n"
5628"\n"
5629"#ifdef __LITTLE_ENDIAN__\n"
5630"#define vcaleh_f16(__p0, __p1) __extension__ ({ \\\n"
5631" float16_t __s0 = __p0; \\\n"
5632" float16_t __s1 = __p1; \\\n"
5633" uint16_t __ret; \\\n"
5634" __ret = (uint16_t) __builtin_neon_vcaleh_f16(__s0, __s1); \\\n"
5635" __ret; \\\n"
5636"})\n"
5637"#else\n"
5638"#define vcaleh_f16(__p0, __p1) __extension__ ({ \\\n"
5639" float16_t __s0 = __p0; \\\n"
5640" float16_t __s1 = __p1; \\\n"
5641" uint16_t __ret; \\\n"
5642" __ret = (uint16_t) __builtin_neon_vcaleh_f16(__s0, __s1); \\\n"
5643" __ret; \\\n"
5644"})\n"
5645"#endif\n"
5646"\n"
5647"#ifdef __LITTLE_ENDIAN__\n"
5648"#define vcalth_f16(__p0, __p1) __extension__ ({ \\\n"
5649" float16_t __s0 = __p0; \\\n"
5650" float16_t __s1 = __p1; \\\n"
5651" uint16_t __ret; \\\n"
5652" __ret = (uint16_t) __builtin_neon_vcalth_f16(__s0, __s1); \\\n"
5653" __ret; \\\n"
5654"})\n"
5655"#else\n"
5656"#define vcalth_f16(__p0, __p1) __extension__ ({ \\\n"
5657" float16_t __s0 = __p0; \\\n"
5658" float16_t __s1 = __p1; \\\n"
5659" uint16_t __ret; \\\n"
5660" __ret = (uint16_t) __builtin_neon_vcalth_f16(__s0, __s1); \\\n"
5661" __ret; \\\n"
5662"})\n"
5663"#endif\n"
5664"\n"
5665"#ifdef __LITTLE_ENDIAN__\n"
5666"#define vceqh_f16(__p0, __p1) __extension__ ({ \\\n"
5667" float16_t __s0 = __p0; \\\n"
5668" float16_t __s1 = __p1; \\\n"
5669" uint16_t __ret; \\\n"
5670" __ret = (uint16_t) __builtin_neon_vceqh_f16(__s0, __s1); \\\n"
5671" __ret; \\\n"
5672"})\n"
5673"#else\n"
5674"#define vceqh_f16(__p0, __p1) __extension__ ({ \\\n"
5675" float16_t __s0 = __p0; \\\n"
5676" float16_t __s1 = __p1; \\\n"
5677" uint16_t __ret; \\\n"
5678" __ret = (uint16_t) __builtin_neon_vceqh_f16(__s0, __s1); \\\n"
5679" __ret; \\\n"
5680"})\n"
5681"#endif\n"
5682"\n"
5683"#ifdef __LITTLE_ENDIAN__\n"
5684"#define vceqzh_f16(__p0) __extension__ ({ \\\n"
5685" float16_t __s0 = __p0; \\\n"
5686" uint16_t __ret; \\\n"
5687" __ret = (uint16_t) __builtin_neon_vceqzh_f16(__s0); \\\n"
5688" __ret; \\\n"
5689"})\n"
5690"#else\n"
5691"#define vceqzh_f16(__p0) __extension__ ({ \\\n"
5692" float16_t __s0 = __p0; \\\n"
5693" uint16_t __ret; \\\n"
5694" __ret = (uint16_t) __builtin_neon_vceqzh_f16(__s0); \\\n"
5695" __ret; \\\n"
5696"})\n"
5697"#endif\n"
5698"\n"
5699"#ifdef __LITTLE_ENDIAN__\n"
5700"#define vcgeh_f16(__p0, __p1) __extension__ ({ \\\n"
5701" float16_t __s0 = __p0; \\\n"
5702" float16_t __s1 = __p1; \\\n"
5703" uint16_t __ret; \\\n"
5704" __ret = (uint16_t) __builtin_neon_vcgeh_f16(__s0, __s1); \\\n"
5705" __ret; \\\n"
5706"})\n"
5707"#else\n"
5708"#define vcgeh_f16(__p0, __p1) __extension__ ({ \\\n"
5709" float16_t __s0 = __p0; \\\n"
5710" float16_t __s1 = __p1; \\\n"
5711" uint16_t __ret; \\\n"
5712" __ret = (uint16_t) __builtin_neon_vcgeh_f16(__s0, __s1); \\\n"
5713" __ret; \\\n"
5714"})\n"
5715"#endif\n"
5716"\n"
5717"#ifdef __LITTLE_ENDIAN__\n"
5718"#define vcgezh_f16(__p0) __extension__ ({ \\\n"
5719" float16_t __s0 = __p0; \\\n"
5720" uint16_t __ret; \\\n"
5721" __ret = (uint16_t) __builtin_neon_vcgezh_f16(__s0); \\\n"
5722" __ret; \\\n"
5723"})\n"
5724"#else\n"
5725"#define vcgezh_f16(__p0) __extension__ ({ \\\n"
5726" float16_t __s0 = __p0; \\\n"
5727" uint16_t __ret; \\\n"
5728" __ret = (uint16_t) __builtin_neon_vcgezh_f16(__s0); \\\n"
5729" __ret; \\\n"
5730"})\n"
5731"#endif\n"
5732"\n"
5733"#ifdef __LITTLE_ENDIAN__\n"
5734"#define vcgth_f16(__p0, __p1) __extension__ ({ \\\n"
5735" float16_t __s0 = __p0; \\\n"
5736" float16_t __s1 = __p1; \\\n"
5737" uint16_t __ret; \\\n"
5738" __ret = (uint16_t) __builtin_neon_vcgth_f16(__s0, __s1); \\\n"
5739" __ret; \\\n"
5740"})\n"
5741"#else\n"
5742"#define vcgth_f16(__p0, __p1) __extension__ ({ \\\n"
5743" float16_t __s0 = __p0; \\\n"
5744" float16_t __s1 = __p1; \\\n"
5745" uint16_t __ret; \\\n"
5746" __ret = (uint16_t) __builtin_neon_vcgth_f16(__s0, __s1); \\\n"
5747" __ret; \\\n"
5748"})\n"
5749"#endif\n"
5750"\n"
5751"#ifdef __LITTLE_ENDIAN__\n"
5752"#define vcgtzh_f16(__p0) __extension__ ({ \\\n"
5753" float16_t __s0 = __p0; \\\n"
5754" uint16_t __ret; \\\n"
5755" __ret = (uint16_t) __builtin_neon_vcgtzh_f16(__s0); \\\n"
5756" __ret; \\\n"
5757"})\n"
5758"#else\n"
5759"#define vcgtzh_f16(__p0) __extension__ ({ \\\n"
5760" float16_t __s0 = __p0; \\\n"
5761" uint16_t __ret; \\\n"
5762" __ret = (uint16_t) __builtin_neon_vcgtzh_f16(__s0); \\\n"
5763" __ret; \\\n"
5764"})\n"
5765"#endif\n"
5766"\n"
5767"#ifdef __LITTLE_ENDIAN__\n"
5768"#define vcleh_f16(__p0, __p1) __extension__ ({ \\\n"
5769" float16_t __s0 = __p0; \\\n"
5770" float16_t __s1 = __p1; \\\n"
5771" uint16_t __ret; \\\n"
5772" __ret = (uint16_t) __builtin_neon_vcleh_f16(__s0, __s1); \\\n"
5773" __ret; \\\n"
5774"})\n"
5775"#else\n"
5776"#define vcleh_f16(__p0, __p1) __extension__ ({ \\\n"
5777" float16_t __s0 = __p0; \\\n"
5778" float16_t __s1 = __p1; \\\n"
5779" uint16_t __ret; \\\n"
5780" __ret = (uint16_t) __builtin_neon_vcleh_f16(__s0, __s1); \\\n"
5781" __ret; \\\n"
5782"})\n"
5783"#endif\n"
5784"\n"
5785"#ifdef __LITTLE_ENDIAN__\n"
5786"#define vclezh_f16(__p0) __extension__ ({ \\\n"
5787" float16_t __s0 = __p0; \\\n"
5788" uint16_t __ret; \\\n"
5789" __ret = (uint16_t) __builtin_neon_vclezh_f16(__s0); \\\n"
5790" __ret; \\\n"
5791"})\n"
5792"#else\n"
5793"#define vclezh_f16(__p0) __extension__ ({ \\\n"
5794" float16_t __s0 = __p0; \\\n"
5795" uint16_t __ret; \\\n"
5796" __ret = (uint16_t) __builtin_neon_vclezh_f16(__s0); \\\n"
5797" __ret; \\\n"
5798"})\n"
5799"#endif\n"
5800"\n"
5801"#ifdef __LITTLE_ENDIAN__\n"
5802"#define vclth_f16(__p0, __p1) __extension__ ({ \\\n"
5803" float16_t __s0 = __p0; \\\n"
5804" float16_t __s1 = __p1; \\\n"
5805" uint16_t __ret; \\\n"
5806" __ret = (uint16_t) __builtin_neon_vclth_f16(__s0, __s1); \\\n"
5807" __ret; \\\n"
5808"})\n"
5809"#else\n"
5810"#define vclth_f16(__p0, __p1) __extension__ ({ \\\n"
5811" float16_t __s0 = __p0; \\\n"
5812" float16_t __s1 = __p1; \\\n"
5813" uint16_t __ret; \\\n"
5814" __ret = (uint16_t) __builtin_neon_vclth_f16(__s0, __s1); \\\n"
5815" __ret; \\\n"
5816"})\n"
5817"#endif\n"
5818"\n"
5819"#ifdef __LITTLE_ENDIAN__\n"
5820"#define vcltzh_f16(__p0) __extension__ ({ \\\n"
5821" float16_t __s0 = __p0; \\\n"
5822" uint16_t __ret; \\\n"
5823" __ret = (uint16_t) __builtin_neon_vcltzh_f16(__s0); \\\n"
5824" __ret; \\\n"
5825"})\n"
5826"#else\n"
5827"#define vcltzh_f16(__p0) __extension__ ({ \\\n"
5828" float16_t __s0 = __p0; \\\n"
5829" uint16_t __ret; \\\n"
5830" __ret = (uint16_t) __builtin_neon_vcltzh_f16(__s0); \\\n"
5831" __ret; \\\n"
5832"})\n"
5833"#endif\n"
5834"\n"
5835"#ifdef __LITTLE_ENDIAN__\n"
5836"#define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \\\n"
5837" float16_t __s0 = __p0; \\\n"
5838" int16_t __ret; \\\n"
5839" __ret = (int16_t) __builtin_neon_vcvth_n_s16_f16(__s0, __p1); \\\n"
5840" __ret; \\\n"
5841"})\n"
5842"#else\n"
5843"#define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \\\n"
5844" float16_t __s0 = __p0; \\\n"
5845" int16_t __ret; \\\n"
5846" __ret = (int16_t) __builtin_neon_vcvth_n_s16_f16(__s0, __p1); \\\n"
5847" __ret; \\\n"
5848"})\n"
5849"#endif\n"
5850"\n"
5851"#ifdef __LITTLE_ENDIAN__\n"
5852"#define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \\\n"
5853" float16_t __s0 = __p0; \\\n"
5854" int32_t __ret; \\\n"
5855" __ret = (int32_t) __builtin_neon_vcvth_n_s32_f16(__s0, __p1); \\\n"
5856" __ret; \\\n"
5857"})\n"
5858"#else\n"
5859"#define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \\\n"
5860" float16_t __s0 = __p0; \\\n"
5861" int32_t __ret; \\\n"
5862" __ret = (int32_t) __builtin_neon_vcvth_n_s32_f16(__s0, __p1); \\\n"
5863" __ret; \\\n"
5864"})\n"
5865"#endif\n"
5866"\n"
5867"#ifdef __LITTLE_ENDIAN__\n"
5868"#define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \\\n"
5869" float16_t __s0 = __p0; \\\n"
5870" int64_t __ret; \\\n"
5871" __ret = (int64_t) __builtin_neon_vcvth_n_s64_f16(__s0, __p1); \\\n"
5872" __ret; \\\n"
5873"})\n"
5874"#else\n"
5875"#define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \\\n"
5876" float16_t __s0 = __p0; \\\n"
5877" int64_t __ret; \\\n"
5878" __ret = (int64_t) __builtin_neon_vcvth_n_s64_f16(__s0, __p1); \\\n"
5879" __ret; \\\n"
5880"})\n"
5881"#endif\n"
5882"\n"
5883"#ifdef __LITTLE_ENDIAN__\n"
5884"#define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \\\n"
5885" float16_t __s0 = __p0; \\\n"
5886" uint16_t __ret; \\\n"
5887" __ret = (uint16_t) __builtin_neon_vcvth_n_u16_f16(__s0, __p1); \\\n"
5888" __ret; \\\n"
5889"})\n"
5890"#else\n"
5891"#define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \\\n"
5892" float16_t __s0 = __p0; \\\n"
5893" uint16_t __ret; \\\n"
5894" __ret = (uint16_t) __builtin_neon_vcvth_n_u16_f16(__s0, __p1); \\\n"
5895" __ret; \\\n"
5896"})\n"
5897"#endif\n"
5898"\n"
5899"#ifdef __LITTLE_ENDIAN__\n"
5900"#define vcvth_n_u32_f16(__p0, __p1) __extension__ ({ \\\n"
5901" float16_t __s0 = __p0; \\\n"
5902" uint32_t __ret; \\\n"
5903" __ret = (uint32_t) __builtin_neon_vcvth_n_u32_f16(__s0, __p1); \\\n"
5904" __ret; \\\n"
5905"})\n"
5906"#else\n"
5907"#define vcvth_n_u32_f16(__p0, __p1) __extension__ ({ \\\n"
5908" float16_t __s0 = __p0; \\\n"
5909" uint32_t __ret; \\\n"
5910" __ret = (uint32_t) __builtin_neon_vcvth_n_u32_f16(__s0, __p1); \\\n"
5911" __ret; \\\n"
5912"})\n"
5913"#endif\n"
5914"\n"
5915"#ifdef __LITTLE_ENDIAN__\n"
5916"#define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \\\n"
5917" float16_t __s0 = __p0; \\\n"
5918" uint64_t __ret; \\\n"
5919" __ret = (uint64_t) __builtin_neon_vcvth_n_u64_f16(__s0, __p1); \\\n"
5920" __ret; \\\n"
5921"})\n"
5922"#else\n"
5923"#define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \\\n"
5924" float16_t __s0 = __p0; \\\n"
5925" uint64_t __ret; \\\n"
5926" __ret = (uint64_t) __builtin_neon_vcvth_n_u64_f16(__s0, __p1); \\\n"
5927" __ret; \\\n"
5928"})\n"
5929"#endif\n"
5930"\n"
5931"#ifdef __LITTLE_ENDIAN__\n"
5932"#define vcvth_s16_f16(__p0) __extension__ ({ \\\n"
5933" float16_t __s0 = __p0; \\\n"
5934" int16_t __ret; \\\n"
5935" __ret = (int16_t) __builtin_neon_vcvth_s16_f16(__s0); \\\n"
5936" __ret; \\\n"
5937"})\n"
5938"#else\n"
5939"#define vcvth_s16_f16(__p0) __extension__ ({ \\\n"
5940" float16_t __s0 = __p0; \\\n"
5941" int16_t __ret; \\\n"
5942" __ret = (int16_t) __builtin_neon_vcvth_s16_f16(__s0); \\\n"
5943" __ret; \\\n"
5944"})\n"
5945"#endif\n"
5946"\n"
5947"#ifdef __LITTLE_ENDIAN__\n"
5948"#define vcvth_s32_f16(__p0) __extension__ ({ \\\n"
5949" float16_t __s0 = __p0; \\\n"
5950" int32_t __ret; \\\n"
5951" __ret = (int32_t) __builtin_neon_vcvth_s32_f16(__s0); \\\n"
5952" __ret; \\\n"
5953"})\n"
5954"#else\n"
5955"#define vcvth_s32_f16(__p0) __extension__ ({ \\\n"
5956" float16_t __s0 = __p0; \\\n"
5957" int32_t __ret; \\\n"
5958" __ret = (int32_t) __builtin_neon_vcvth_s32_f16(__s0); \\\n"
5959" __ret; \\\n"
5960"})\n"
5961"#endif\n"
5962"\n"
5963"#ifdef __LITTLE_ENDIAN__\n"
5964"#define vcvth_s64_f16(__p0) __extension__ ({ \\\n"
5965" float16_t __s0 = __p0; \\\n"
5966" int64_t __ret; \\\n"
5967" __ret = (int64_t) __builtin_neon_vcvth_s64_f16(__s0); \\\n"
5968" __ret; \\\n"
5969"})\n"
5970"#else\n"
5971"#define vcvth_s64_f16(__p0) __extension__ ({ \\\n"
5972" float16_t __s0 = __p0; \\\n"
5973" int64_t __ret; \\\n"
5974" __ret = (int64_t) __builtin_neon_vcvth_s64_f16(__s0); \\\n"
5975" __ret; \\\n"
5976"})\n"
5977"#endif\n"
5978"\n"
5979"#ifdef __LITTLE_ENDIAN__\n"
5980"#define vcvth_u16_f16(__p0) __extension__ ({ \\\n"
5981" float16_t __s0 = __p0; \\\n"
5982" uint16_t __ret; \\\n"
5983" __ret = (uint16_t) __builtin_neon_vcvth_u16_f16(__s0); \\\n"
5984" __ret; \\\n"
5985"})\n"
5986"#else\n"
5987"#define vcvth_u16_f16(__p0) __extension__ ({ \\\n"
5988" float16_t __s0 = __p0; \\\n"
5989" uint16_t __ret; \\\n"
5990" __ret = (uint16_t) __builtin_neon_vcvth_u16_f16(__s0); \\\n"
5991" __ret; \\\n"
5992"})\n"
5993"#endif\n"
5994"\n"
5995"#ifdef __LITTLE_ENDIAN__\n"
5996"#define vcvth_u32_f16(__p0) __extension__ ({ \\\n"
5997" float16_t __s0 = __p0; \\\n"
5998" uint32_t __ret; \\\n"
5999" __ret = (uint32_t) __builtin_neon_vcvth_u32_f16(__s0); \\\n"
6000" __ret; \\\n"
6001"})\n"
6002"#else\n"
6003"#define vcvth_u32_f16(__p0) __extension__ ({ \\\n"
6004" float16_t __s0 = __p0; \\\n"
6005" uint32_t __ret; \\\n"
6006" __ret = (uint32_t) __builtin_neon_vcvth_u32_f16(__s0); \\\n"
6007" __ret; \\\n"
6008"})\n"
6009"#endif\n"
6010"\n"
6011"#ifdef __LITTLE_ENDIAN__\n"
6012"#define vcvth_u64_f16(__p0) __extension__ ({ \\\n"
6013" float16_t __s0 = __p0; \\\n"
6014" uint64_t __ret; \\\n"
6015" __ret = (uint64_t) __builtin_neon_vcvth_u64_f16(__s0); \\\n"
6016" __ret; \\\n"
6017"})\n"
6018"#else\n"
6019"#define vcvth_u64_f16(__p0) __extension__ ({ \\\n"
6020" float16_t __s0 = __p0; \\\n"
6021" uint64_t __ret; \\\n"
6022" __ret = (uint64_t) __builtin_neon_vcvth_u64_f16(__s0); \\\n"
6023" __ret; \\\n"
6024"})\n"
6025"#endif\n"
6026"\n"
6027"#ifdef __LITTLE_ENDIAN__\n"
6028"#define vcvtah_s16_f16(__p0) __extension__ ({ \\\n"
6029" float16_t __s0 = __p0; \\\n"
6030" int16_t __ret; \\\n"
6031" __ret = (int16_t) __builtin_neon_vcvtah_s16_f16(__s0); \\\n"
6032" __ret; \\\n"
6033"})\n"
6034"#else\n"
6035"#define vcvtah_s16_f16(__p0) __extension__ ({ \\\n"
6036" float16_t __s0 = __p0; \\\n"
6037" int16_t __ret; \\\n"
6038" __ret = (int16_t) __builtin_neon_vcvtah_s16_f16(__s0); \\\n"
6039" __ret; \\\n"
6040"})\n"
6041"#endif\n"
6042"\n"
6043"#ifdef __LITTLE_ENDIAN__\n"
6044"#define vcvtah_s32_f16(__p0) __extension__ ({ \\\n"
6045" float16_t __s0 = __p0; \\\n"
6046" int32_t __ret; \\\n"
6047" __ret = (int32_t) __builtin_neon_vcvtah_s32_f16(__s0); \\\n"
6048" __ret; \\\n"
6049"})\n"
6050"#else\n"
6051"#define vcvtah_s32_f16(__p0) __extension__ ({ \\\n"
6052" float16_t __s0 = __p0; \\\n"
6053" int32_t __ret; \\\n"
6054" __ret = (int32_t) __builtin_neon_vcvtah_s32_f16(__s0); \\\n"
6055" __ret; \\\n"
6056"})\n"
6057"#endif\n"
6058"\n"
6059"#ifdef __LITTLE_ENDIAN__\n"
6060"#define vcvtah_s64_f16(__p0) __extension__ ({ \\\n"
6061" float16_t __s0 = __p0; \\\n"
6062" int64_t __ret; \\\n"
6063" __ret = (int64_t) __builtin_neon_vcvtah_s64_f16(__s0); \\\n"
6064" __ret; \\\n"
6065"})\n"
6066"#else\n"
6067"#define vcvtah_s64_f16(__p0) __extension__ ({ \\\n"
6068" float16_t __s0 = __p0; \\\n"
6069" int64_t __ret; \\\n"
6070" __ret = (int64_t) __builtin_neon_vcvtah_s64_f16(__s0); \\\n"
6071" __ret; \\\n"
6072"})\n"
6073"#endif\n"
6074"\n"
6075"#ifdef __LITTLE_ENDIAN__\n"
6076"#define vcvtah_u16_f16(__p0) __extension__ ({ \\\n"
6077" float16_t __s0 = __p0; \\\n"
6078" uint16_t __ret; \\\n"
6079" __ret = (uint16_t) __builtin_neon_vcvtah_u16_f16(__s0); \\\n"
6080" __ret; \\\n"
6081"})\n"
6082"#else\n"
6083"#define vcvtah_u16_f16(__p0) __extension__ ({ \\\n"
6084" float16_t __s0 = __p0; \\\n"
6085" uint16_t __ret; \\\n"
6086" __ret = (uint16_t) __builtin_neon_vcvtah_u16_f16(__s0); \\\n"
6087" __ret; \\\n"
6088"})\n"
6089"#endif\n"
6090"\n"
6091"#ifdef __LITTLE_ENDIAN__\n"
6092"#define vcvtah_u32_f16(__p0) __extension__ ({ \\\n"
6093" float16_t __s0 = __p0; \\\n"
6094" uint32_t __ret; \\\n"
6095" __ret = (uint32_t) __builtin_neon_vcvtah_u32_f16(__s0); \\\n"
6096" __ret; \\\n"
6097"})\n"
6098"#else\n"
6099"#define vcvtah_u32_f16(__p0) __extension__ ({ \\\n"
6100" float16_t __s0 = __p0; \\\n"
6101" uint32_t __ret; \\\n"
6102" __ret = (uint32_t) __builtin_neon_vcvtah_u32_f16(__s0); \\\n"
6103" __ret; \\\n"
6104"})\n"
6105"#endif\n"
6106"\n"
6107"#ifdef __LITTLE_ENDIAN__\n"
6108"#define vcvtah_u64_f16(__p0) __extension__ ({ \\\n"
6109" float16_t __s0 = __p0; \\\n"
6110" uint64_t __ret; \\\n"
6111" __ret = (uint64_t) __builtin_neon_vcvtah_u64_f16(__s0); \\\n"
6112" __ret; \\\n"
6113"})\n"
6114"#else\n"
6115"#define vcvtah_u64_f16(__p0) __extension__ ({ \\\n"
6116" float16_t __s0 = __p0; \\\n"
6117" uint64_t __ret; \\\n"
6118" __ret = (uint64_t) __builtin_neon_vcvtah_u64_f16(__s0); \\\n"
6119" __ret; \\\n"
6120"})\n"
6121"#endif\n"
6122"\n"
6123"#ifdef __LITTLE_ENDIAN__\n"
6124"__ai float16_t vcvth_f16_u32(uint32_t __p0) {\n"
6125" float16_t __ret;\n"
6126" __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__p0);\n"
6127" return __ret;\n"
6128"}\n"
6129"#else\n"
6130"__ai float16_t vcvth_f16_u32(uint32_t __p0) {\n"
6131" float16_t __ret;\n"
6132" __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__p0);\n"
6133" return __ret;\n"
6134"}\n"
6135"#endif\n"
6136"\n"
6137"#ifdef __LITTLE_ENDIAN__\n"
6138"__ai float16_t vcvth_f16_u64(uint64_t __p0) {\n"
6139" float16_t __ret;\n"
6140" __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__p0);\n"
6141" return __ret;\n"
6142"}\n"
6143"#else\n"
6144"__ai float16_t vcvth_f16_u64(uint64_t __p0) {\n"
6145" float16_t __ret;\n"
6146" __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__p0);\n"
6147" return __ret;\n"
6148"}\n"
6149"#endif\n"
6150"\n"
6151"#ifdef __LITTLE_ENDIAN__\n"
6152"__ai float16_t vcvth_f16_u16(uint16_t __p0) {\n"
6153" float16_t __ret;\n"
6154" __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__p0);\n"
6155" return __ret;\n"
6156"}\n"
6157"#else\n"
6158"__ai float16_t vcvth_f16_u16(uint16_t __p0) {\n"
6159" float16_t __ret;\n"
6160" __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__p0);\n"
6161" return __ret;\n"
6162"}\n"
6163"#endif\n"
6164"\n"
6165"#ifdef __LITTLE_ENDIAN__\n"
6166"__ai float16_t vcvth_f16_s32(int32_t __p0) {\n"
6167" float16_t __ret;\n"
6168" __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__p0);\n"
6169" return __ret;\n"
6170"}\n"
6171"#else\n"
6172"__ai float16_t vcvth_f16_s32(int32_t __p0) {\n"
6173" float16_t __ret;\n"
6174" __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__p0);\n"
6175" return __ret;\n"
6176"}\n"
6177"#endif\n"
6178"\n"
6179"#ifdef __LITTLE_ENDIAN__\n"
6180"__ai float16_t vcvth_f16_s64(int64_t __p0) {\n"
6181" float16_t __ret;\n"
6182" __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__p0);\n"
6183" return __ret;\n"
6184"}\n"
6185"#else\n"
6186"__ai float16_t vcvth_f16_s64(int64_t __p0) {\n"
6187" float16_t __ret;\n"
6188" __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__p0);\n"
6189" return __ret;\n"
6190"}\n"
6191"#endif\n"
6192"\n"
6193"#ifdef __LITTLE_ENDIAN__\n"
6194"__ai float16_t vcvth_f16_s16(int16_t __p0) {\n"
6195" float16_t __ret;\n"
6196" __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__p0);\n"
6197" return __ret;\n"
6198"}\n"
6199"#else\n"
6200"__ai float16_t vcvth_f16_s16(int16_t __p0) {\n"
6201" float16_t __ret;\n"
6202" __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__p0);\n"
6203" return __ret;\n"
6204"}\n"
6205"#endif\n"
6206"\n"
6207"#ifdef __LITTLE_ENDIAN__\n"
6208"#define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \\\n"
6209" uint32_t __s0 = __p0; \\\n"
6210" float16_t __ret; \\\n"
6211" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u32(__s0, __p1); \\\n"
6212" __ret; \\\n"
6213"})\n"
6214"#else\n"
6215"#define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \\\n"
6216" uint32_t __s0 = __p0; \\\n"
6217" float16_t __ret; \\\n"
6218" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u32(__s0, __p1); \\\n"
6219" __ret; \\\n"
6220"})\n"
6221"#endif\n"
6222"\n"
6223"#ifdef __LITTLE_ENDIAN__\n"
6224"#define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \\\n"
6225" uint64_t __s0 = __p0; \\\n"
6226" float16_t __ret; \\\n"
6227" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \\\n"
6228" __ret; \\\n"
6229"})\n"
6230"#else\n"
6231"#define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \\\n"
6232" uint64_t __s0 = __p0; \\\n"
6233" float16_t __ret; \\\n"
6234" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \\\n"
6235" __ret; \\\n"
6236"})\n"
6237"#endif\n"
6238"\n"
6239"#ifdef __LITTLE_ENDIAN__\n"
6240"#define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \\\n"
6241" uint16_t __s0 = __p0; \\\n"
6242" float16_t __ret; \\\n"
6243" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \\\n"
6244" __ret; \\\n"
6245"})\n"
6246"#else\n"
6247"#define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \\\n"
6248" uint16_t __s0 = __p0; \\\n"
6249" float16_t __ret; \\\n"
6250" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \\\n"
6251" __ret; \\\n"
6252"})\n"
6253"#endif\n"
6254"\n"
6255"#ifdef __LITTLE_ENDIAN__\n"
6256"#define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \\\n"
6257" int32_t __s0 = __p0; \\\n"
6258" float16_t __ret; \\\n"
6259" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s32(__s0, __p1); \\\n"
6260" __ret; \\\n"
6261"})\n"
6262"#else\n"
6263"#define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \\\n"
6264" int32_t __s0 = __p0; \\\n"
6265" float16_t __ret; \\\n"
6266" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s32(__s0, __p1); \\\n"
6267" __ret; \\\n"
6268"})\n"
6269"#endif\n"
6270"\n"
6271"#ifdef __LITTLE_ENDIAN__\n"
6272"#define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \\\n"
6273" int64_t __s0 = __p0; \\\n"
6274" float16_t __ret; \\\n"
6275" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s64(__s0, __p1); \\\n"
6276" __ret; \\\n"
6277"})\n"
6278"#else\n"
6279"#define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \\\n"
6280" int64_t __s0 = __p0; \\\n"
6281" float16_t __ret; \\\n"
6282" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s64(__s0, __p1); \\\n"
6283" __ret; \\\n"
6284"})\n"
6285"#endif\n"
6286"\n"
6287"#ifdef __LITTLE_ENDIAN__\n"
6288"#define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \\\n"
6289" int16_t __s0 = __p0; \\\n"
6290" float16_t __ret; \\\n"
6291" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s16(__s0, __p1); \\\n"
6292" __ret; \\\n"
6293"})\n"
6294"#else\n"
6295"#define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \\\n"
6296" int16_t __s0 = __p0; \\\n"
6297" float16_t __ret; \\\n"
6298" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s16(__s0, __p1); \\\n"
6299" __ret; \\\n"
6300"})\n"
6301"#endif\n"
6302"\n"
6303"#ifdef __LITTLE_ENDIAN__\n"
6304"#define vcvtmh_s16_f16(__p0) __extension__ ({ \\\n"
6305" float16_t __s0 = __p0; \\\n"
6306" int16_t __ret; \\\n"
6307" __ret = (int16_t) __builtin_neon_vcvtmh_s16_f16(__s0); \\\n"
6308" __ret; \\\n"
6309"})\n"
6310"#else\n"
6311"#define vcvtmh_s16_f16(__p0) __extension__ ({ \\\n"
6312" float16_t __s0 = __p0; \\\n"
6313" int16_t __ret; \\\n"
6314" __ret = (int16_t) __builtin_neon_vcvtmh_s16_f16(__s0); \\\n"
6315" __ret; \\\n"
6316"})\n"
6317"#endif\n"
6318"\n"
6319"#ifdef __LITTLE_ENDIAN__\n"
6320"#define vcvtmh_s32_f16(__p0) __extension__ ({ \\\n"
6321" float16_t __s0 = __p0; \\\n"
6322" int32_t __ret; \\\n"
6323" __ret = (int32_t) __builtin_neon_vcvtmh_s32_f16(__s0); \\\n"
6324" __ret; \\\n"
6325"})\n"
6326"#else\n"
6327"#define vcvtmh_s32_f16(__p0) __extension__ ({ \\\n"
6328" float16_t __s0 = __p0; \\\n"
6329" int32_t __ret; \\\n"
6330" __ret = (int32_t) __builtin_neon_vcvtmh_s32_f16(__s0); \\\n"
6331" __ret; \\\n"
6332"})\n"
6333"#endif\n"
6334"\n"
6335"#ifdef __LITTLE_ENDIAN__\n"
6336"#define vcvtmh_s64_f16(__p0) __extension__ ({ \\\n"
6337" float16_t __s0 = __p0; \\\n"
6338" int64_t __ret; \\\n"
6339" __ret = (int64_t) __builtin_neon_vcvtmh_s64_f16(__s0); \\\n"
6340" __ret; \\\n"
6341"})\n"
6342"#else\n"
6343"#define vcvtmh_s64_f16(__p0) __extension__ ({ \\\n"
6344" float16_t __s0 = __p0; \\\n"
6345" int64_t __ret; \\\n"
6346" __ret = (int64_t) __builtin_neon_vcvtmh_s64_f16(__s0); \\\n"
6347" __ret; \\\n"
6348"})\n"
6349"#endif\n"
6350"\n"
6351"#ifdef __LITTLE_ENDIAN__\n"
6352"#define vcvtmh_u16_f16(__p0) __extension__ ({ \\\n"
6353" float16_t __s0 = __p0; \\\n"
6354" uint16_t __ret; \\\n"
6355" __ret = (uint16_t) __builtin_neon_vcvtmh_u16_f16(__s0); \\\n"
6356" __ret; \\\n"
6357"})\n"
6358"#else\n"
6359"#define vcvtmh_u16_f16(__p0) __extension__ ({ \\\n"
6360" float16_t __s0 = __p0; \\\n"
6361" uint16_t __ret; \\\n"
6362" __ret = (uint16_t) __builtin_neon_vcvtmh_u16_f16(__s0); \\\n"
6363" __ret; \\\n"
6364"})\n"
6365"#endif\n"
6366"\n"
6367"#ifdef __LITTLE_ENDIAN__\n"
6368"#define vcvtmh_u32_f16(__p0) __extension__ ({ \\\n"
6369" float16_t __s0 = __p0; \\\n"
6370" uint32_t __ret; \\\n"
6371" __ret = (uint32_t) __builtin_neon_vcvtmh_u32_f16(__s0); \\\n"
6372" __ret; \\\n"
6373"})\n"
6374"#else\n"
6375"#define vcvtmh_u32_f16(__p0) __extension__ ({ \\\n"
6376" float16_t __s0 = __p0; \\\n"
6377" uint32_t __ret; \\\n"
6378" __ret = (uint32_t) __builtin_neon_vcvtmh_u32_f16(__s0); \\\n"
6379" __ret; \\\n"
6380"})\n"
6381"#endif\n"
6382"\n"
6383"#ifdef __LITTLE_ENDIAN__\n"
6384"#define vcvtmh_u64_f16(__p0) __extension__ ({ \\\n"
6385" float16_t __s0 = __p0; \\\n"
6386" uint64_t __ret; \\\n"
6387" __ret = (uint64_t) __builtin_neon_vcvtmh_u64_f16(__s0); \\\n"
6388" __ret; \\\n"
6389"})\n"
6390"#else\n"
6391"#define vcvtmh_u64_f16(__p0) __extension__ ({ \\\n"
6392" float16_t __s0 = __p0; \\\n"
6393" uint64_t __ret; \\\n"
6394" __ret = (uint64_t) __builtin_neon_vcvtmh_u64_f16(__s0); \\\n"
6395" __ret; \\\n"
6396"})\n"
6397"#endif\n"
6398"\n"
6399"#ifdef __LITTLE_ENDIAN__\n"
6400"#define vcvtnh_s16_f16(__p0) __extension__ ({ \\\n"
6401" float16_t __s0 = __p0; \\\n"
6402" int16_t __ret; \\\n"
6403" __ret = (int16_t) __builtin_neon_vcvtnh_s16_f16(__s0); \\\n"
6404" __ret; \\\n"
6405"})\n"
6406"#else\n"
6407"#define vcvtnh_s16_f16(__p0) __extension__ ({ \\\n"
6408" float16_t __s0 = __p0; \\\n"
6409" int16_t __ret; \\\n"
6410" __ret = (int16_t) __builtin_neon_vcvtnh_s16_f16(__s0); \\\n"
6411" __ret; \\\n"
6412"})\n"
6413"#endif\n"
6414"\n"
6415"#ifdef __LITTLE_ENDIAN__\n"
6416"#define vcvtnh_s32_f16(__p0) __extension__ ({ \\\n"
6417" float16_t __s0 = __p0; \\\n"
6418" int32_t __ret; \\\n"
6419" __ret = (int32_t) __builtin_neon_vcvtnh_s32_f16(__s0); \\\n"
6420" __ret; \\\n"
6421"})\n"
6422"#else\n"
6423"#define vcvtnh_s32_f16(__p0) __extension__ ({ \\\n"
6424" float16_t __s0 = __p0; \\\n"
6425" int32_t __ret; \\\n"
6426" __ret = (int32_t) __builtin_neon_vcvtnh_s32_f16(__s0); \\\n"
6427" __ret; \\\n"
6428"})\n"
6429"#endif\n"
6430"\n"
6431"#ifdef __LITTLE_ENDIAN__\n"
6432"#define vcvtnh_s64_f16(__p0) __extension__ ({ \\\n"
6433" float16_t __s0 = __p0; \\\n"
6434" int64_t __ret; \\\n"
6435" __ret = (int64_t) __builtin_neon_vcvtnh_s64_f16(__s0); \\\n"
6436" __ret; \\\n"
6437"})\n"
6438"#else\n"
6439"#define vcvtnh_s64_f16(__p0) __extension__ ({ \\\n"
6440" float16_t __s0 = __p0; \\\n"
6441" int64_t __ret; \\\n"
6442" __ret = (int64_t) __builtin_neon_vcvtnh_s64_f16(__s0); \\\n"
6443" __ret; \\\n"
6444"})\n"
6445"#endif\n"
6446"\n"
6447"#ifdef __LITTLE_ENDIAN__\n"
6448"#define vcvtnh_u16_f16(__p0) __extension__ ({ \\\n"
6449" float16_t __s0 = __p0; \\\n"
6450" uint16_t __ret; \\\n"
6451" __ret = (uint16_t) __builtin_neon_vcvtnh_u16_f16(__s0); \\\n"
6452" __ret; \\\n"
6453"})\n"
6454"#else\n"
6455"#define vcvtnh_u16_f16(__p0) __extension__ ({ \\\n"
6456" float16_t __s0 = __p0; \\\n"
6457" uint16_t __ret; \\\n"
6458" __ret = (uint16_t) __builtin_neon_vcvtnh_u16_f16(__s0); \\\n"
6459" __ret; \\\n"
6460"})\n"
6461"#endif\n"
6462"\n"
6463"#ifdef __LITTLE_ENDIAN__\n"
6464"#define vcvtnh_u32_f16(__p0) __extension__ ({ \\\n"
6465" float16_t __s0 = __p0; \\\n"
6466" uint32_t __ret; \\\n"
6467" __ret = (uint32_t) __builtin_neon_vcvtnh_u32_f16(__s0); \\\n"
6468" __ret; \\\n"
6469"})\n"
6470"#else\n"
6471"#define vcvtnh_u32_f16(__p0) __extension__ ({ \\\n"
6472" float16_t __s0 = __p0; \\\n"
6473" uint32_t __ret; \\\n"
6474" __ret = (uint32_t) __builtin_neon_vcvtnh_u32_f16(__s0); \\\n"
6475" __ret; \\\n"
6476"})\n"
6477"#endif\n"
6478"\n"
6479"#ifdef __LITTLE_ENDIAN__\n"
6480"#define vcvtnh_u64_f16(__p0) __extension__ ({ \\\n"
6481" float16_t __s0 = __p0; \\\n"
6482" uint64_t __ret; \\\n"
6483" __ret = (uint64_t) __builtin_neon_vcvtnh_u64_f16(__s0); \\\n"
6484" __ret; \\\n"
6485"})\n"
6486"#else\n"
6487"#define vcvtnh_u64_f16(__p0) __extension__ ({ \\\n"
6488" float16_t __s0 = __p0; \\\n"
6489" uint64_t __ret; \\\n"
6490" __ret = (uint64_t) __builtin_neon_vcvtnh_u64_f16(__s0); \\\n"
6491" __ret; \\\n"
6492"})\n"
6493"#endif\n"
6494"\n"
6495"#ifdef __LITTLE_ENDIAN__\n"
6496"#define vcvtph_s16_f16(__p0) __extension__ ({ \\\n"
6497" float16_t __s0 = __p0; \\\n"
6498" int16_t __ret; \\\n"
6499" __ret = (int16_t) __builtin_neon_vcvtph_s16_f16(__s0); \\\n"
6500" __ret; \\\n"
6501"})\n"
6502"#else\n"
6503"#define vcvtph_s16_f16(__p0) __extension__ ({ \\\n"
6504" float16_t __s0 = __p0; \\\n"
6505" int16_t __ret; \\\n"
6506" __ret = (int16_t) __builtin_neon_vcvtph_s16_f16(__s0); \\\n"
6507" __ret; \\\n"
6508"})\n"
6509"#endif\n"
6510"\n"
6511"#ifdef __LITTLE_ENDIAN__\n"
6512"#define vcvtph_s32_f16(__p0) __extension__ ({ \\\n"
6513" float16_t __s0 = __p0; \\\n"
6514" int32_t __ret; \\\n"
6515" __ret = (int32_t) __builtin_neon_vcvtph_s32_f16(__s0); \\\n"
6516" __ret; \\\n"
6517"})\n"
6518"#else\n"
6519"#define vcvtph_s32_f16(__p0) __extension__ ({ \\\n"
6520" float16_t __s0 = __p0; \\\n"
6521" int32_t __ret; \\\n"
6522" __ret = (int32_t) __builtin_neon_vcvtph_s32_f16(__s0); \\\n"
6523" __ret; \\\n"
6524"})\n"
6525"#endif\n"
6526"\n"
6527"#ifdef __LITTLE_ENDIAN__\n"
6528"#define vcvtph_s64_f16(__p0) __extension__ ({ \\\n"
6529" float16_t __s0 = __p0; \\\n"
6530" int64_t __ret; \\\n"
6531" __ret = (int64_t) __builtin_neon_vcvtph_s64_f16(__s0); \\\n"
6532" __ret; \\\n"
6533"})\n"
6534"#else\n"
6535"#define vcvtph_s64_f16(__p0) __extension__ ({ \\\n"
6536" float16_t __s0 = __p0; \\\n"
6537" int64_t __ret; \\\n"
6538" __ret = (int64_t) __builtin_neon_vcvtph_s64_f16(__s0); \\\n"
6539" __ret; \\\n"
6540"})\n"
6541"#endif\n"
6542"\n"
6543"#ifdef __LITTLE_ENDIAN__\n"
6544"#define vcvtph_u16_f16(__p0) __extension__ ({ \\\n"
6545" float16_t __s0 = __p0; \\\n"
6546" uint16_t __ret; \\\n"
6547" __ret = (uint16_t) __builtin_neon_vcvtph_u16_f16(__s0); \\\n"
6548" __ret; \\\n"
6549"})\n"
6550"#else\n"
6551"#define vcvtph_u16_f16(__p0) __extension__ ({ \\\n"
6552" float16_t __s0 = __p0; \\\n"
6553" uint16_t __ret; \\\n"
6554" __ret = (uint16_t) __builtin_neon_vcvtph_u16_f16(__s0); \\\n"
6555" __ret; \\\n"
6556"})\n"
6557"#endif\n"
6558"\n"
6559"#ifdef __LITTLE_ENDIAN__\n"
6560"#define vcvtph_u32_f16(__p0) __extension__ ({ \\\n"
6561" float16_t __s0 = __p0; \\\n"
6562" uint32_t __ret; \\\n"
6563" __ret = (uint32_t) __builtin_neon_vcvtph_u32_f16(__s0); \\\n"
6564" __ret; \\\n"
6565"})\n"
6566"#else\n"
6567"#define vcvtph_u32_f16(__p0) __extension__ ({ \\\n"
6568" float16_t __s0 = __p0; \\\n"
6569" uint32_t __ret; \\\n"
6570" __ret = (uint32_t) __builtin_neon_vcvtph_u32_f16(__s0); \\\n"
6571" __ret; \\\n"
6572"})\n"
6573"#endif\n"
6574"\n"
6575"#ifdef __LITTLE_ENDIAN__\n"
6576"#define vcvtph_u64_f16(__p0) __extension__ ({ \\\n"
6577" float16_t __s0 = __p0; \\\n"
6578" uint64_t __ret; \\\n"
6579" __ret = (uint64_t) __builtin_neon_vcvtph_u64_f16(__s0); \\\n"
6580" __ret; \\\n"
6581"})\n"
6582"#else\n"
6583"#define vcvtph_u64_f16(__p0) __extension__ ({ \\\n"
6584" float16_t __s0 = __p0; \\\n"
6585" uint64_t __ret; \\\n"
6586" __ret = (uint64_t) __builtin_neon_vcvtph_u64_f16(__s0); \\\n"
6587" __ret; \\\n"
6588"})\n"
6589"#endif\n"
6590"\n"
6591"#ifdef __LITTLE_ENDIAN__\n"
6592"#define vdivh_f16(__p0, __p1) __extension__ ({ \\\n"
6593" float16_t __s0 = __p0; \\\n"
6594" float16_t __s1 = __p1; \\\n"
6595" float16_t __ret; \\\n"
6596" __ret = (float16_t) __builtin_neon_vdivh_f16(__s0, __s1); \\\n"
6597" __ret; \\\n"
6598"})\n"
6599"#else\n"
6600"#define vdivh_f16(__p0, __p1) __extension__ ({ \\\n"
6601" float16_t __s0 = __p0; \\\n"
6602" float16_t __s1 = __p1; \\\n"
6603" float16_t __ret; \\\n"
6604" __ret = (float16_t) __builtin_neon_vdivh_f16(__s0, __s1); \\\n"
6605" __ret; \\\n"
6606"})\n"
6607"#endif\n"
6608"\n"
6609"#ifdef __LITTLE_ENDIAN__\n"
6610"#define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \\\n"
6611" float16_t __s0 = __p0; \\\n"
6612" float16_t __s1 = __p1; \\\n"
6613" float16_t __s2 = __p2; \\\n"
6614" float16_t __ret; \\\n"
6615" __ret = (float16_t) __builtin_neon_vfmah_f16(__s0, __s1, __s2); \\\n"
6616" __ret; \\\n"
6617"})\n"
6618"#else\n"
6619"#define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \\\n"
6620" float16_t __s0 = __p0; \\\n"
6621" float16_t __s1 = __p1; \\\n"
6622" float16_t __s2 = __p2; \\\n"
6623" float16_t __ret; \\\n"
6624" __ret = (float16_t) __builtin_neon_vfmah_f16(__s0, __s1, __s2); \\\n"
6625" __ret; \\\n"
6626"})\n"
6627"#endif\n"
6628"\n"
6629"#ifdef __LITTLE_ENDIAN__\n"
6630"#define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \\\n"
6631" float16_t __s0 = __p0; \\\n"
6632" float16_t __s1 = __p1; \\\n"
6633" float16_t __s2 = __p2; \\\n"
6634" float16_t __ret; \\\n"
6635" __ret = (float16_t) __builtin_neon_vfmsh_f16(__s0, __s1, __s2); \\\n"
6636" __ret; \\\n"
6637"})\n"
6638"#else\n"
6639"#define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \\\n"
6640" float16_t __s0 = __p0; \\\n"
6641" float16_t __s1 = __p1; \\\n"
6642" float16_t __s2 = __p2; \\\n"
6643" float16_t __ret; \\\n"
6644" __ret = (float16_t) __builtin_neon_vfmsh_f16(__s0, __s1, __s2); \\\n"
6645" __ret; \\\n"
6646"})\n"
6647"#endif\n"
6648"\n"
6649"#ifdef __LITTLE_ENDIAN__\n"
6650"#define vmaxh_f16(__p0, __p1) __extension__ ({ \\\n"
6651" float16_t __s0 = __p0; \\\n"
6652" float16_t __s1 = __p1; \\\n"
6653" float16_t __ret; \\\n"
6654" __ret = (float16_t) __builtin_neon_vmaxh_f16(__s0, __s1); \\\n"
6655" __ret; \\\n"
6656"})\n"
6657"#else\n"
6658"#define vmaxh_f16(__p0, __p1) __extension__ ({ \\\n"
6659" float16_t __s0 = __p0; \\\n"
6660" float16_t __s1 = __p1; \\\n"
6661" float16_t __ret; \\\n"
6662" __ret = (float16_t) __builtin_neon_vmaxh_f16(__s0, __s1); \\\n"
6663" __ret; \\\n"
6664"})\n"
6665"#endif\n"
6666"\n"
6667"#ifdef __LITTLE_ENDIAN__\n"
6668"#define vmaxnmh_f16(__p0, __p1) __extension__ ({ \\\n"
6669" float16_t __s0 = __p0; \\\n"
6670" float16_t __s1 = __p1; \\\n"
6671" float16_t __ret; \\\n"
6672" __ret = (float16_t) __builtin_neon_vmaxnmh_f16(__s0, __s1); \\\n"
6673" __ret; \\\n"
6674"})\n"
6675"#else\n"
6676"#define vmaxnmh_f16(__p0, __p1) __extension__ ({ \\\n"
6677" float16_t __s0 = __p0; \\\n"
6678" float16_t __s1 = __p1; \\\n"
6679" float16_t __ret; \\\n"
6680" __ret = (float16_t) __builtin_neon_vmaxnmh_f16(__s0, __s1); \\\n"
6681" __ret; \\\n"
6682"})\n"
6683"#endif\n"
6684"\n"
6685"#ifdef __LITTLE_ENDIAN__\n"
6686"#define vminh_f16(__p0, __p1) __extension__ ({ \\\n"
6687" float16_t __s0 = __p0; \\\n"
6688" float16_t __s1 = __p1; \\\n"
6689" float16_t __ret; \\\n"
6690" __ret = (float16_t) __builtin_neon_vminh_f16(__s0, __s1); \\\n"
6691" __ret; \\\n"
6692"})\n"
6693"#else\n"
6694"#define vminh_f16(__p0, __p1) __extension__ ({ \\\n"
6695" float16_t __s0 = __p0; \\\n"
6696" float16_t __s1 = __p1; \\\n"
6697" float16_t __ret; \\\n"
6698" __ret = (float16_t) __builtin_neon_vminh_f16(__s0, __s1); \\\n"
6699" __ret; \\\n"
6700"})\n"
6701"#endif\n"
6702"\n"
6703"#ifdef __LITTLE_ENDIAN__\n"
6704"#define vminnmh_f16(__p0, __p1) __extension__ ({ \\\n"
6705" float16_t __s0 = __p0; \\\n"
6706" float16_t __s1 = __p1; \\\n"
6707" float16_t __ret; \\\n"
6708" __ret = (float16_t) __builtin_neon_vminnmh_f16(__s0, __s1); \\\n"
6709" __ret; \\\n"
6710"})\n"
6711"#else\n"
6712"#define vminnmh_f16(__p0, __p1) __extension__ ({ \\\n"
6713" float16_t __s0 = __p0; \\\n"
6714" float16_t __s1 = __p1; \\\n"
6715" float16_t __ret; \\\n"
6716" __ret = (float16_t) __builtin_neon_vminnmh_f16(__s0, __s1); \\\n"
6717" __ret; \\\n"
6718"})\n"
6719"#endif\n"
6720"\n"
6721"#ifdef __LITTLE_ENDIAN__\n"
6722"#define vmulh_f16(__p0, __p1) __extension__ ({ \\\n"
6723" float16_t __s0 = __p0; \\\n"
6724" float16_t __s1 = __p1; \\\n"
6725" float16_t __ret; \\\n"
6726" __ret = (float16_t) __builtin_neon_vmulh_f16(__s0, __s1); \\\n"
6727" __ret; \\\n"
6728"})\n"
6729"#else\n"
6730"#define vmulh_f16(__p0, __p1) __extension__ ({ \\\n"
6731" float16_t __s0 = __p0; \\\n"
6732" float16_t __s1 = __p1; \\\n"
6733" float16_t __ret; \\\n"
6734" __ret = (float16_t) __builtin_neon_vmulh_f16(__s0, __s1); \\\n"
6735" __ret; \\\n"
6736"})\n"
6737"#endif\n"
6738"\n"
6739"#ifdef __LITTLE_ENDIAN__\n"
6740"#define vmulxh_f16(__p0, __p1) __extension__ ({ \\\n"
6741" float16_t __s0 = __p0; \\\n"
6742" float16_t __s1 = __p1; \\\n"
6743" float16_t __ret; \\\n"
6744" __ret = (float16_t) __builtin_neon_vmulxh_f16(__s0, __s1); \\\n"
6745" __ret; \\\n"
6746"})\n"
6747"#else\n"
6748"#define vmulxh_f16(__p0, __p1) __extension__ ({ \\\n"
6749" float16_t __s0 = __p0; \\\n"
6750" float16_t __s1 = __p1; \\\n"
6751" float16_t __ret; \\\n"
6752" __ret = (float16_t) __builtin_neon_vmulxh_f16(__s0, __s1); \\\n"
6753" __ret; \\\n"
6754"})\n"
6755"#endif\n"
6756"\n"
6757"#ifdef __LITTLE_ENDIAN__\n"
6758"#define vnegh_f16(__p0) __extension__ ({ \\\n"
6759" float16_t __s0 = __p0; \\\n"
6760" float16_t __ret; \\\n"
6761" __ret = (float16_t) __builtin_neon_vnegh_f16(__s0); \\\n"
6762" __ret; \\\n"
6763"})\n"
6764"#else\n"
6765"#define vnegh_f16(__p0) __extension__ ({ \\\n"
6766" float16_t __s0 = __p0; \\\n"
6767" float16_t __ret; \\\n"
6768" __ret = (float16_t) __builtin_neon_vnegh_f16(__s0); \\\n"
6769" __ret; \\\n"
6770"})\n"
6771"#endif\n"
6772"\n"
6773"#ifdef __LITTLE_ENDIAN__\n"
6774"#define vrecpeh_f16(__p0) __extension__ ({ \\\n"
6775" float16_t __s0 = __p0; \\\n"
6776" float16_t __ret; \\\n"
6777" __ret = (float16_t) __builtin_neon_vrecpeh_f16(__s0); \\\n"
6778" __ret; \\\n"
6779"})\n"
6780"#else\n"
6781"#define vrecpeh_f16(__p0) __extension__ ({ \\\n"
6782" float16_t __s0 = __p0; \\\n"
6783" float16_t __ret; \\\n"
6784" __ret = (float16_t) __builtin_neon_vrecpeh_f16(__s0); \\\n"
6785" __ret; \\\n"
6786"})\n"
6787"#endif\n"
6788"\n"
6789"#ifdef __LITTLE_ENDIAN__\n"
6790"#define vrecpsh_f16(__p0, __p1) __extension__ ({ \\\n"
6791" float16_t __s0 = __p0; \\\n"
6792" float16_t __s1 = __p1; \\\n"
6793" float16_t __ret; \\\n"
6794" __ret = (float16_t) __builtin_neon_vrecpsh_f16(__s0, __s1); \\\n"
6795" __ret; \\\n"
6796"})\n"
6797"#else\n"
6798"#define vrecpsh_f16(__p0, __p1) __extension__ ({ \\\n"
6799" float16_t __s0 = __p0; \\\n"
6800" float16_t __s1 = __p1; \\\n"
6801" float16_t __ret; \\\n"
6802" __ret = (float16_t) __builtin_neon_vrecpsh_f16(__s0, __s1); \\\n"
6803" __ret; \\\n"
6804"})\n"
6805"#endif\n"
6806"\n"
6807"#ifdef __LITTLE_ENDIAN__\n"
6808"#define vrecpxh_f16(__p0) __extension__ ({ \\\n"
6809" float16_t __s0 = __p0; \\\n"
6810" float16_t __ret; \\\n"
6811" __ret = (float16_t) __builtin_neon_vrecpxh_f16(__s0); \\\n"
6812" __ret; \\\n"
6813"})\n"
6814"#else\n"
6815"#define vrecpxh_f16(__p0) __extension__ ({ \\\n"
6816" float16_t __s0 = __p0; \\\n"
6817" float16_t __ret; \\\n"
6818" __ret = (float16_t) __builtin_neon_vrecpxh_f16(__s0); \\\n"
6819" __ret; \\\n"
6820"})\n"
6821"#endif\n"
6822"\n"
6823"#ifdef __LITTLE_ENDIAN__\n"
6824"#define vrndh_f16(__p0) __extension__ ({ \\\n"
6825" float16_t __s0 = __p0; \\\n"
6826" float16_t __ret; \\\n"
6827" __ret = (float16_t) __builtin_neon_vrndh_f16(__s0); \\\n"
6828" __ret; \\\n"
6829"})\n"
6830"#else\n"
6831"#define vrndh_f16(__p0) __extension__ ({ \\\n"
6832" float16_t __s0 = __p0; \\\n"
6833" float16_t __ret; \\\n"
6834" __ret = (float16_t) __builtin_neon_vrndh_f16(__s0); \\\n"
6835" __ret; \\\n"
6836"})\n"
6837"#endif\n"
6838"\n"
6839"#ifdef __LITTLE_ENDIAN__\n"
6840"#define vrndah_f16(__p0) __extension__ ({ \\\n"
6841" float16_t __s0 = __p0; \\\n"
6842" float16_t __ret; \\\n"
6843" __ret = (float16_t) __builtin_neon_vrndah_f16(__s0); \\\n"
6844" __ret; \\\n"
6845"})\n"
6846"#else\n"
6847"#define vrndah_f16(__p0) __extension__ ({ \\\n"
6848" float16_t __s0 = __p0; \\\n"
6849" float16_t __ret; \\\n"
6850" __ret = (float16_t) __builtin_neon_vrndah_f16(__s0); \\\n"
6851" __ret; \\\n"
6852"})\n"
6853"#endif\n"
6854"\n"
6855"#ifdef __LITTLE_ENDIAN__\n"
6856"#define vrndih_f16(__p0) __extension__ ({ \\\n"
6857" float16_t __s0 = __p0; \\\n"
6858" float16_t __ret; \\\n"
6859" __ret = (float16_t) __builtin_neon_vrndih_f16(__s0); \\\n"
6860" __ret; \\\n"
6861"})\n"
6862"#else\n"
6863"#define vrndih_f16(__p0) __extension__ ({ \\\n"
6864" float16_t __s0 = __p0; \\\n"
6865" float16_t __ret; \\\n"
6866" __ret = (float16_t) __builtin_neon_vrndih_f16(__s0); \\\n"
6867" __ret; \\\n"
6868"})\n"
6869"#endif\n"
6870"\n"
6871"#ifdef __LITTLE_ENDIAN__\n"
6872"#define vrndmh_f16(__p0) __extension__ ({ \\\n"
6873" float16_t __s0 = __p0; \\\n"
6874" float16_t __ret; \\\n"
6875" __ret = (float16_t) __builtin_neon_vrndmh_f16(__s0); \\\n"
6876" __ret; \\\n"
6877"})\n"
6878"#else\n"
6879"#define vrndmh_f16(__p0) __extension__ ({ \\\n"
6880" float16_t __s0 = __p0; \\\n"
6881" float16_t __ret; \\\n"
6882" __ret = (float16_t) __builtin_neon_vrndmh_f16(__s0); \\\n"
6883" __ret; \\\n"
6884"})\n"
6885"#endif\n"
6886"\n"
6887"#ifdef __LITTLE_ENDIAN__\n"
6888"#define vrndnh_f16(__p0) __extension__ ({ \\\n"
6889" float16_t __s0 = __p0; \\\n"
6890" float16_t __ret; \\\n"
6891" __ret = (float16_t) __builtin_neon_vrndnh_f16(__s0); \\\n"
6892" __ret; \\\n"
6893"})\n"
6894"#else\n"
6895"#define vrndnh_f16(__p0) __extension__ ({ \\\n"
6896" float16_t __s0 = __p0; \\\n"
6897" float16_t __ret; \\\n"
6898" __ret = (float16_t) __builtin_neon_vrndnh_f16(__s0); \\\n"
6899" __ret; \\\n"
6900"})\n"
6901"#endif\n"
6902"\n"
6903"#ifdef __LITTLE_ENDIAN__\n"
6904"#define vrndph_f16(__p0) __extension__ ({ \\\n"
6905" float16_t __s0 = __p0; \\\n"
6906" float16_t __ret; \\\n"
6907" __ret = (float16_t) __builtin_neon_vrndph_f16(__s0); \\\n"
6908" __ret; \\\n"
6909"})\n"
6910"#else\n"
6911"#define vrndph_f16(__p0) __extension__ ({ \\\n"
6912" float16_t __s0 = __p0; \\\n"
6913" float16_t __ret; \\\n"
6914" __ret = (float16_t) __builtin_neon_vrndph_f16(__s0); \\\n"
6915" __ret; \\\n"
6916"})\n"
6917"#endif\n"
6918"\n"
6919"#ifdef __LITTLE_ENDIAN__\n"
6920"#define vrndxh_f16(__p0) __extension__ ({ \\\n"
6921" float16_t __s0 = __p0; \\\n"
6922" float16_t __ret; \\\n"
6923" __ret = (float16_t) __builtin_neon_vrndxh_f16(__s0); \\\n"
6924" __ret; \\\n"
6925"})\n"
6926"#else\n"
6927"#define vrndxh_f16(__p0) __extension__ ({ \\\n"
6928" float16_t __s0 = __p0; \\\n"
6929" float16_t __ret; \\\n"
6930" __ret = (float16_t) __builtin_neon_vrndxh_f16(__s0); \\\n"
6931" __ret; \\\n"
6932"})\n"
6933"#endif\n"
6934"\n"
6935"#ifdef __LITTLE_ENDIAN__\n"
6936"#define vrsqrteh_f16(__p0) __extension__ ({ \\\n"
6937" float16_t __s0 = __p0; \\\n"
6938" float16_t __ret; \\\n"
6939" __ret = (float16_t) __builtin_neon_vrsqrteh_f16(__s0); \\\n"
6940" __ret; \\\n"
6941"})\n"
6942"#else\n"
6943"#define vrsqrteh_f16(__p0) __extension__ ({ \\\n"
6944" float16_t __s0 = __p0; \\\n"
6945" float16_t __ret; \\\n"
6946" __ret = (float16_t) __builtin_neon_vrsqrteh_f16(__s0); \\\n"
6947" __ret; \\\n"
6948"})\n"
6949"#endif\n"
6950"\n"
6951"#ifdef __LITTLE_ENDIAN__\n"
6952"#define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \\\n"
6953" float16_t __s0 = __p0; \\\n"
6954" float16_t __s1 = __p1; \\\n"
6955" float16_t __ret; \\\n"
6956" __ret = (float16_t) __builtin_neon_vrsqrtsh_f16(__s0, __s1); \\\n"
6957" __ret; \\\n"
6958"})\n"
6959"#else\n"
6960"#define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \\\n"
6961" float16_t __s0 = __p0; \\\n"
6962" float16_t __s1 = __p1; \\\n"
6963" float16_t __ret; \\\n"
6964" __ret = (float16_t) __builtin_neon_vrsqrtsh_f16(__s0, __s1); \\\n"
6965" __ret; \\\n"
6966"})\n"
6967"#endif\n"
6968"\n"
6969"#ifdef __LITTLE_ENDIAN__\n"
6970"#define vsqrth_f16(__p0) __extension__ ({ \\\n"
6971" float16_t __s0 = __p0; \\\n"
6972" float16_t __ret; \\\n"
6973" __ret = (float16_t) __builtin_neon_vsqrth_f16(__s0); \\\n"
6974" __ret; \\\n"
6975"})\n"
6976"#else\n"
6977"#define vsqrth_f16(__p0) __extension__ ({ \\\n"
6978" float16_t __s0 = __p0; \\\n"
6979" float16_t __ret; \\\n"
6980" __ret = (float16_t) __builtin_neon_vsqrth_f16(__s0); \\\n"
6981" __ret; \\\n"
6982"})\n"
6983"#endif\n"
6984"\n"
6985"#ifdef __LITTLE_ENDIAN__\n"
6986"#define vsubh_f16(__p0, __p1) __extension__ ({ \\\n"
6987" float16_t __s0 = __p0; \\\n"
6988" float16_t __s1 = __p1; \\\n"
6989" float16_t __ret; \\\n"
6990" __ret = (float16_t) __builtin_neon_vsubh_f16(__s0, __s1); \\\n"
6991" __ret; \\\n"
6992"})\n"
6993"#else\n"
6994"#define vsubh_f16(__p0, __p1) __extension__ ({ \\\n"
6995" float16_t __s0 = __p0; \\\n"
6996" float16_t __s1 = __p1; \\\n"
6997" float16_t __ret; \\\n"
6998" __ret = (float16_t) __builtin_neon_vsubh_f16(__s0, __s1); \\\n"
6999" __ret; \\\n"
7000"})\n"
7001"#endif\n"
7002"\n"
7003"#endif\n"
7004"\n"
7005"#undef __ai\n"
7006"\n"
7007"#endif /* __ARM_FP16_H */\n"
7008"" } ,
7009 { "/builtins/armintr.h" , "/*===---- armintr.h - ARM Windows intrinsics -------------------------------===\n"
7010" *\n"
7011" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
7012" * of this software and associated documentation files (the \"Software\"), to deal\n"
7013" * in the Software without restriction, including without limitation the rights\n"
7014" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
7015" * copies of the Software, and to permit persons to whom the Software is\n"
7016" * furnished to do so, subject to the following conditions:\n"
7017" *\n"
7018" * The above copyright notice and this permission notice shall be included in\n"
7019" * all copies or substantial portions of the Software.\n"
7020" *\n"
7021" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
7022" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
7023" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
7024" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
7025" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
7026" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
7027" * THE SOFTWARE.\n"
7028" *\n"
7029" *===-----------------------------------------------------------------------===\n"
7030" */\n"
7031"\n"
7032"/* Only include this if we're compiling for the windows platform. */\n"
7033"#ifndef _MSC_VER\n"
7034"#include_next <armintr.h>\n"
7035"#else\n"
7036"\n"
7037"#ifndef __ARMINTR_H\n"
7038"#define __ARMINTR_H\n"
7039"\n"
7040"typedef enum\n"
7041"{\n"
7042" _ARM_BARRIER_SY = 0xF,\n"
7043" _ARM_BARRIER_ST = 0xE,\n"
7044" _ARM_BARRIER_ISH = 0xB,\n"
7045" _ARM_BARRIER_ISHST = 0xA,\n"
7046" _ARM_BARRIER_NSH = 0x7,\n"
7047" _ARM_BARRIER_NSHST = 0x6,\n"
7048" _ARM_BARRIER_OSH = 0x3,\n"
7049" _ARM_BARRIER_OSHST = 0x2\n"
7050"} _ARMINTR_BARRIER_TYPE;\n"
7051"\n"
7052"#endif /* __ARMINTR_H */\n"
7053"#endif /* _MSC_VER */\n"
7054"" } ,
7055 { "/builtins/avx2intrin.h" , "/*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------===\n"
7056" *\n"
7057" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
7058" * of this software and associated documentation files (the \"Software\"), to deal\n"
7059" * in the Software without restriction, including without limitation the rights\n"
7060" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
7061" * copies of the Software, and to permit persons to whom the Software is\n"
7062" * furnished to do so, subject to the following conditions:\n"
7063" *\n"
7064" * The above copyright notice and this permission notice shall be included in\n"
7065" * all copies or substantial portions of the Software.\n"
7066" *\n"
7067" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
7068" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
7069" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
7070" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
7071" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
7072" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
7073" * THE SOFTWARE.\n"
7074" *\n"
7075" *===-----------------------------------------------------------------------===\n"
7076" */\n"
7077"\n"
7078"#ifndef __IMMINTRIN_H\n"
7079"#error \"Never use <avx2intrin.h> directly; include <immintrin.h> instead.\"\n"
7080"#endif\n"
7081"\n"
7082"#ifndef __AVX2INTRIN_H\n"
7083"#define __AVX2INTRIN_H\n"
7084"\n"
7085"/* Define the default attributes for the functions in this file. */\n"
7086"#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"avx2\"), __min_vector_width__(256)))\n"
7087"#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"avx2\"), __min_vector_width__(128)))\n"
7088"\n"
7089"/* SSE4 Multiple Packed Sums of Absolute Difference. */\n"
7090"#define _mm256_mpsadbw_epu8(X, Y, M) \\\n"
7091" (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \\\n"
7092" (__v32qi)(__m256i)(Y), (int)(M))\n"
7093"\n"
7094"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7095"_mm256_abs_epi8(__m256i __a)\n"
7096"{\n"
7097" return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);\n"
7098"}\n"
7099"\n"
7100"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7101"_mm256_abs_epi16(__m256i __a)\n"
7102"{\n"
7103" return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);\n"
7104"}\n"
7105"\n"
7106"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7107"_mm256_abs_epi32(__m256i __a)\n"
7108"{\n"
7109" return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);\n"
7110"}\n"
7111"\n"
7112"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7113"_mm256_packs_epi16(__m256i __a, __m256i __b)\n"
7114"{\n"
7115" return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);\n"
7116"}\n"
7117"\n"
7118"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7119"_mm256_packs_epi32(__m256i __a, __m256i __b)\n"
7120"{\n"
7121" return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);\n"
7122"}\n"
7123"\n"
7124"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7125"_mm256_packus_epi16(__m256i __a, __m256i __b)\n"
7126"{\n"
7127" return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);\n"
7128"}\n"
7129"\n"
7130"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7131"_mm256_packus_epi32(__m256i __V1, __m256i __V2)\n"
7132"{\n"
7133" return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);\n"
7134"}\n"
7135"\n"
7136"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7137"_mm256_add_epi8(__m256i __a, __m256i __b)\n"
7138"{\n"
7139" return (__m256i)((__v32qu)__a + (__v32qu)__b);\n"
7140"}\n"
7141"\n"
7142"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7143"_mm256_add_epi16(__m256i __a, __m256i __b)\n"
7144"{\n"
7145" return (__m256i)((__v16hu)__a + (__v16hu)__b);\n"
7146"}\n"
7147"\n"
7148"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7149"_mm256_add_epi32(__m256i __a, __m256i __b)\n"
7150"{\n"
7151" return (__m256i)((__v8su)__a + (__v8su)__b);\n"
7152"}\n"
7153"\n"
7154"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7155"_mm256_add_epi64(__m256i __a, __m256i __b)\n"
7156"{\n"
7157" return (__m256i)((__v4du)__a + (__v4du)__b);\n"
7158"}\n"
7159"\n"
7160"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7161"_mm256_adds_epi8(__m256i __a, __m256i __b)\n"
7162"{\n"
7163" return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);\n"
7164"}\n"
7165"\n"
7166"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7167"_mm256_adds_epi16(__m256i __a, __m256i __b)\n"
7168"{\n"
7169" return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);\n"
7170"}\n"
7171"\n"
7172"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7173"_mm256_adds_epu8(__m256i __a, __m256i __b)\n"
7174"{\n"
7175" return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);\n"
7176"}\n"
7177"\n"
7178"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7179"_mm256_adds_epu16(__m256i __a, __m256i __b)\n"
7180"{\n"
7181" return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);\n"
7182"}\n"
7183"\n"
7184"#define _mm256_alignr_epi8(a, b, n) \\\n"
7185" (__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \\\n"
7186" (__v32qi)(__m256i)(b), (n))\n"
7187"\n"
7188"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7189"_mm256_and_si256(__m256i __a, __m256i __b)\n"
7190"{\n"
7191" return (__m256i)((__v4du)__a & (__v4du)__b);\n"
7192"}\n"
7193"\n"
7194"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7195"_mm256_andnot_si256(__m256i __a, __m256i __b)\n"
7196"{\n"
7197" return (__m256i)(~(__v4du)__a & (__v4du)__b);\n"
7198"}\n"
7199"\n"
7200"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7201"_mm256_avg_epu8(__m256i __a, __m256i __b)\n"
7202"{\n"
7203" typedef unsigned short __v32hu __attribute__((__vector_size__(64)));\n"
7204" return (__m256i)__builtin_convertvector(\n"
7205" ((__builtin_convertvector((__v32qu)__a, __v32hu) +\n"
7206" __builtin_convertvector((__v32qu)__b, __v32hu)) + 1)\n"
7207" >> 1, __v32qu);\n"
7208"}\n"
7209"\n"
7210"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7211"_mm256_avg_epu16(__m256i __a, __m256i __b)\n"
7212"{\n"
7213" typedef unsigned int __v16su __attribute__((__vector_size__(64)));\n"
7214" return (__m256i)__builtin_convertvector(\n"
7215" ((__builtin_convertvector((__v16hu)__a, __v16su) +\n"
7216" __builtin_convertvector((__v16hu)__b, __v16su)) + 1)\n"
7217" >> 1, __v16hu);\n"
7218"}\n"
7219"\n"
7220"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7221"_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)\n"
7222"{\n"
7223" return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,\n"
7224" (__v32qi)__M);\n"
7225"}\n"
7226"\n"
7227"#define _mm256_blend_epi16(V1, V2, M) \\\n"
7228" (__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \\\n"
7229" (__v16hi)(__m256i)(V2), (int)(M))\n"
7230"\n"
7231"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7232"_mm256_cmpeq_epi8(__m256i __a, __m256i __b)\n"
7233"{\n"
7234" return (__m256i)((__v32qi)__a == (__v32qi)__b);\n"
7235"}\n"
7236"\n"
7237"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7238"_mm256_cmpeq_epi16(__m256i __a, __m256i __b)\n"
7239"{\n"
7240" return (__m256i)((__v16hi)__a == (__v16hi)__b);\n"
7241"}\n"
7242"\n"
7243"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7244"_mm256_cmpeq_epi32(__m256i __a, __m256i __b)\n"
7245"{\n"
7246" return (__m256i)((__v8si)__a == (__v8si)__b);\n"
7247"}\n"
7248"\n"
7249"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7250"_mm256_cmpeq_epi64(__m256i __a, __m256i __b)\n"
7251"{\n"
7252" return (__m256i)((__v4di)__a == (__v4di)__b);\n"
7253"}\n"
7254"\n"
7255"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7256"_mm256_cmpgt_epi8(__m256i __a, __m256i __b)\n"
7257"{\n"
7258" /* This function always performs a signed comparison, but __v32qi is a char\n"
7259" which may be signed or unsigned, so use __v32qs. */\n"
7260" return (__m256i)((__v32qs)__a > (__v32qs)__b);\n"
7261"}\n"
7262"\n"
7263"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7264"_mm256_cmpgt_epi16(__m256i __a, __m256i __b)\n"
7265"{\n"
7266" return (__m256i)((__v16hi)__a > (__v16hi)__b);\n"
7267"}\n"
7268"\n"
7269"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7270"_mm256_cmpgt_epi32(__m256i __a, __m256i __b)\n"
7271"{\n"
7272" return (__m256i)((__v8si)__a > (__v8si)__b);\n"
7273"}\n"
7274"\n"
7275"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7276"_mm256_cmpgt_epi64(__m256i __a, __m256i __b)\n"
7277"{\n"
7278" return (__m256i)((__v4di)__a > (__v4di)__b);\n"
7279"}\n"
7280"\n"
7281"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7282"_mm256_hadd_epi16(__m256i __a, __m256i __b)\n"
7283"{\n"
7284" return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);\n"
7285"}\n"
7286"\n"
7287"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7288"_mm256_hadd_epi32(__m256i __a, __m256i __b)\n"
7289"{\n"
7290" return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);\n"
7291"}\n"
7292"\n"
7293"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7294"_mm256_hadds_epi16(__m256i __a, __m256i __b)\n"
7295"{\n"
7296" return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);\n"
7297"}\n"
7298"\n"
7299"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7300"_mm256_hsub_epi16(__m256i __a, __m256i __b)\n"
7301"{\n"
7302" return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);\n"
7303"}\n"
7304"\n"
7305"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7306"_mm256_hsub_epi32(__m256i __a, __m256i __b)\n"
7307"{\n"
7308" return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);\n"
7309"}\n"
7310"\n"
7311"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7312"_mm256_hsubs_epi16(__m256i __a, __m256i __b)\n"
7313"{\n"
7314" return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);\n"
7315"}\n"
7316"\n"
7317"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7318"_mm256_maddubs_epi16(__m256i __a, __m256i __b)\n"
7319"{\n"
7320" return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);\n"
7321"}\n"
7322"\n"
7323"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7324"_mm256_madd_epi16(__m256i __a, __m256i __b)\n"
7325"{\n"
7326" return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);\n"
7327"}\n"
7328"\n"
7329"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7330"_mm256_max_epi8(__m256i __a, __m256i __b)\n"
7331"{\n"
7332" return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);\n"
7333"}\n"
7334"\n"
7335"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7336"_mm256_max_epi16(__m256i __a, __m256i __b)\n"
7337"{\n"
7338" return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);\n"
7339"}\n"
7340"\n"
7341"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7342"_mm256_max_epi32(__m256i __a, __m256i __b)\n"
7343"{\n"
7344" return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);\n"
7345"}\n"
7346"\n"
7347"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7348"_mm256_max_epu8(__m256i __a, __m256i __b)\n"
7349"{\n"
7350" return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);\n"
7351"}\n"
7352"\n"
7353"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7354"_mm256_max_epu16(__m256i __a, __m256i __b)\n"
7355"{\n"
7356" return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);\n"
7357"}\n"
7358"\n"
7359"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7360"_mm256_max_epu32(__m256i __a, __m256i __b)\n"
7361"{\n"
7362" return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);\n"
7363"}\n"
7364"\n"
7365"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7366"_mm256_min_epi8(__m256i __a, __m256i __b)\n"
7367"{\n"
7368" return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);\n"
7369"}\n"
7370"\n"
7371"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7372"_mm256_min_epi16(__m256i __a, __m256i __b)\n"
7373"{\n"
7374" return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);\n"
7375"}\n"
7376"\n"
7377"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7378"_mm256_min_epi32(__m256i __a, __m256i __b)\n"
7379"{\n"
7380" return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);\n"
7381"}\n"
7382"\n"
7383"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7384"_mm256_min_epu8(__m256i __a, __m256i __b)\n"
7385"{\n"
7386" return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);\n"
7387"}\n"
7388"\n"
7389"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7390"_mm256_min_epu16(__m256i __a, __m256i __b)\n"
7391"{\n"
7392" return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);\n"
7393"}\n"
7394"\n"
7395"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7396"_mm256_min_epu32(__m256i __a, __m256i __b)\n"
7397"{\n"
7398" return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);\n"
7399"}\n"
7400"\n"
7401"static __inline__ int __DEFAULT_FN_ATTRS256\n"
7402"_mm256_movemask_epi8(__m256i __a)\n"
7403"{\n"
7404" return __builtin_ia32_pmovmskb256((__v32qi)__a);\n"
7405"}\n"
7406"\n"
7407"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7408"_mm256_cvtepi8_epi16(__m128i __V)\n"
7409"{\n"
7410" /* This function always performs a signed extension, but __v16qi is a char\n"
7411" which may be signed or unsigned, so use __v16qs. */\n"
7412" return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi);\n"
7413"}\n"
7414"\n"
7415"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7416"_mm256_cvtepi8_epi32(__m128i __V)\n"
7417"{\n"
7418" /* This function always performs a signed extension, but __v16qi is a char\n"
7419" which may be signed or unsigned, so use __v16qs. */\n"
7420" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);\n"
7421"}\n"
7422"\n"
7423"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7424"_mm256_cvtepi8_epi64(__m128i __V)\n"
7425"{\n"
7426" /* This function always performs a signed extension, but __v16qi is a char\n"
7427" which may be signed or unsigned, so use __v16qs. */\n"
7428" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di);\n"
7429"}\n"
7430"\n"
7431"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7432"_mm256_cvtepi16_epi32(__m128i __V)\n"
7433"{\n"
7434" return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si);\n"
7435"}\n"
7436"\n"
7437"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7438"_mm256_cvtepi16_epi64(__m128i __V)\n"
7439"{\n"
7440" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di);\n"
7441"}\n"
7442"\n"
7443"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7444"_mm256_cvtepi32_epi64(__m128i __V)\n"
7445"{\n"
7446" return (__m256i)__builtin_convertvector((__v4si)__V, __v4di);\n"
7447"}\n"
7448"\n"
7449"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7450"_mm256_cvtepu8_epi16(__m128i __V)\n"
7451"{\n"
7452" return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi);\n"
7453"}\n"
7454"\n"
7455"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7456"_mm256_cvtepu8_epi32(__m128i __V)\n"
7457"{\n"
7458" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);\n"
7459"}\n"
7460"\n"
7461"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7462"_mm256_cvtepu8_epi64(__m128i __V)\n"
7463"{\n"
7464" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di);\n"
7465"}\n"
7466"\n"
7467"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7468"_mm256_cvtepu16_epi32(__m128i __V)\n"
7469"{\n"
7470" return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si);\n"
7471"}\n"
7472"\n"
7473"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7474"_mm256_cvtepu16_epi64(__m128i __V)\n"
7475"{\n"
7476" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di);\n"
7477"}\n"
7478"\n"
7479"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7480"_mm256_cvtepu32_epi64(__m128i __V)\n"
7481"{\n"
7482" return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);\n"
7483"}\n"
7484"\n"
7485"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7486"_mm256_mul_epi32(__m256i __a, __m256i __b)\n"
7487"{\n"
7488" return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);\n"
7489"}\n"
7490"\n"
7491"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7492"_mm256_mulhrs_epi16(__m256i __a, __m256i __b)\n"
7493"{\n"
7494" return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);\n"
7495"}\n"
7496"\n"
7497"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7498"_mm256_mulhi_epu16(__m256i __a, __m256i __b)\n"
7499"{\n"
7500" return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);\n"
7501"}\n"
7502"\n"
7503"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7504"_mm256_mulhi_epi16(__m256i __a, __m256i __b)\n"
7505"{\n"
7506" return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);\n"
7507"}\n"
7508"\n"
7509"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7510"_mm256_mullo_epi16(__m256i __a, __m256i __b)\n"
7511"{\n"
7512" return (__m256i)((__v16hu)__a * (__v16hu)__b);\n"
7513"}\n"
7514"\n"
7515"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7516"_mm256_mullo_epi32 (__m256i __a, __m256i __b)\n"
7517"{\n"
7518" return (__m256i)((__v8su)__a * (__v8su)__b);\n"
7519"}\n"
7520"\n"
7521"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7522"_mm256_mul_epu32(__m256i __a, __m256i __b)\n"
7523"{\n"
7524" return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);\n"
7525"}\n"
7526"\n"
7527"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7528"_mm256_or_si256(__m256i __a, __m256i __b)\n"
7529"{\n"
7530" return (__m256i)((__v4du)__a | (__v4du)__b);\n"
7531"}\n"
7532"\n"
7533"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7534"_mm256_sad_epu8(__m256i __a, __m256i __b)\n"
7535"{\n"
7536" return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);\n"
7537"}\n"
7538"\n"
7539"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7540"_mm256_shuffle_epi8(__m256i __a, __m256i __b)\n"
7541"{\n"
7542" return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);\n"
7543"}\n"
7544"\n"
7545"#define _mm256_shuffle_epi32(a, imm) \\\n"
7546" (__m256i)__builtin_ia32_pshufd256((__v8si)(__m256i)(a), (int)(imm))\n"
7547"\n"
7548"#define _mm256_shufflehi_epi16(a, imm) \\\n"
7549" (__m256i)__builtin_ia32_pshufhw256((__v16hi)(__m256i)(a), (int)(imm))\n"
7550"\n"
7551"#define _mm256_shufflelo_epi16(a, imm) \\\n"
7552" (__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm))\n"
7553"\n"
7554"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7555"_mm256_sign_epi8(__m256i __a, __m256i __b)\n"
7556"{\n"
7557" return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);\n"
7558"}\n"
7559"\n"
7560"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7561"_mm256_sign_epi16(__m256i __a, __m256i __b)\n"
7562"{\n"
7563" return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);\n"
7564"}\n"
7565"\n"
7566"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7567"_mm256_sign_epi32(__m256i __a, __m256i __b)\n"
7568"{\n"
7569" return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);\n"
7570"}\n"
7571"\n"
7572"#define _mm256_slli_si256(a, imm) \\\n"
7573" (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))\n"
7574"\n"
7575"#define _mm256_bslli_epi128(a, imm) \\\n"
7576" (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))\n"
7577"\n"
7578"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7579"_mm256_slli_epi16(__m256i __a, int __count)\n"
7580"{\n"
7581" return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);\n"
7582"}\n"
7583"\n"
7584"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7585"_mm256_sll_epi16(__m256i __a, __m128i __count)\n"
7586"{\n"
7587" return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);\n"
7588"}\n"
7589"\n"
7590"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7591"_mm256_slli_epi32(__m256i __a, int __count)\n"
7592"{\n"
7593" return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);\n"
7594"}\n"
7595"\n"
7596"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7597"_mm256_sll_epi32(__m256i __a, __m128i __count)\n"
7598"{\n"
7599" return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);\n"
7600"}\n"
7601"\n"
7602"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7603"_mm256_slli_epi64(__m256i __a, int __count)\n"
7604"{\n"
7605" return __builtin_ia32_psllqi256((__v4di)__a, __count);\n"
7606"}\n"
7607"\n"
7608"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7609"_mm256_sll_epi64(__m256i __a, __m128i __count)\n"
7610"{\n"
7611" return __builtin_ia32_psllq256((__v4di)__a, __count);\n"
7612"}\n"
7613"\n"
7614"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7615"_mm256_srai_epi16(__m256i __a, int __count)\n"
7616"{\n"
7617" return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);\n"
7618"}\n"
7619"\n"
7620"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7621"_mm256_sra_epi16(__m256i __a, __m128i __count)\n"
7622"{\n"
7623" return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);\n"
7624"}\n"
7625"\n"
7626"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7627"_mm256_srai_epi32(__m256i __a, int __count)\n"
7628"{\n"
7629" return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);\n"
7630"}\n"
7631"\n"
7632"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7633"_mm256_sra_epi32(__m256i __a, __m128i __count)\n"
7634"{\n"
7635" return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);\n"
7636"}\n"
7637"\n"
7638"#define _mm256_srli_si256(a, imm) \\\n"
7639" (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))\n"
7640"\n"
7641"#define _mm256_bsrli_epi128(a, imm) \\\n"
7642" (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))\n"
7643"\n"
7644"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7645"_mm256_srli_epi16(__m256i __a, int __count)\n"
7646"{\n"
7647" return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);\n"
7648"}\n"
7649"\n"
7650"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7651"_mm256_srl_epi16(__m256i __a, __m128i __count)\n"
7652"{\n"
7653" return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);\n"
7654"}\n"
7655"\n"
7656"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7657"_mm256_srli_epi32(__m256i __a, int __count)\n"
7658"{\n"
7659" return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);\n"
7660"}\n"
7661"\n"
7662"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7663"_mm256_srl_epi32(__m256i __a, __m128i __count)\n"
7664"{\n"
7665" return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);\n"
7666"}\n"
7667"\n"
7668"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7669"_mm256_srli_epi64(__m256i __a, int __count)\n"
7670"{\n"
7671" return __builtin_ia32_psrlqi256((__v4di)__a, __count);\n"
7672"}\n"
7673"\n"
7674"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7675"_mm256_srl_epi64(__m256i __a, __m128i __count)\n"
7676"{\n"
7677" return __builtin_ia32_psrlq256((__v4di)__a, __count);\n"
7678"}\n"
7679"\n"
7680"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7681"_mm256_sub_epi8(__m256i __a, __m256i __b)\n"
7682"{\n"
7683" return (__m256i)((__v32qu)__a - (__v32qu)__b);\n"
7684"}\n"
7685"\n"
7686"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7687"_mm256_sub_epi16(__m256i __a, __m256i __b)\n"
7688"{\n"
7689" return (__m256i)((__v16hu)__a - (__v16hu)__b);\n"
7690"}\n"
7691"\n"
7692"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7693"_mm256_sub_epi32(__m256i __a, __m256i __b)\n"
7694"{\n"
7695" return (__m256i)((__v8su)__a - (__v8su)__b);\n"
7696"}\n"
7697"\n"
7698"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7699"_mm256_sub_epi64(__m256i __a, __m256i __b)\n"
7700"{\n"
7701" return (__m256i)((__v4du)__a - (__v4du)__b);\n"
7702"}\n"
7703"\n"
7704"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7705"_mm256_subs_epi8(__m256i __a, __m256i __b)\n"
7706"{\n"
7707" return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);\n"
7708"}\n"
7709"\n"
7710"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7711"_mm256_subs_epi16(__m256i __a, __m256i __b)\n"
7712"{\n"
7713" return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);\n"
7714"}\n"
7715"\n"
7716"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7717"_mm256_subs_epu8(__m256i __a, __m256i __b)\n"
7718"{\n"
7719" return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);\n"
7720"}\n"
7721"\n"
7722"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7723"_mm256_subs_epu16(__m256i __a, __m256i __b)\n"
7724"{\n"
7725" return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);\n"
7726"}\n"
7727"\n"
7728"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7729"_mm256_unpackhi_epi8(__m256i __a, __m256i __b)\n"
7730"{\n"
7731" return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);\n"
7732"}\n"
7733"\n"
7734"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7735"_mm256_unpackhi_epi16(__m256i __a, __m256i __b)\n"
7736"{\n"
7737" return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);\n"
7738"}\n"
7739"\n"
7740"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7741"_mm256_unpackhi_epi32(__m256i __a, __m256i __b)\n"
7742"{\n"
7743" return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);\n"
7744"}\n"
7745"\n"
7746"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7747"_mm256_unpackhi_epi64(__m256i __a, __m256i __b)\n"
7748"{\n"
7749" return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3);\n"
7750"}\n"
7751"\n"
7752"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7753"_mm256_unpacklo_epi8(__m256i __a, __m256i __b)\n"
7754"{\n"
7755" return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);\n"
7756"}\n"
7757"\n"
7758"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7759"_mm256_unpacklo_epi16(__m256i __a, __m256i __b)\n"
7760"{\n"
7761" return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);\n"
7762"}\n"
7763"\n"
7764"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7765"_mm256_unpacklo_epi32(__m256i __a, __m256i __b)\n"
7766"{\n"
7767" return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);\n"
7768"}\n"
7769"\n"
7770"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7771"_mm256_unpacklo_epi64(__m256i __a, __m256i __b)\n"
7772"{\n"
7773" return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2);\n"
7774"}\n"
7775"\n"
7776"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7777"_mm256_xor_si256(__m256i __a, __m256i __b)\n"
7778"{\n"
7779" return (__m256i)((__v4du)__a ^ (__v4du)__b);\n"
7780"}\n"
7781"\n"
7782"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7783"_mm256_stream_load_si256(__m256i const *__V)\n"
7784"{\n"
7785" typedef __v4di __v4di_aligned __attribute__((aligned(32)));\n"
7786" return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V);\n"
7787"}\n"
7788"\n"
7789"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
7790"_mm_broadcastss_ps(__m128 __X)\n"
7791"{\n"
7792" return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0);\n"
7793"}\n"
7794"\n"
7795"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
7796"_mm_broadcastsd_pd(__m128d __a)\n"
7797"{\n"
7798" return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);\n"
7799"}\n"
7800"\n"
7801"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
7802"_mm256_broadcastss_ps(__m128 __X)\n"
7803"{\n"
7804" return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7805"}\n"
7806"\n"
7807"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
7808"_mm256_broadcastsd_pd(__m128d __X)\n"
7809"{\n"
7810" return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0);\n"
7811"}\n"
7812"\n"
7813"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7814"_mm256_broadcastsi128_si256(__m128i __X)\n"
7815"{\n"
7816" return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1);\n"
7817"}\n"
7818"\n"
7819"#define _mm_blend_epi32(V1, V2, M) \\\n"
7820" (__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \\\n"
7821" (__v4si)(__m128i)(V2), (int)(M))\n"
7822"\n"
7823"#define _mm256_blend_epi32(V1, V2, M) \\\n"
7824" (__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \\\n"
7825" (__v8si)(__m256i)(V2), (int)(M))\n"
7826"\n"
7827"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7828"_mm256_broadcastb_epi8(__m128i __X)\n"
7829"{\n"
7830" return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7831"}\n"
7832"\n"
7833"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7834"_mm256_broadcastw_epi16(__m128i __X)\n"
7835"{\n"
7836" return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7837"}\n"
7838"\n"
7839"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7840"_mm256_broadcastd_epi32(__m128i __X)\n"
7841"{\n"
7842" return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7843"}\n"
7844"\n"
7845"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7846"_mm256_broadcastq_epi64(__m128i __X)\n"
7847"{\n"
7848" return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0);\n"
7849"}\n"
7850"\n"
7851"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7852"_mm_broadcastb_epi8(__m128i __X)\n"
7853"{\n"
7854" return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7855"}\n"
7856"\n"
7857"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7858"_mm_broadcastw_epi16(__m128i __X)\n"
7859"{\n"
7860" return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7861"}\n"
7862"\n"
7863"\n"
7864"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7865"_mm_broadcastd_epi32(__m128i __X)\n"
7866"{\n"
7867" return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0);\n"
7868"}\n"
7869"\n"
7870"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7871"_mm_broadcastq_epi64(__m128i __X)\n"
7872"{\n"
7873" return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0);\n"
7874"}\n"
7875"\n"
7876"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7877"_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)\n"
7878"{\n"
7879" return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);\n"
7880"}\n"
7881"\n"
7882"#define _mm256_permute4x64_pd(V, M) \\\n"
7883" (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M))\n"
7884"\n"
7885"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
7886"_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)\n"
7887"{\n"
7888" return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b);\n"
7889"}\n"
7890"\n"
7891"#define _mm256_permute4x64_epi64(V, M) \\\n"
7892" (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M))\n"
7893"\n"
7894"#define _mm256_permute2x128_si256(V1, V2, M) \\\n"
7895" (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (int)(M))\n"
7896"\n"
7897"#define _mm256_extracti128_si256(V, M) \\\n"
7898" (__m128i)__builtin_ia32_extract128i256((__v4di)(__m256i)(V), (int)(M))\n"
7899"\n"
7900"#define _mm256_inserti128_si256(V1, V2, M) \\\n"
7901" (__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \\\n"
7902" (__v2di)(__m128i)(V2), (int)(M))\n"
7903"\n"
7904"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7905"_mm256_maskload_epi32(int const *__X, __m256i __M)\n"
7906"{\n"
7907" return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M);\n"
7908"}\n"
7909"\n"
7910"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7911"_mm256_maskload_epi64(long long const *__X, __m256i __M)\n"
7912"{\n"
7913" return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, (__v4di)__M);\n"
7914"}\n"
7915"\n"
7916"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7917"_mm_maskload_epi32(int const *__X, __m128i __M)\n"
7918"{\n"
7919" return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M);\n"
7920"}\n"
7921"\n"
7922"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7923"_mm_maskload_epi64(long long const *__X, __m128i __M)\n"
7924"{\n"
7925" return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M);\n"
7926"}\n"
7927"\n"
7928"static __inline__ void __DEFAULT_FN_ATTRS256\n"
7929"_mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y)\n"
7930"{\n"
7931" __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y);\n"
7932"}\n"
7933"\n"
7934"static __inline__ void __DEFAULT_FN_ATTRS256\n"
7935"_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y)\n"
7936"{\n"
7937" __builtin_ia32_maskstoreq256((__v4di *)__X, (__v4di)__M, (__v4di)__Y);\n"
7938"}\n"
7939"\n"
7940"static __inline__ void __DEFAULT_FN_ATTRS128\n"
7941"_mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y)\n"
7942"{\n"
7943" __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y);\n"
7944"}\n"
7945"\n"
7946"static __inline__ void __DEFAULT_FN_ATTRS128\n"
7947"_mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)\n"
7948"{\n"
7949" __builtin_ia32_maskstoreq(( __v2di *)__X, (__v2di)__M, (__v2di)__Y);\n"
7950"}\n"
7951"\n"
7952"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7953"_mm256_sllv_epi32(__m256i __X, __m256i __Y)\n"
7954"{\n"
7955" return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);\n"
7956"}\n"
7957"\n"
7958"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7959"_mm_sllv_epi32(__m128i __X, __m128i __Y)\n"
7960"{\n"
7961" return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);\n"
7962"}\n"
7963"\n"
7964"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7965"_mm256_sllv_epi64(__m256i __X, __m256i __Y)\n"
7966"{\n"
7967" return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y);\n"
7968"}\n"
7969"\n"
7970"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7971"_mm_sllv_epi64(__m128i __X, __m128i __Y)\n"
7972"{\n"
7973" return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y);\n"
7974"}\n"
7975"\n"
7976"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7977"_mm256_srav_epi32(__m256i __X, __m256i __Y)\n"
7978"{\n"
7979" return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);\n"
7980"}\n"
7981"\n"
7982"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7983"_mm_srav_epi32(__m128i __X, __m128i __Y)\n"
7984"{\n"
7985" return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);\n"
7986"}\n"
7987"\n"
7988"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7989"_mm256_srlv_epi32(__m256i __X, __m256i __Y)\n"
7990"{\n"
7991" return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);\n"
7992"}\n"
7993"\n"
7994"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7995"_mm_srlv_epi32(__m128i __X, __m128i __Y)\n"
7996"{\n"
7997" return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);\n"
7998"}\n"
7999"\n"
8000"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
8001"_mm256_srlv_epi64(__m256i __X, __m256i __Y)\n"
8002"{\n"
8003" return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y);\n"
8004"}\n"
8005"\n"
8006"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
8007"_mm_srlv_epi64(__m128i __X, __m128i __Y)\n"
8008"{\n"
8009" return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y);\n"
8010"}\n"
8011"\n"
8012"#define _mm_mask_i32gather_pd(a, m, i, mask, s) \\\n"
8013" (__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128i)(a), \\\n"
8014" (double const *)(m), \\\n"
8015" (__v4si)(__m128i)(i), \\\n"
8016" (__v2df)(__m128d)(mask), (s))\n"
8017"\n"
8018"#define _mm256_mask_i32gather_pd(a, m, i, mask, s) \\\n"
8019" (__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \\\n"
8020" (double const *)(m), \\\n"
8021" (__v4si)(__m128i)(i), \\\n"
8022" (__v4df)(__m256d)(mask), (s))\n"
8023"\n"
8024"#define _mm_mask_i64gather_pd(a, m, i, mask, s) \\\n"
8025" (__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \\\n"
8026" (double const *)(m), \\\n"
8027" (__v2di)(__m128i)(i), \\\n"
8028" (__v2df)(__m128d)(mask), (s))\n"
8029"\n"
8030"#define _mm256_mask_i64gather_pd(a, m, i, mask, s) \\\n"
8031" (__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \\\n"
8032" (double const *)(m), \\\n"
8033" (__v4di)(__m256i)(i), \\\n"
8034" (__v4df)(__m256d)(mask), (s))\n"
8035"\n"
8036"#define _mm_mask_i32gather_ps(a, m, i, mask, s) \\\n"
8037" (__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \\\n"
8038" (float const *)(m), \\\n"
8039" (__v4si)(__m128i)(i), \\\n"
8040" (__v4sf)(__m128)(mask), (s))\n"
8041"\n"
8042"#define _mm256_mask_i32gather_ps(a, m, i, mask, s) \\\n"
8043" (__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \\\n"
8044" (float const *)(m), \\\n"
8045" (__v8si)(__m256i)(i), \\\n"
8046" (__v8sf)(__m256)(mask), (s))\n"
8047"\n"
8048"#define _mm_mask_i64gather_ps(a, m, i, mask, s) \\\n"
8049" (__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \\\n"
8050" (float const *)(m), \\\n"
8051" (__v2di)(__m128i)(i), \\\n"
8052" (__v4sf)(__m128)(mask), (s))\n"
8053"\n"
8054"#define _mm256_mask_i64gather_ps(a, m, i, mask, s) \\\n"
8055" (__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \\\n"
8056" (float const *)(m), \\\n"
8057" (__v4di)(__m256i)(i), \\\n"
8058" (__v4sf)(__m128)(mask), (s))\n"
8059"\n"
8060"#define _mm_mask_i32gather_epi32(a, m, i, mask, s) \\\n"
8061" (__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \\\n"
8062" (int const *)(m), \\\n"
8063" (__v4si)(__m128i)(i), \\\n"
8064" (__v4si)(__m128i)(mask), (s))\n"
8065"\n"
8066"#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) \\\n"
8067" (__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \\\n"
8068" (int const *)(m), \\\n"
8069" (__v8si)(__m256i)(i), \\\n"
8070" (__v8si)(__m256i)(mask), (s))\n"
8071"\n"
8072"#define _mm_mask_i64gather_epi32(a, m, i, mask, s) \\\n"
8073" (__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \\\n"
8074" (int const *)(m), \\\n"
8075" (__v2di)(__m128i)(i), \\\n"
8076" (__v4si)(__m128i)(mask), (s))\n"
8077"\n"
8078"#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) \\\n"
8079" (__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \\\n"
8080" (int const *)(m), \\\n"
8081" (__v4di)(__m256i)(i), \\\n"
8082" (__v4si)(__m128i)(mask), (s))\n"
8083"\n"
8084"#define _mm_mask_i32gather_epi64(a, m, i, mask, s) \\\n"
8085" (__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \\\n"
8086" (long long const *)(m), \\\n"
8087" (__v4si)(__m128i)(i), \\\n"
8088" (__v2di)(__m128i)(mask), (s))\n"
8089"\n"
8090"#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) \\\n"
8091" (__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \\\n"
8092" (long long const *)(m), \\\n"
8093" (__v4si)(__m128i)(i), \\\n"
8094" (__v4di)(__m256i)(mask), (s))\n"
8095"\n"
8096"#define _mm_mask_i64gather_epi64(a, m, i, mask, s) \\\n"
8097" (__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \\\n"
8098" (long long const *)(m), \\\n"
8099" (__v2di)(__m128i)(i), \\\n"
8100" (__v2di)(__m128i)(mask), (s))\n"
8101"\n"
8102"#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) \\\n"
8103" (__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \\\n"
8104" (long long const *)(m), \\\n"
8105" (__v4di)(__m256i)(i), \\\n"
8106" (__v4di)(__m256i)(mask), (s))\n"
8107"\n"
8108"#define _mm_i32gather_pd(m, i, s) \\\n"
8109" (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \\\n"
8110" (double const *)(m), \\\n"
8111" (__v4si)(__m128i)(i), \\\n"
8112" (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \\\n"
8113" _mm_setzero_pd()), \\\n"
8114" (s))\n"
8115"\n"
8116"#define _mm256_i32gather_pd(m, i, s) \\\n"
8117" (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \\\n"
8118" (double const *)(m), \\\n"
8119" (__v4si)(__m128i)(i), \\\n"
8120" (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \\\n"
8121" _mm256_setzero_pd(), \\\n"
8122" _CMP_EQ_OQ), \\\n"
8123" (s))\n"
8124"\n"
8125"#define _mm_i64gather_pd(m, i, s) \\\n"
8126" (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \\\n"
8127" (double const *)(m), \\\n"
8128" (__v2di)(__m128i)(i), \\\n"
8129" (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \\\n"
8130" _mm_setzero_pd()), \\\n"
8131" (s))\n"
8132"\n"
8133"#define _mm256_i64gather_pd(m, i, s) \\\n"
8134" (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \\\n"
8135" (double const *)(m), \\\n"
8136" (__v4di)(__m256i)(i), \\\n"
8137" (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \\\n"
8138" _mm256_setzero_pd(), \\\n"
8139" _CMP_EQ_OQ), \\\n"
8140" (s))\n"
8141"\n"
8142"#define _mm_i32gather_ps(m, i, s) \\\n"
8143" (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \\\n"
8144" (float const *)(m), \\\n"
8145" (__v4si)(__m128i)(i), \\\n"
8146" (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \\\n"
8147" _mm_setzero_ps()), \\\n"
8148" (s))\n"
8149"\n"
8150"#define _mm256_i32gather_ps(m, i, s) \\\n"
8151" (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \\\n"
8152" (float const *)(m), \\\n"
8153" (__v8si)(__m256i)(i), \\\n"
8154" (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \\\n"
8155" _mm256_setzero_ps(), \\\n"
8156" _CMP_EQ_OQ), \\\n"
8157" (s))\n"
8158"\n"
8159"#define _mm_i64gather_ps(m, i, s) \\\n"
8160" (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \\\n"
8161" (float const *)(m), \\\n"
8162" (__v2di)(__m128i)(i), \\\n"
8163" (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \\\n"
8164" _mm_setzero_ps()), \\\n"
8165" (s))\n"
8166"\n"
8167"#define _mm256_i64gather_ps(m, i, s) \\\n"
8168" (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \\\n"
8169" (float const *)(m), \\\n"
8170" (__v4di)(__m256i)(i), \\\n"
8171" (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \\\n"
8172" _mm_setzero_ps()), \\\n"
8173" (s))\n"
8174"\n"
8175"#define _mm_i32gather_epi32(m, i, s) \\\n"
8176" (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \\\n"
8177" (int const *)(m), (__v4si)(__m128i)(i), \\\n"
8178" (__v4si)_mm_set1_epi32(-1), (s))\n"
8179"\n"
8180"#define _mm256_i32gather_epi32(m, i, s) \\\n"
8181" (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \\\n"
8182" (int const *)(m), (__v8si)(__m256i)(i), \\\n"
8183" (__v8si)_mm256_set1_epi32(-1), (s))\n"
8184"\n"
8185"#define _mm_i64gather_epi32(m, i, s) \\\n"
8186" (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \\\n"
8187" (int const *)(m), (__v2di)(__m128i)(i), \\\n"
8188" (__v4si)_mm_set1_epi32(-1), (s))\n"
8189"\n"
8190"#define _mm256_i64gather_epi32(m, i, s) \\\n"
8191" (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \\\n"
8192" (int const *)(m), (__v4di)(__m256i)(i), \\\n"
8193" (__v4si)_mm_set1_epi32(-1), (s))\n"
8194"\n"
8195"#define _mm_i32gather_epi64(m, i, s) \\\n"
8196" (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \\\n"
8197" (long long const *)(m), \\\n"
8198" (__v4si)(__m128i)(i), \\\n"
8199" (__v2di)_mm_set1_epi64x(-1), (s))\n"
8200"\n"
8201"#define _mm256_i32gather_epi64(m, i, s) \\\n"
8202" (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \\\n"
8203" (long long const *)(m), \\\n"
8204" (__v4si)(__m128i)(i), \\\n"
8205" (__v4di)_mm256_set1_epi64x(-1), (s))\n"
8206"\n"
8207"#define _mm_i64gather_epi64(m, i, s) \\\n"
8208" (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \\\n"
8209" (long long const *)(m), \\\n"
8210" (__v2di)(__m128i)(i), \\\n"
8211" (__v2di)_mm_set1_epi64x(-1), (s))\n"
8212"\n"
8213"#define _mm256_i64gather_epi64(m, i, s) \\\n"
8214" (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \\\n"
8215" (long long const *)(m), \\\n"
8216" (__v4di)(__m256i)(i), \\\n"
8217" (__v4di)_mm256_set1_epi64x(-1), (s))\n"
8218"\n"
8219"#undef __DEFAULT_FN_ATTRS256\n"
8220"#undef __DEFAULT_FN_ATTRS128\n"
8221"\n"
8222"#endif /* __AVX2INTRIN_H */\n"
8223"" } ,
8224 { "/builtins/avxintrin.h" , "/*===---- avxintrin.h - AVX intrinsics -------------------------------------===\n"
8225" *\n"
8226" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
8227" * of this software and associated documentation files (the \"Software\"), to deal\n"
8228" * in the Software without restriction, including without limitation the rights\n"
8229" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
8230" * copies of the Software, and to permit persons to whom the Software is\n"
8231" * furnished to do so, subject to the following conditions:\n"
8232" *\n"
8233" * The above copyright notice and this permission notice shall be included in\n"
8234" * all copies or substantial portions of the Software.\n"
8235" *\n"
8236" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
8237" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
8238" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
8239" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
8240" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
8241" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
8242" * THE SOFTWARE.\n"
8243" *\n"
8244" *===-----------------------------------------------------------------------===\n"
8245" */\n"
8246"\n"
8247"#ifndef __IMMINTRIN_H\n"
8248"#error \"Never use <avxintrin.h> directly; include <immintrin.h> instead.\"\n"
8249"#endif\n"
8250"\n"
8251"#ifndef __AVXINTRIN_H\n"
8252"#define __AVXINTRIN_H\n"
8253"\n"
8254"typedef double __v4df __attribute__ ((__vector_size__ (32)));\n"
8255"typedef float __v8sf __attribute__ ((__vector_size__ (32)));\n"
8256"typedef long long __v4di __attribute__ ((__vector_size__ (32)));\n"
8257"typedef int __v8si __attribute__ ((__vector_size__ (32)));\n"
8258"typedef short __v16hi __attribute__ ((__vector_size__ (32)));\n"
8259"typedef char __v32qi __attribute__ ((__vector_size__ (32)));\n"
8260"\n"
8261"/* Unsigned types */\n"
8262"typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32)));\n"
8263"typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));\n"
8264"typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));\n"
8265"typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32)));\n"
8266"\n"
8267"/* We need an explicitly signed variant for char. Note that this shouldn't\n"
8268" * appear in the interface though. */\n"
8269"typedef signed char __v32qs __attribute__((__vector_size__(32)));\n"
8270"\n"
8271"typedef float __m256 __attribute__ ((__vector_size__ (32)));\n"
8272"typedef double __m256d __attribute__((__vector_size__(32)));\n"
8273"typedef long long __m256i __attribute__((__vector_size__(32)));\n"
8274"\n"
8275"/* Define the default attributes for the functions in this file. */\n"
8276"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"avx\"), __min_vector_width__(256)))\n"
8277"#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"avx\"), __min_vector_width__(128)))\n"
8278"\n"
8279"/* Arithmetic */\n"
8280"/// Adds two 256-bit vectors of [4 x double].\n"
8281"///\n"
8282"/// \\headerfile <x86intrin.h>\n"
8283"///\n"
8284"/// This intrinsic corresponds to the <c> VADDPD </c> instruction.\n"
8285"///\n"
8286"/// \\param __a\n"
8287"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8288"/// \\param __b\n"
8289"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8290"/// \\returns A 256-bit vector of [4 x double] containing the sums of both\n"
8291"/// operands.\n"
8292"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8293"_mm256_add_pd(__m256d __a, __m256d __b)\n"
8294"{\n"
8295" return (__m256d)((__v4df)__a+(__v4df)__b);\n"
8296"}\n"
8297"\n"
8298"/// Adds two 256-bit vectors of [8 x float].\n"
8299"///\n"
8300"/// \\headerfile <x86intrin.h>\n"
8301"///\n"
8302"/// This intrinsic corresponds to the <c> VADDPS </c> instruction.\n"
8303"///\n"
8304"/// \\param __a\n"
8305"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8306"/// \\param __b\n"
8307"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8308"/// \\returns A 256-bit vector of [8 x float] containing the sums of both\n"
8309"/// operands.\n"
8310"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8311"_mm256_add_ps(__m256 __a, __m256 __b)\n"
8312"{\n"
8313" return (__m256)((__v8sf)__a+(__v8sf)__b);\n"
8314"}\n"
8315"\n"
8316"/// Subtracts two 256-bit vectors of [4 x double].\n"
8317"///\n"
8318"/// \\headerfile <x86intrin.h>\n"
8319"///\n"
8320"/// This intrinsic corresponds to the <c> VSUBPD </c> instruction.\n"
8321"///\n"
8322"/// \\param __a\n"
8323"/// A 256-bit vector of [4 x double] containing the minuend.\n"
8324"/// \\param __b\n"
8325"/// A 256-bit vector of [4 x double] containing the subtrahend.\n"
8326"/// \\returns A 256-bit vector of [4 x double] containing the differences between\n"
8327"/// both operands.\n"
8328"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8329"_mm256_sub_pd(__m256d __a, __m256d __b)\n"
8330"{\n"
8331" return (__m256d)((__v4df)__a-(__v4df)__b);\n"
8332"}\n"
8333"\n"
8334"/// Subtracts two 256-bit vectors of [8 x float].\n"
8335"///\n"
8336"/// \\headerfile <x86intrin.h>\n"
8337"///\n"
8338"/// This intrinsic corresponds to the <c> VSUBPS </c> instruction.\n"
8339"///\n"
8340"/// \\param __a\n"
8341"/// A 256-bit vector of [8 x float] containing the minuend.\n"
8342"/// \\param __b\n"
8343"/// A 256-bit vector of [8 x float] containing the subtrahend.\n"
8344"/// \\returns A 256-bit vector of [8 x float] containing the differences between\n"
8345"/// both operands.\n"
8346"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8347"_mm256_sub_ps(__m256 __a, __m256 __b)\n"
8348"{\n"
8349" return (__m256)((__v8sf)__a-(__v8sf)__b);\n"
8350"}\n"
8351"\n"
8352"/// Adds the even-indexed values and subtracts the odd-indexed values of\n"
8353"/// two 256-bit vectors of [4 x double].\n"
8354"///\n"
8355"/// \\headerfile <x86intrin.h>\n"
8356"///\n"
8357"/// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.\n"
8358"///\n"
8359"/// \\param __a\n"
8360"/// A 256-bit vector of [4 x double] containing the left source operand.\n"
8361"/// \\param __b\n"
8362"/// A 256-bit vector of [4 x double] containing the right source operand.\n"
8363"/// \\returns A 256-bit vector of [4 x double] containing the alternating sums\n"
8364"/// and differences between both operands.\n"
8365"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8366"_mm256_addsub_pd(__m256d __a, __m256d __b)\n"
8367"{\n"
8368" return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);\n"
8369"}\n"
8370"\n"
8371"/// Adds the even-indexed values and subtracts the odd-indexed values of\n"
8372"/// two 256-bit vectors of [8 x float].\n"
8373"///\n"
8374"/// \\headerfile <x86intrin.h>\n"
8375"///\n"
8376"/// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.\n"
8377"///\n"
8378"/// \\param __a\n"
8379"/// A 256-bit vector of [8 x float] containing the left source operand.\n"
8380"/// \\param __b\n"
8381"/// A 256-bit vector of [8 x float] containing the right source operand.\n"
8382"/// \\returns A 256-bit vector of [8 x float] containing the alternating sums and\n"
8383"/// differences between both operands.\n"
8384"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8385"_mm256_addsub_ps(__m256 __a, __m256 __b)\n"
8386"{\n"
8387" return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);\n"
8388"}\n"
8389"\n"
8390"/// Divides two 256-bit vectors of [4 x double].\n"
8391"///\n"
8392"/// \\headerfile <x86intrin.h>\n"
8393"///\n"
8394"/// This intrinsic corresponds to the <c> VDIVPD </c> instruction.\n"
8395"///\n"
8396"/// \\param __a\n"
8397"/// A 256-bit vector of [4 x double] containing the dividend.\n"
8398"/// \\param __b\n"
8399"/// A 256-bit vector of [4 x double] containing the divisor.\n"
8400"/// \\returns A 256-bit vector of [4 x double] containing the quotients of both\n"
8401"/// operands.\n"
8402"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8403"_mm256_div_pd(__m256d __a, __m256d __b)\n"
8404"{\n"
8405" return (__m256d)((__v4df)__a/(__v4df)__b);\n"
8406"}\n"
8407"\n"
8408"/// Divides two 256-bit vectors of [8 x float].\n"
8409"///\n"
8410"/// \\headerfile <x86intrin.h>\n"
8411"///\n"
8412"/// This intrinsic corresponds to the <c> VDIVPS </c> instruction.\n"
8413"///\n"
8414"/// \\param __a\n"
8415"/// A 256-bit vector of [8 x float] containing the dividend.\n"
8416"/// \\param __b\n"
8417"/// A 256-bit vector of [8 x float] containing the divisor.\n"
8418"/// \\returns A 256-bit vector of [8 x float] containing the quotients of both\n"
8419"/// operands.\n"
8420"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8421"_mm256_div_ps(__m256 __a, __m256 __b)\n"
8422"{\n"
8423" return (__m256)((__v8sf)__a/(__v8sf)__b);\n"
8424"}\n"
8425"\n"
8426"/// Compares two 256-bit vectors of [4 x double] and returns the greater\n"
8427"/// of each pair of values.\n"
8428"///\n"
8429"/// \\headerfile <x86intrin.h>\n"
8430"///\n"
8431"/// This intrinsic corresponds to the <c> VMAXPD </c> instruction.\n"
8432"///\n"
8433"/// \\param __a\n"
8434"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8435"/// \\param __b\n"
8436"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8437"/// \\returns A 256-bit vector of [4 x double] containing the maximum values\n"
8438"/// between both operands.\n"
8439"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8440"_mm256_max_pd(__m256d __a, __m256d __b)\n"
8441"{\n"
8442" return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);\n"
8443"}\n"
8444"\n"
8445"/// Compares two 256-bit vectors of [8 x float] and returns the greater\n"
8446"/// of each pair of values.\n"
8447"///\n"
8448"/// \\headerfile <x86intrin.h>\n"
8449"///\n"
8450"/// This intrinsic corresponds to the <c> VMAXPS </c> instruction.\n"
8451"///\n"
8452"/// \\param __a\n"
8453"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8454"/// \\param __b\n"
8455"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8456"/// \\returns A 256-bit vector of [8 x float] containing the maximum values\n"
8457"/// between both operands.\n"
8458"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8459"_mm256_max_ps(__m256 __a, __m256 __b)\n"
8460"{\n"
8461" return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);\n"
8462"}\n"
8463"\n"
8464"/// Compares two 256-bit vectors of [4 x double] and returns the lesser\n"
8465"/// of each pair of values.\n"
8466"///\n"
8467"/// \\headerfile <x86intrin.h>\n"
8468"///\n"
8469"/// This intrinsic corresponds to the <c> VMINPD </c> instruction.\n"
8470"///\n"
8471"/// \\param __a\n"
8472"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8473"/// \\param __b\n"
8474"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8475"/// \\returns A 256-bit vector of [4 x double] containing the minimum values\n"
8476"/// between both operands.\n"
8477"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8478"_mm256_min_pd(__m256d __a, __m256d __b)\n"
8479"{\n"
8480" return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);\n"
8481"}\n"
8482"\n"
8483"/// Compares two 256-bit vectors of [8 x float] and returns the lesser\n"
8484"/// of each pair of values.\n"
8485"///\n"
8486"/// \\headerfile <x86intrin.h>\n"
8487"///\n"
8488"/// This intrinsic corresponds to the <c> VMINPS </c> instruction.\n"
8489"///\n"
8490"/// \\param __a\n"
8491"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8492"/// \\param __b\n"
8493"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8494"/// \\returns A 256-bit vector of [8 x float] containing the minimum values\n"
8495"/// between both operands.\n"
8496"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8497"_mm256_min_ps(__m256 __a, __m256 __b)\n"
8498"{\n"
8499" return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);\n"
8500"}\n"
8501"\n"
8502"/// Multiplies two 256-bit vectors of [4 x double].\n"
8503"///\n"
8504"/// \\headerfile <x86intrin.h>\n"
8505"///\n"
8506"/// This intrinsic corresponds to the <c> VMULPD </c> instruction.\n"
8507"///\n"
8508"/// \\param __a\n"
8509"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8510"/// \\param __b\n"
8511"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8512"/// \\returns A 256-bit vector of [4 x double] containing the products of both\n"
8513"/// operands.\n"
8514"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8515"_mm256_mul_pd(__m256d __a, __m256d __b)\n"
8516"{\n"
8517" return (__m256d)((__v4df)__a * (__v4df)__b);\n"
8518"}\n"
8519"\n"
8520"/// Multiplies two 256-bit vectors of [8 x float].\n"
8521"///\n"
8522"/// \\headerfile <x86intrin.h>\n"
8523"///\n"
8524"/// This intrinsic corresponds to the <c> VMULPS </c> instruction.\n"
8525"///\n"
8526"/// \\param __a\n"
8527"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8528"/// \\param __b\n"
8529"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8530"/// \\returns A 256-bit vector of [8 x float] containing the products of both\n"
8531"/// operands.\n"
8532"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8533"_mm256_mul_ps(__m256 __a, __m256 __b)\n"
8534"{\n"
8535" return (__m256)((__v8sf)__a * (__v8sf)__b);\n"
8536"}\n"
8537"\n"
8538"/// Calculates the square roots of the values in a 256-bit vector of\n"
8539"/// [4 x double].\n"
8540"///\n"
8541"/// \\headerfile <x86intrin.h>\n"
8542"///\n"
8543"/// This intrinsic corresponds to the <c> VSQRTPD </c> instruction.\n"
8544"///\n"
8545"/// \\param __a\n"
8546"/// A 256-bit vector of [4 x double].\n"
8547"/// \\returns A 256-bit vector of [4 x double] containing the square roots of the\n"
8548"/// values in the operand.\n"
8549"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8550"_mm256_sqrt_pd(__m256d __a)\n"
8551"{\n"
8552" return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);\n"
8553"}\n"
8554"\n"
8555"/// Calculates the square roots of the values in a 256-bit vector of\n"
8556"/// [8 x float].\n"
8557"///\n"
8558"/// \\headerfile <x86intrin.h>\n"
8559"///\n"
8560"/// This intrinsic corresponds to the <c> VSQRTPS </c> instruction.\n"
8561"///\n"
8562"/// \\param __a\n"
8563"/// A 256-bit vector of [8 x float].\n"
8564"/// \\returns A 256-bit vector of [8 x float] containing the square roots of the\n"
8565"/// values in the operand.\n"
8566"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8567"_mm256_sqrt_ps(__m256 __a)\n"
8568"{\n"
8569" return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);\n"
8570"}\n"
8571"\n"
8572"/// Calculates the reciprocal square roots of the values in a 256-bit\n"
8573"/// vector of [8 x float].\n"
8574"///\n"
8575"/// \\headerfile <x86intrin.h>\n"
8576"///\n"
8577"/// This intrinsic corresponds to the <c> VRSQRTPS </c> instruction.\n"
8578"///\n"
8579"/// \\param __a\n"
8580"/// A 256-bit vector of [8 x float].\n"
8581"/// \\returns A 256-bit vector of [8 x float] containing the reciprocal square\n"
8582"/// roots of the values in the operand.\n"
8583"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8584"_mm256_rsqrt_ps(__m256 __a)\n"
8585"{\n"
8586" return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);\n"
8587"}\n"
8588"\n"
8589"/// Calculates the reciprocals of the values in a 256-bit vector of\n"
8590"/// [8 x float].\n"
8591"///\n"
8592"/// \\headerfile <x86intrin.h>\n"
8593"///\n"
8594"/// This intrinsic corresponds to the <c> VRCPPS </c> instruction.\n"
8595"///\n"
8596"/// \\param __a\n"
8597"/// A 256-bit vector of [8 x float].\n"
8598"/// \\returns A 256-bit vector of [8 x float] containing the reciprocals of the\n"
8599"/// values in the operand.\n"
8600"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8601"_mm256_rcp_ps(__m256 __a)\n"
8602"{\n"
8603" return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);\n"
8604"}\n"
8605"\n"
8606"/// Rounds the values in a 256-bit vector of [4 x double] as specified\n"
8607"/// by the byte operand. The source values are rounded to integer values and\n"
8608"/// returned as 64-bit double-precision floating-point values.\n"
8609"///\n"
8610"/// \\headerfile <x86intrin.h>\n"
8611"///\n"
8612"/// \\code\n"
8613"/// __m256d _mm256_round_pd(__m256d V, const int M);\n"
8614"/// \\endcode\n"
8615"///\n"
8616"/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.\n"
8617"///\n"
8618"/// \\param V\n"
8619"/// A 256-bit vector of [4 x double].\n"
8620"/// \\param M\n"
8621"/// An integer value that specifies the rounding operation. \\n\n"
8622"/// Bits [7:4] are reserved. \\n\n"
8623"/// Bit [3] is a precision exception value: \\n\n"
8624"/// 0: A normal PE exception is used. \\n\n"
8625"/// 1: The PE field is not updated. \\n\n"
8626"/// Bit [2] is the rounding control source: \\n\n"
8627"/// 0: Use bits [1:0] of \\a M. \\n\n"
8628"/// 1: Use the current MXCSR setting. \\n\n"
8629"/// Bits [1:0] contain the rounding control definition: \\n\n"
8630"/// 00: Nearest. \\n\n"
8631"/// 01: Downward (toward negative infinity). \\n\n"
8632"/// 10: Upward (toward positive infinity). \\n\n"
8633"/// 11: Truncated.\n"
8634"/// \\returns A 256-bit vector of [4 x double] containing the rounded values.\n"
8635"#define _mm256_round_pd(V, M) \\\n"
8636" (__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M))\n"
8637"\n"
8638"/// Rounds the values stored in a 256-bit vector of [8 x float] as\n"
8639"/// specified by the byte operand. The source values are rounded to integer\n"
8640"/// values and returned as floating-point values.\n"
8641"///\n"
8642"/// \\headerfile <x86intrin.h>\n"
8643"///\n"
8644"/// \\code\n"
8645"/// __m256 _mm256_round_ps(__m256 V, const int M);\n"
8646"/// \\endcode\n"
8647"///\n"
8648"/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.\n"
8649"///\n"
8650"/// \\param V\n"
8651"/// A 256-bit vector of [8 x float].\n"
8652"/// \\param M\n"
8653"/// An integer value that specifies the rounding operation. \\n\n"
8654"/// Bits [7:4] are reserved. \\n\n"
8655"/// Bit [3] is a precision exception value: \\n\n"
8656"/// 0: A normal PE exception is used. \\n\n"
8657"/// 1: The PE field is not updated. \\n\n"
8658"/// Bit [2] is the rounding control source: \\n\n"
8659"/// 0: Use bits [1:0] of \\a M. \\n\n"
8660"/// 1: Use the current MXCSR setting. \\n\n"
8661"/// Bits [1:0] contain the rounding control definition: \\n\n"
8662"/// 00: Nearest. \\n\n"
8663"/// 01: Downward (toward negative infinity). \\n\n"
8664"/// 10: Upward (toward positive infinity). \\n\n"
8665"/// 11: Truncated.\n"
8666"/// \\returns A 256-bit vector of [8 x float] containing the rounded values.\n"
8667"#define _mm256_round_ps(V, M) \\\n"
8668" (__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M))\n"
8669"\n"
8670"/// Rounds up the values stored in a 256-bit vector of [4 x double]. The\n"
8671"/// source values are rounded up to integer values and returned as 64-bit\n"
8672"/// double-precision floating-point values.\n"
8673"///\n"
8674"/// \\headerfile <x86intrin.h>\n"
8675"///\n"
8676"/// \\code\n"
8677"/// __m256d _mm256_ceil_pd(__m256d V);\n"
8678"/// \\endcode\n"
8679"///\n"
8680"/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.\n"
8681"///\n"
8682"/// \\param V\n"
8683"/// A 256-bit vector of [4 x double].\n"
8684"/// \\returns A 256-bit vector of [4 x double] containing the rounded up values.\n"
8685"#define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL)\n"
8686"\n"
8687"/// Rounds down the values stored in a 256-bit vector of [4 x double].\n"
8688"/// The source values are rounded down to integer values and returned as\n"
8689"/// 64-bit double-precision floating-point values.\n"
8690"///\n"
8691"/// \\headerfile <x86intrin.h>\n"
8692"///\n"
8693"/// \\code\n"
8694"/// __m256d _mm256_floor_pd(__m256d V);\n"
8695"/// \\endcode\n"
8696"///\n"
8697"/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.\n"
8698"///\n"
8699"/// \\param V\n"
8700"/// A 256-bit vector of [4 x double].\n"
8701"/// \\returns A 256-bit vector of [4 x double] containing the rounded down\n"
8702"/// values.\n"
8703"#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)\n"
8704"\n"
8705"/// Rounds up the values stored in a 256-bit vector of [8 x float]. The\n"
8706"/// source values are rounded up to integer values and returned as\n"
8707"/// floating-point values.\n"
8708"///\n"
8709"/// \\headerfile <x86intrin.h>\n"
8710"///\n"
8711"/// \\code\n"
8712"/// __m256 _mm256_ceil_ps(__m256 V);\n"
8713"/// \\endcode\n"
8714"///\n"
8715"/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.\n"
8716"///\n"
8717"/// \\param V\n"
8718"/// A 256-bit vector of [8 x float].\n"
8719"/// \\returns A 256-bit vector of [8 x float] containing the rounded up values.\n"
8720"#define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL)\n"
8721"\n"
8722"/// Rounds down the values stored in a 256-bit vector of [8 x float]. The\n"
8723"/// source values are rounded down to integer values and returned as\n"
8724"/// floating-point values.\n"
8725"///\n"
8726"/// \\headerfile <x86intrin.h>\n"
8727"///\n"
8728"/// \\code\n"
8729"/// __m256 _mm256_floor_ps(__m256 V);\n"
8730"/// \\endcode\n"
8731"///\n"
8732"/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.\n"
8733"///\n"
8734"/// \\param V\n"
8735"/// A 256-bit vector of [8 x float].\n"
8736"/// \\returns A 256-bit vector of [8 x float] containing the rounded down values.\n"
8737"#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)\n"
8738"\n"
8739"/* Logical */\n"
8740"/// Performs a bitwise AND of two 256-bit vectors of [4 x double].\n"
8741"///\n"
8742"/// \\headerfile <x86intrin.h>\n"
8743"///\n"
8744"/// This intrinsic corresponds to the <c> VANDPD </c> instruction.\n"
8745"///\n"
8746"/// \\param __a\n"
8747"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8748"/// \\param __b\n"
8749"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8750"/// \\returns A 256-bit vector of [4 x double] containing the bitwise AND of the\n"
8751"/// values between both operands.\n"
8752"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8753"_mm256_and_pd(__m256d __a, __m256d __b)\n"
8754"{\n"
8755" return (__m256d)((__v4du)__a & (__v4du)__b);\n"
8756"}\n"
8757"\n"
8758"/// Performs a bitwise AND of two 256-bit vectors of [8 x float].\n"
8759"///\n"
8760"/// \\headerfile <x86intrin.h>\n"
8761"///\n"
8762"/// This intrinsic corresponds to the <c> VANDPS </c> instruction.\n"
8763"///\n"
8764"/// \\param __a\n"
8765"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8766"/// \\param __b\n"
8767"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8768"/// \\returns A 256-bit vector of [8 x float] containing the bitwise AND of the\n"
8769"/// values between both operands.\n"
8770"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8771"_mm256_and_ps(__m256 __a, __m256 __b)\n"
8772"{\n"
8773" return (__m256)((__v8su)__a & (__v8su)__b);\n"
8774"}\n"
8775"\n"
8776"/// Performs a bitwise AND of two 256-bit vectors of [4 x double], using\n"
8777"/// the one's complement of the values contained in the first source operand.\n"
8778"///\n"
8779"/// \\headerfile <x86intrin.h>\n"
8780"///\n"
8781"/// This intrinsic corresponds to the <c> VANDNPD </c> instruction.\n"
8782"///\n"
8783"/// \\param __a\n"
8784"/// A 256-bit vector of [4 x double] containing the left source operand. The\n"
8785"/// one's complement of this value is used in the bitwise AND.\n"
8786"/// \\param __b\n"
8787"/// A 256-bit vector of [4 x double] containing the right source operand.\n"
8788"/// \\returns A 256-bit vector of [4 x double] containing the bitwise AND of the\n"
8789"/// values of the second operand and the one's complement of the first\n"
8790"/// operand.\n"
8791"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8792"_mm256_andnot_pd(__m256d __a, __m256d __b)\n"
8793"{\n"
8794" return (__m256d)(~(__v4du)__a & (__v4du)__b);\n"
8795"}\n"
8796"\n"
8797"/// Performs a bitwise AND of two 256-bit vectors of [8 x float], using\n"
8798"/// the one's complement of the values contained in the first source operand.\n"
8799"///\n"
8800"/// \\headerfile <x86intrin.h>\n"
8801"///\n"
8802"/// This intrinsic corresponds to the <c> VANDNPS </c> instruction.\n"
8803"///\n"
8804"/// \\param __a\n"
8805"/// A 256-bit vector of [8 x float] containing the left source operand. The\n"
8806"/// one's complement of this value is used in the bitwise AND.\n"
8807"/// \\param __b\n"
8808"/// A 256-bit vector of [8 x float] containing the right source operand.\n"
8809"/// \\returns A 256-bit vector of [8 x float] containing the bitwise AND of the\n"
8810"/// values of the second operand and the one's complement of the first\n"
8811"/// operand.\n"
8812"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8813"_mm256_andnot_ps(__m256 __a, __m256 __b)\n"
8814"{\n"
8815" return (__m256)(~(__v8su)__a & (__v8su)__b);\n"
8816"}\n"
8817"\n"
8818"/// Performs a bitwise OR of two 256-bit vectors of [4 x double].\n"
8819"///\n"
8820"/// \\headerfile <x86intrin.h>\n"
8821"///\n"
8822"/// This intrinsic corresponds to the <c> VORPD </c> instruction.\n"
8823"///\n"
8824"/// \\param __a\n"
8825"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8826"/// \\param __b\n"
8827"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8828"/// \\returns A 256-bit vector of [4 x double] containing the bitwise OR of the\n"
8829"/// values between both operands.\n"
8830"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8831"_mm256_or_pd(__m256d __a, __m256d __b)\n"
8832"{\n"
8833" return (__m256d)((__v4du)__a | (__v4du)__b);\n"
8834"}\n"
8835"\n"
8836"/// Performs a bitwise OR of two 256-bit vectors of [8 x float].\n"
8837"///\n"
8838"/// \\headerfile <x86intrin.h>\n"
8839"///\n"
8840"/// This intrinsic corresponds to the <c> VORPS </c> instruction.\n"
8841"///\n"
8842"/// \\param __a\n"
8843"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8844"/// \\param __b\n"
8845"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8846"/// \\returns A 256-bit vector of [8 x float] containing the bitwise OR of the\n"
8847"/// values between both operands.\n"
8848"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8849"_mm256_or_ps(__m256 __a, __m256 __b)\n"
8850"{\n"
8851" return (__m256)((__v8su)__a | (__v8su)__b);\n"
8852"}\n"
8853"\n"
8854"/// Performs a bitwise XOR of two 256-bit vectors of [4 x double].\n"
8855"///\n"
8856"/// \\headerfile <x86intrin.h>\n"
8857"///\n"
8858"/// This intrinsic corresponds to the <c> VXORPD </c> instruction.\n"
8859"///\n"
8860"/// \\param __a\n"
8861"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8862"/// \\param __b\n"
8863"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8864"/// \\returns A 256-bit vector of [4 x double] containing the bitwise XOR of the\n"
8865"/// values between both operands.\n"
8866"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8867"_mm256_xor_pd(__m256d __a, __m256d __b)\n"
8868"{\n"
8869" return (__m256d)((__v4du)__a ^ (__v4du)__b);\n"
8870"}\n"
8871"\n"
8872"/// Performs a bitwise XOR of two 256-bit vectors of [8 x float].\n"
8873"///\n"
8874"/// \\headerfile <x86intrin.h>\n"
8875"///\n"
8876"/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n"
8877"///\n"
8878"/// \\param __a\n"
8879"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8880"/// \\param __b\n"
8881"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8882"/// \\returns A 256-bit vector of [8 x float] containing the bitwise XOR of the\n"
8883"/// values between both operands.\n"
8884"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8885"_mm256_xor_ps(__m256 __a, __m256 __b)\n"
8886"{\n"
8887" return (__m256)((__v8su)__a ^ (__v8su)__b);\n"
8888"}\n"
8889"\n"
8890"/* Horizontal arithmetic */\n"
8891"/// Horizontally adds the adjacent pairs of values contained in two\n"
8892"/// 256-bit vectors of [4 x double].\n"
8893"///\n"
8894"/// \\headerfile <x86intrin.h>\n"
8895"///\n"
8896"/// This intrinsic corresponds to the <c> VHADDPD </c> instruction.\n"
8897"///\n"
8898"/// \\param __a\n"
8899"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8900"/// The horizontal sums of the values are returned in the even-indexed\n"
8901"/// elements of a vector of [4 x double].\n"
8902"/// \\param __b\n"
8903"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8904"/// The horizontal sums of the values are returned in the odd-indexed\n"
8905"/// elements of a vector of [4 x double].\n"
8906"/// \\returns A 256-bit vector of [4 x double] containing the horizontal sums of\n"
8907"/// both operands.\n"
8908"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8909"_mm256_hadd_pd(__m256d __a, __m256d __b)\n"
8910"{\n"
8911" return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);\n"
8912"}\n"
8913"\n"
8914"/// Horizontally adds the adjacent pairs of values contained in two\n"
8915"/// 256-bit vectors of [8 x float].\n"
8916"///\n"
8917"/// \\headerfile <x86intrin.h>\n"
8918"///\n"
8919"/// This intrinsic corresponds to the <c> VHADDPS </c> instruction.\n"
8920"///\n"
8921"/// \\param __a\n"
8922"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8923"/// The horizontal sums of the values are returned in the elements with\n"
8924"/// index 0, 1, 4, 5 of a vector of [8 x float].\n"
8925"/// \\param __b\n"
8926"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8927"/// The horizontal sums of the values are returned in the elements with\n"
8928"/// index 2, 3, 6, 7 of a vector of [8 x float].\n"
8929"/// \\returns A 256-bit vector of [8 x float] containing the horizontal sums of\n"
8930"/// both operands.\n"
8931"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8932"_mm256_hadd_ps(__m256 __a, __m256 __b)\n"
8933"{\n"
8934" return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);\n"
8935"}\n"
8936"\n"
8937"/// Horizontally subtracts the adjacent pairs of values contained in two\n"
8938"/// 256-bit vectors of [4 x double].\n"
8939"///\n"
8940"/// \\headerfile <x86intrin.h>\n"
8941"///\n"
8942"/// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.\n"
8943"///\n"
8944"/// \\param __a\n"
8945"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8946"/// The horizontal differences between the values are returned in the\n"
8947"/// even-indexed elements of a vector of [4 x double].\n"
8948"/// \\param __b\n"
8949"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8950"/// The horizontal differences between the values are returned in the\n"
8951"/// odd-indexed elements of a vector of [4 x double].\n"
8952"/// \\returns A 256-bit vector of [4 x double] containing the horizontal\n"
8953"/// differences of both operands.\n"
8954"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8955"_mm256_hsub_pd(__m256d __a, __m256d __b)\n"
8956"{\n"
8957" return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);\n"
8958"}\n"
8959"\n"
8960"/// Horizontally subtracts the adjacent pairs of values contained in two\n"
8961"/// 256-bit vectors of [8 x float].\n"
8962"///\n"
8963"/// \\headerfile <x86intrin.h>\n"
8964"///\n"
8965"/// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.\n"
8966"///\n"
8967"/// \\param __a\n"
8968"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8969"/// The horizontal differences between the values are returned in the\n"
8970"/// elements with index 0, 1, 4, 5 of a vector of [8 x float].\n"
8971"/// \\param __b\n"
8972"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8973"/// The horizontal differences between the values are returned in the\n"
8974"/// elements with index 2, 3, 6, 7 of a vector of [8 x float].\n"
8975"/// \\returns A 256-bit vector of [8 x float] containing the horizontal\n"
8976"/// differences of both operands.\n"
8977"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8978"_mm256_hsub_ps(__m256 __a, __m256 __b)\n"
8979"{\n"
8980" return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);\n"
8981"}\n"
8982"\n"
8983"/* Vector permutations */\n"
8984"/// Copies the values in a 128-bit vector of [2 x double] as specified\n"
8985"/// by the 128-bit integer vector operand.\n"
8986"///\n"
8987"/// \\headerfile <x86intrin.h>\n"
8988"///\n"
8989"/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n"
8990"///\n"
8991"/// \\param __a\n"
8992"/// A 128-bit vector of [2 x double].\n"
8993"/// \\param __c\n"
8994"/// A 128-bit integer vector operand specifying how the values are to be\n"
8995"/// copied. \\n\n"
8996"/// Bit [1]: \\n\n"
8997"/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n"
8998"/// vector. \\n\n"
8999"/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n"
9000"/// returned vector. \\n\n"
9001"/// Bit [65]: \\n\n"
9002"/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n"
9003"/// returned vector. \\n\n"
9004"/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n"
9005"/// returned vector.\n"
9006"/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n"
9007"static __inline __m128d __DEFAULT_FN_ATTRS128\n"
9008"_mm_permutevar_pd(__m128d __a, __m128i __c)\n"
9009"{\n"
9010" return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);\n"
9011"}\n"
9012"\n"
9013"/// Copies the values in a 256-bit vector of [4 x double] as specified\n"
9014"/// by the 256-bit integer vector operand.\n"
9015"///\n"
9016"/// \\headerfile <x86intrin.h>\n"
9017"///\n"
9018"/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n"
9019"///\n"
9020"/// \\param __a\n"
9021"/// A 256-bit vector of [4 x double].\n"
9022"/// \\param __c\n"
9023"/// A 256-bit integer vector operand specifying how the values are to be\n"
9024"/// copied. \\n\n"
9025"/// Bit [1]: \\n\n"
9026"/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n"
9027"/// vector. \\n\n"
9028"/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n"
9029"/// returned vector. \\n\n"
9030"/// Bit [65]: \\n\n"
9031"/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n"
9032"/// returned vector. \\n\n"
9033"/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n"
9034"/// returned vector. \\n\n"
9035"/// Bit [129]: \\n\n"
9036"/// 0: Bits [191:128] of the source are copied to bits [191:128] of the\n"
9037"/// returned vector. \\n\n"
9038"/// 1: Bits [255:192] of the source are copied to bits [191:128] of the\n"
9039"/// returned vector. \\n\n"
9040"/// Bit [193]: \\n\n"
9041"/// 0: Bits [191:128] of the source are copied to bits [255:192] of the\n"
9042"/// returned vector. \\n\n"
9043"/// 1: Bits [255:192] of the source are copied to bits [255:192] of the\n"
9044"/// returned vector.\n"
9045"/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n"
9046"static __inline __m256d __DEFAULT_FN_ATTRS\n"
9047"_mm256_permutevar_pd(__m256d __a, __m256i __c)\n"
9048"{\n"
9049" return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);\n"
9050"}\n"
9051"\n"
9052"/// Copies the values stored in a 128-bit vector of [4 x float] as\n"
9053"/// specified by the 128-bit integer vector operand.\n"
9054"/// \\headerfile <x86intrin.h>\n"
9055"///\n"
9056"/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n"
9057"///\n"
9058"/// \\param __a\n"
9059"/// A 128-bit vector of [4 x float].\n"
9060"/// \\param __c\n"
9061"/// A 128-bit integer vector operand specifying how the values are to be\n"
9062"/// copied. \\n\n"
9063"/// Bits [1:0]: \\n\n"
9064"/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n"
9065"/// returned vector. \\n\n"
9066"/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n"
9067"/// returned vector. \\n\n"
9068"/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n"
9069"/// returned vector. \\n\n"
9070"/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n"
9071"/// returned vector. \\n\n"
9072"/// Bits [33:32]: \\n\n"
9073"/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n"
9074"/// returned vector. \\n\n"
9075"/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n"
9076"/// returned vector. \\n\n"
9077"/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n"
9078"/// returned vector. \\n\n"
9079"/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n"
9080"/// returned vector. \\n\n"
9081"/// Bits [65:64]: \\n\n"
9082"/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n"
9083"/// returned vector. \\n\n"
9084"/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n"
9085"/// returned vector. \\n\n"
9086"/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n"
9087"/// returned vector. \\n\n"
9088"/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n"
9089"/// returned vector. \\n\n"
9090"/// Bits [97:96]: \\n\n"
9091"/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n"
9092"/// returned vector. \\n\n"
9093"/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n"
9094"/// returned vector. \\n\n"
9095"/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n"
9096"/// returned vector. \\n\n"
9097"/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n"
9098"/// returned vector.\n"
9099"/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n"
9100"static __inline __m128 __DEFAULT_FN_ATTRS128\n"
9101"_mm_permutevar_ps(__m128 __a, __m128i __c)\n"
9102"{\n"
9103" return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);\n"
9104"}\n"
9105"\n"
9106"/// Copies the values stored in a 256-bit vector of [8 x float] as\n"
9107"/// specified by the 256-bit integer vector operand.\n"
9108"///\n"
9109"/// \\headerfile <x86intrin.h>\n"
9110"///\n"
9111"/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n"
9112"///\n"
9113"/// \\param __a\n"
9114"/// A 256-bit vector of [8 x float].\n"
9115"/// \\param __c\n"
9116"/// A 256-bit integer vector operand specifying how the values are to be\n"
9117"/// copied. \\n\n"
9118"/// Bits [1:0]: \\n\n"
9119"/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n"
9120"/// returned vector. \\n\n"
9121"/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n"
9122"/// returned vector. \\n\n"
9123"/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n"
9124"/// returned vector. \\n\n"
9125"/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n"
9126"/// returned vector. \\n\n"
9127"/// Bits [33:32]: \\n\n"
9128"/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n"
9129"/// returned vector. \\n\n"
9130"/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n"
9131"/// returned vector. \\n\n"
9132"/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n"
9133"/// returned vector. \\n\n"
9134"/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n"
9135"/// returned vector. \\n\n"
9136"/// Bits [65:64]: \\n\n"
9137"/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n"
9138"/// returned vector. \\n\n"
9139"/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n"
9140"/// returned vector. \\n\n"
9141"/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n"
9142"/// returned vector. \\n\n"
9143"/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n"
9144"/// returned vector. \\n\n"
9145"/// Bits [97:96]: \\n\n"
9146"/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n"
9147"/// returned vector. \\n\n"
9148"/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n"
9149"/// returned vector. \\n\n"
9150"/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n"
9151"/// returned vector. \\n\n"
9152"/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n"
9153"/// returned vector. \\n\n"
9154"/// Bits [129:128]: \\n\n"
9155"/// 00: Bits [159:128] of the source are copied to bits [159:128] of the\n"
9156"/// returned vector. \\n\n"
9157"/// 01: Bits [191:160] of the source are copied to bits [159:128] of the\n"
9158"/// returned vector. \\n\n"
9159"/// 10: Bits [223:192] of the source are copied to bits [159:128] of the\n"
9160"/// returned vector. \\n\n"
9161"/// 11: Bits [255:224] of the source are copied to bits [159:128] of the\n"
9162"/// returned vector. \\n\n"
9163"/// Bits [161:160]: \\n\n"
9164"/// 00: Bits [159:128] of the source are copied to bits [191:160] of the\n"
9165"/// returned vector. \\n\n"
9166"/// 01: Bits [191:160] of the source are copied to bits [191:160] of the\n"
9167"/// returned vector. \\n\n"
9168"/// 10: Bits [223:192] of the source are copied to bits [191:160] of the\n"
9169"/// returned vector. \\n\n"
9170"/// 11: Bits [255:224] of the source are copied to bits [191:160] of the\n"
9171"/// returned vector. \\n\n"
9172"/// Bits [193:192]: \\n\n"
9173"/// 00: Bits [159:128] of the source are copied to bits [223:192] of the\n"
9174"/// returned vector. \\n\n"
9175"/// 01: Bits [191:160] of the source are copied to bits [223:192] of the\n"
9176"/// returned vector. \\n\n"
9177"/// 10: Bits [223:192] of the source are copied to bits [223:192] of the\n"
9178"/// returned vector. \\n\n"
9179"/// 11: Bits [255:224] of the source are copied to bits [223:192] of the\n"
9180"/// returned vector. \\n\n"
9181"/// Bits [225:224]: \\n\n"
9182"/// 00: Bits [159:128] of the source are copied to bits [255:224] of the\n"
9183"/// returned vector. \\n\n"
9184"/// 01: Bits [191:160] of the source are copied to bits [255:224] of the\n"
9185"/// returned vector. \\n\n"
9186"/// 10: Bits [223:192] of the source are copied to bits [255:224] of the\n"
9187"/// returned vector. \\n\n"
9188"/// 11: Bits [255:224] of the source are copied to bits [255:224] of the\n"
9189"/// returned vector.\n"
9190"/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n"
9191"static __inline __m256 __DEFAULT_FN_ATTRS\n"
9192"_mm256_permutevar_ps(__m256 __a, __m256i __c)\n"
9193"{\n"
9194" return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);\n"
9195"}\n"
9196"\n"
9197"/// Copies the values in a 128-bit vector of [2 x double] as specified\n"
9198"/// by the immediate integer operand.\n"
9199"///\n"
9200"/// \\headerfile <x86intrin.h>\n"
9201"///\n"
9202"/// \\code\n"
9203"/// __m128d _mm_permute_pd(__m128d A, const int C);\n"
9204"/// \\endcode\n"
9205"///\n"
9206"/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n"
9207"///\n"
9208"/// \\param A\n"
9209"/// A 128-bit vector of [2 x double].\n"
9210"/// \\param C\n"
9211"/// An immediate integer operand specifying how the values are to be\n"
9212"/// copied. \\n\n"
9213"/// Bit [0]: \\n\n"
9214"/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n"
9215"/// vector. \\n\n"
9216"/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n"
9217"/// returned vector. \\n\n"
9218"/// Bit [1]: \\n\n"
9219"/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n"
9220"/// returned vector. \\n\n"
9221"/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n"
9222"/// returned vector.\n"
9223"/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n"
9224"#define _mm_permute_pd(A, C) \\\n"
9225" (__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C))\n"
9226"\n"
9227"/// Copies the values in a 256-bit vector of [4 x double] as specified by\n"
9228"/// the immediate integer operand.\n"
9229"///\n"
9230"/// \\headerfile <x86intrin.h>\n"
9231"///\n"
9232"/// \\code\n"
9233"/// __m256d _mm256_permute_pd(__m256d A, const int C);\n"
9234"/// \\endcode\n"
9235"///\n"
9236"/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n"
9237"///\n"
9238"/// \\param A\n"
9239"/// A 256-bit vector of [4 x double].\n"
9240"/// \\param C\n"
9241"/// An immediate integer operand specifying how the values are to be\n"
9242"/// copied. \\n\n"
9243"/// Bit [0]: \\n\n"
9244"/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n"
9245"/// vector. \\n\n"
9246"/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n"
9247"/// returned vector. \\n\n"
9248"/// Bit [1]: \\n\n"
9249"/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n"
9250"/// returned vector. \\n\n"
9251"/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n"
9252"/// returned vector. \\n\n"
9253"/// Bit [2]: \\n\n"
9254"/// 0: Bits [191:128] of the source are copied to bits [191:128] of the\n"
9255"/// returned vector. \\n\n"
9256"/// 1: Bits [255:192] of the source are copied to bits [191:128] of the\n"
9257"/// returned vector. \\n\n"
9258"/// Bit [3]: \\n\n"
9259"/// 0: Bits [191:128] of the source are copied to bits [255:192] of the\n"
9260"/// returned vector. \\n\n"
9261"/// 1: Bits [255:192] of the source are copied to bits [255:192] of the\n"
9262"/// returned vector.\n"
9263"/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n"
9264"#define _mm256_permute_pd(A, C) \\\n"
9265" (__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C))\n"
9266"\n"
9267"/// Copies the values in a 128-bit vector of [4 x float] as specified by\n"
9268"/// the immediate integer operand.\n"
9269"///\n"
9270"/// \\headerfile <x86intrin.h>\n"
9271"///\n"
9272"/// \\code\n"
9273"/// __m128 _mm_permute_ps(__m128 A, const int C);\n"
9274"/// \\endcode\n"
9275"///\n"
9276"/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n"
9277"///\n"
9278"/// \\param A\n"
9279"/// A 128-bit vector of [4 x float].\n"
9280"/// \\param C\n"
9281"/// An immediate integer operand specifying how the values are to be\n"
9282"/// copied. \\n\n"
9283"/// Bits [1:0]: \\n\n"
9284"/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n"
9285"/// returned vector. \\n\n"
9286"/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n"
9287"/// returned vector. \\n\n"
9288"/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n"
9289"/// returned vector. \\n\n"
9290"/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n"
9291"/// returned vector. \\n\n"
9292"/// Bits [3:2]: \\n\n"
9293"/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n"
9294"/// returned vector. \\n\n"
9295"/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n"
9296"/// returned vector. \\n\n"
9297"/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n"
9298"/// returned vector. \\n\n"
9299"/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n"
9300"/// returned vector. \\n\n"
9301"/// Bits [5:4]: \\n\n"
9302"/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n"
9303"/// returned vector. \\n\n"
9304"/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n"
9305"/// returned vector. \\n\n"
9306"/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n"
9307"/// returned vector. \\n\n"
9308"/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n"
9309"/// returned vector. \\n\n"
9310"/// Bits [7:6]: \\n\n"
9311"/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n"
9312"/// returned vector. \\n\n"
9313"/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n"
9314"/// returned vector. \\n\n"
9315"/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n"
9316"/// returned vector. \\n\n"
9317"/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n"
9318"/// returned vector.\n"
9319"/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n"
9320"#define _mm_permute_ps(A, C) \\\n"
9321" (__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C))\n"
9322"\n"
9323"/// Copies the values in a 256-bit vector of [8 x float] as specified by\n"
9324"/// the immediate integer operand.\n"
9325"///\n"
9326"/// \\headerfile <x86intrin.h>\n"
9327"///\n"
9328"/// \\code\n"
9329"/// __m256 _mm256_permute_ps(__m256 A, const int C);\n"
9330"/// \\endcode\n"
9331"///\n"
9332"/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n"
9333"///\n"
9334"/// \\param A\n"
9335"/// A 256-bit vector of [8 x float].\n"
9336"/// \\param C\n"
9337"/// An immediate integer operand specifying how the values are to be\n"
9338"/// copied. \\n\n"
9339"/// Bits [1:0]: \\n\n"
9340"/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n"
9341"/// returned vector. \\n\n"
9342"/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n"
9343"/// returned vector. \\n\n"
9344"/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n"
9345"/// returned vector. \\n\n"
9346"/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n"
9347"/// returned vector. \\n\n"
9348"/// Bits [3:2]: \\n\n"
9349"/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n"
9350"/// returned vector. \\n\n"
9351"/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n"
9352"/// returned vector. \\n\n"
9353"/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n"
9354"/// returned vector. \\n\n"
9355"/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n"
9356"/// returned vector. \\n\n"
9357"/// Bits [5:4]: \\n\n"
9358"/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n"
9359"/// returned vector. \\n\n"
9360"/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n"
9361"/// returned vector. \\n\n"
9362"/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n"
9363"/// returned vector. \\n\n"
9364"/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n"
9365"/// returned vector. \\n\n"
9366"/// Bits [7:6]: \\n\n"
9367"/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n"
9368"/// returned vector. \\n\n"
9369"/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n"
9370"/// returned vector. \\n\n"
9371"/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n"
9372"/// returned vector. \\n\n"
9373"/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n"
9374"/// returned vector. \\n\n"
9375"/// Bits [1:0]: \\n\n"
9376"/// 00: Bits [159:128] of the source are copied to bits [159:128] of the\n"
9377"/// returned vector. \\n\n"
9378"/// 01: Bits [191:160] of the source are copied to bits [159:128] of the\n"
9379"/// returned vector. \\n\n"
9380"/// 10: Bits [223:192] of the source are copied to bits [159:128] of the\n"
9381"/// returned vector. \\n\n"
9382"/// 11: Bits [255:224] of the source are copied to bits [159:128] of the\n"
9383"/// returned vector. \\n\n"
9384"/// Bits [3:2]: \\n\n"
9385"/// 00: Bits [159:128] of the source are copied to bits [191:160] of the\n"
9386"/// returned vector. \\n\n"
9387"/// 01: Bits [191:160] of the source are copied to bits [191:160] of the\n"
9388"/// returned vector. \\n\n"
9389"/// 10: Bits [223:192] of the source are copied to bits [191:160] of the\n"
9390"/// returned vector. \\n\n"
9391"/// 11: Bits [255:224] of the source are copied to bits [191:160] of the\n"
9392"/// returned vector. \\n\n"
9393"/// Bits [5:4]: \\n\n"
9394"/// 00: Bits [159:128] of the source are copied to bits [223:192] of the\n"
9395"/// returned vector. \\n\n"
9396"/// 01: Bits [191:160] of the source are copied to bits [223:192] of the\n"
9397"/// returned vector. \\n\n"
9398"/// 10: Bits [223:192] of the source are copied to bits [223:192] of the\n"
9399"/// returned vector. \\n\n"
9400"/// 11: Bits [255:224] of the source are copied to bits [223:192] of the\n"
9401"/// returned vector. \\n\n"
9402"/// Bits [7:6]: \\n\n"
9403"/// 00: Bits [159:128] of the source are copied to bits [255:224] of the\n"
9404"/// returned vector. \\n\n"
9405"/// 01: Bits [191:160] of the source are copied to bits [255:224] of the\n"
9406"/// returned vector. \\n\n"
9407"/// 10: Bits [223:192] of the source are copied to bits [255:224] of the\n"
9408"/// returned vector. \\n\n"
9409"/// 11: Bits [255:224] of the source are copied to bits [255:224] of the\n"
9410"/// returned vector.\n"
9411"/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n"
9412"#define _mm256_permute_ps(A, C) \\\n"
9413" (__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C))\n"
9414"\n"
9415"/// Permutes 128-bit data values stored in two 256-bit vectors of\n"
9416"/// [4 x double], as specified by the immediate integer operand.\n"
9417"///\n"
9418"/// \\headerfile <x86intrin.h>\n"
9419"///\n"
9420"/// \\code\n"
9421"/// __m256d _mm256_permute2f128_pd(__m256d V1, __m256d V2, const int M);\n"
9422"/// \\endcode\n"
9423"///\n"
9424"/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.\n"
9425"///\n"
9426"/// \\param V1\n"
9427"/// A 256-bit vector of [4 x double].\n"
9428"/// \\param V2\n"
9429"/// A 256-bit vector of [4 x double.\n"
9430"/// \\param M\n"
9431"/// An immediate integer operand specifying how the values are to be\n"
9432"/// permuted. \\n\n"
9433"/// Bits [1:0]: \\n\n"
9434"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [127:0] of the\n"
9435"/// destination. \\n\n"
9436"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [127:0] of the\n"
9437"/// destination. \\n\n"
9438"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [127:0] of the\n"
9439"/// destination. \\n\n"
9440"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [127:0] of the\n"
9441"/// destination. \\n\n"
9442"/// Bits [5:4]: \\n\n"
9443"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [255:128] of the\n"
9444"/// destination. \\n\n"
9445"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [255:128] of the\n"
9446"/// destination. \\n\n"
9447"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [255:128] of the\n"
9448"/// destination. \\n\n"
9449"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [255:128] of the\n"
9450"/// destination.\n"
9451"/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n"
9452"#define _mm256_permute2f128_pd(V1, V2, M) \\\n"
9453" (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \\\n"
9454" (__v4df)(__m256d)(V2), (int)(M))\n"
9455"\n"
9456"/// Permutes 128-bit data values stored in two 256-bit vectors of\n"
9457"/// [8 x float], as specified by the immediate integer operand.\n"
9458"///\n"
9459"/// \\headerfile <x86intrin.h>\n"
9460"///\n"
9461"/// \\code\n"
9462"/// __m256 _mm256_permute2f128_ps(__m256 V1, __m256 V2, const int M);\n"
9463"/// \\endcode\n"
9464"///\n"
9465"/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.\n"
9466"///\n"
9467"/// \\param V1\n"
9468"/// A 256-bit vector of [8 x float].\n"
9469"/// \\param V2\n"
9470"/// A 256-bit vector of [8 x float].\n"
9471"/// \\param M\n"
9472"/// An immediate integer operand specifying how the values are to be\n"
9473"/// permuted. \\n\n"
9474"/// Bits [1:0]: \\n\n"
9475"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [127:0] of the\n"
9476"/// destination. \\n\n"
9477"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [127:0] of the\n"
9478"/// destination. \\n\n"
9479"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [127:0] of the\n"
9480"/// destination. \\n\n"
9481"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [127:0] of the\n"
9482"/// destination. \\n\n"
9483"/// Bits [5:4]: \\n\n"
9484"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [255:128] of the\n"
9485"/// destination. \\n\n"
9486"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [255:128] of the\n"
9487"/// destination. \\n\n"
9488"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [255:128] of the\n"
9489"/// destination. \\n\n"
9490"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [255:128] of the\n"
9491"/// destination.\n"
9492"/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n"
9493"#define _mm256_permute2f128_ps(V1, V2, M) \\\n"
9494" (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \\\n"
9495" (__v8sf)(__m256)(V2), (int)(M))\n"
9496"\n"
9497"/// Permutes 128-bit data values stored in two 256-bit integer vectors,\n"
9498"/// as specified by the immediate integer operand.\n"
9499"///\n"
9500"/// \\headerfile <x86intrin.h>\n"
9501"///\n"
9502"/// \\code\n"
9503"/// __m256i _mm256_permute2f128_si256(__m256i V1, __m256i V2, const int M);\n"
9504"/// \\endcode\n"
9505"///\n"
9506"/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.\n"
9507"///\n"
9508"/// \\param V1\n"
9509"/// A 256-bit integer vector.\n"
9510"/// \\param V2\n"
9511"/// A 256-bit integer vector.\n"
9512"/// \\param M\n"
9513"/// An immediate integer operand specifying how the values are to be copied.\n"
9514"/// Bits [1:0]: \\n\n"
9515"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [127:0] of the\n"
9516"/// destination. \\n\n"
9517"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [127:0] of the\n"
9518"/// destination. \\n\n"
9519"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [127:0] of the\n"
9520"/// destination. \\n\n"
9521"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [127:0] of the\n"
9522"/// destination. \\n\n"
9523"/// Bits [5:4]: \\n\n"
9524"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [255:128] of the\n"
9525"/// destination. \\n\n"
9526"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [255:128] of the\n"
9527"/// destination. \\n\n"
9528"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [255:128] of the\n"
9529"/// destination. \\n\n"
9530"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [255:128] of the\n"
9531"/// destination.\n"
9532"/// \\returns A 256-bit integer vector containing the copied values.\n"
9533"#define _mm256_permute2f128_si256(V1, V2, M) \\\n"
9534" (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \\\n"
9535" (__v8si)(__m256i)(V2), (int)(M))\n"
9536"\n"
9537"/* Vector Blend */\n"
9538"/// Merges 64-bit double-precision data values stored in either of the\n"
9539"/// two 256-bit vectors of [4 x double], as specified by the immediate\n"
9540"/// integer operand.\n"
9541"///\n"
9542"/// \\headerfile <x86intrin.h>\n"
9543"///\n"
9544"/// \\code\n"
9545"/// __m256d _mm256_blend_pd(__m256d V1, __m256d V2, const int M);\n"
9546"/// \\endcode\n"
9547"///\n"
9548"/// This intrinsic corresponds to the <c> VBLENDPD </c> instruction.\n"
9549"///\n"
9550"/// \\param V1\n"
9551"/// A 256-bit vector of [4 x double].\n"
9552"/// \\param V2\n"
9553"/// A 256-bit vector of [4 x double].\n"
9554"/// \\param M\n"
9555"/// An immediate integer operand, with mask bits [3:0] specifying how the\n"
9556"/// values are to be copied. The position of the mask bit corresponds to the\n"
9557"/// index of a copied value. When a mask bit is 0, the corresponding 64-bit\n"
9558"/// element in operand \\a V1 is copied to the same position in the\n"
9559"/// destination. When a mask bit is 1, the corresponding 64-bit element in\n"
9560"/// operand \\a V2 is copied to the same position in the destination.\n"
9561"/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n"
9562"#define _mm256_blend_pd(V1, V2, M) \\\n"
9563" (__m256d)__builtin_ia32_blendpd256((__v4df)(__m256d)(V1), \\\n"
9564" (__v4df)(__m256d)(V2), (int)(M))\n"
9565"\n"
9566"/// Merges 32-bit single-precision data values stored in either of the\n"
9567"/// two 256-bit vectors of [8 x float], as specified by the immediate\n"
9568"/// integer operand.\n"
9569"///\n"
9570"/// \\headerfile <x86intrin.h>\n"
9571"///\n"
9572"/// \\code\n"
9573"/// __m256 _mm256_blend_ps(__m256 V1, __m256 V2, const int M);\n"
9574"/// \\endcode\n"
9575"///\n"
9576"/// This intrinsic corresponds to the <c> VBLENDPS </c> instruction.\n"
9577"///\n"
9578"/// \\param V1\n"
9579"/// A 256-bit vector of [8 x float].\n"
9580"/// \\param V2\n"
9581"/// A 256-bit vector of [8 x float].\n"
9582"/// \\param M\n"
9583"/// An immediate integer operand, with mask bits [7:0] specifying how the\n"
9584"/// values are to be copied. The position of the mask bit corresponds to the\n"
9585"/// index of a copied value. When a mask bit is 0, the corresponding 32-bit\n"
9586"/// element in operand \\a V1 is copied to the same position in the\n"
9587"/// destination. When a mask bit is 1, the corresponding 32-bit element in\n"
9588"/// operand \\a V2 is copied to the same position in the destination.\n"
9589"/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n"
9590"#define _mm256_blend_ps(V1, V2, M) \\\n"
9591" (__m256)__builtin_ia32_blendps256((__v8sf)(__m256)(V1), \\\n"
9592" (__v8sf)(__m256)(V2), (int)(M))\n"
9593"\n"
9594"/// Merges 64-bit double-precision data values stored in either of the\n"
9595"/// two 256-bit vectors of [4 x double], as specified by the 256-bit vector\n"
9596"/// operand.\n"
9597"///\n"
9598"/// \\headerfile <x86intrin.h>\n"
9599"///\n"
9600"/// This intrinsic corresponds to the <c> VBLENDVPD </c> instruction.\n"
9601"///\n"
9602"/// \\param __a\n"
9603"/// A 256-bit vector of [4 x double].\n"
9604"/// \\param __b\n"
9605"/// A 256-bit vector of [4 x double].\n"
9606"/// \\param __c\n"
9607"/// A 256-bit vector operand, with mask bits 255, 191, 127, and 63 specifying\n"
9608"/// how the values are to be copied. The position of the mask bit corresponds\n"
9609"/// to the most significant bit of a copied value. When a mask bit is 0, the\n"
9610"/// corresponding 64-bit element in operand \\a __a is copied to the same\n"
9611"/// position in the destination. When a mask bit is 1, the corresponding\n"
9612"/// 64-bit element in operand \\a __b is copied to the same position in the\n"
9613"/// destination.\n"
9614"/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n"
9615"static __inline __m256d __DEFAULT_FN_ATTRS\n"
9616"_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)\n"
9617"{\n"
9618" return (__m256d)__builtin_ia32_blendvpd256(\n"
9619" (__v4df)__a, (__v4df)__b, (__v4df)__c);\n"
9620"}\n"
9621"\n"
9622"/// Merges 32-bit single-precision data values stored in either of the\n"
9623"/// two 256-bit vectors of [8 x float], as specified by the 256-bit vector\n"
9624"/// operand.\n"
9625"///\n"
9626"/// \\headerfile <x86intrin.h>\n"
9627"///\n"
9628"/// This intrinsic corresponds to the <c> VBLENDVPS </c> instruction.\n"
9629"///\n"
9630"/// \\param __a\n"
9631"/// A 256-bit vector of [8 x float].\n"
9632"/// \\param __b\n"
9633"/// A 256-bit vector of [8 x float].\n"
9634"/// \\param __c\n"
9635"/// A 256-bit vector operand, with mask bits 255, 223, 191, 159, 127, 95, 63,\n"
9636"/// and 31 specifying how the values are to be copied. The position of the\n"
9637"/// mask bit corresponds to the most significant bit of a copied value. When\n"
9638"/// a mask bit is 0, the corresponding 32-bit element in operand \\a __a is\n"
9639"/// copied to the same position in the destination. When a mask bit is 1, the\n"
9640"/// corresponding 32-bit element in operand \\a __b is copied to the same\n"
9641"/// position in the destination.\n"
9642"/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n"
9643"static __inline __m256 __DEFAULT_FN_ATTRS\n"
9644"_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)\n"
9645"{\n"
9646" return (__m256)__builtin_ia32_blendvps256(\n"
9647" (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);\n"
9648"}\n"
9649"\n"
9650"/* Vector Dot Product */\n"
9651"/// Computes two dot products in parallel, using the lower and upper\n"
9652"/// halves of two [8 x float] vectors as input to the two computations, and\n"
9653"/// returning the two dot products in the lower and upper halves of the\n"
9654"/// [8 x float] result.\n"
9655"///\n"
9656"/// The immediate integer operand controls which input elements will\n"
9657"/// contribute to the dot product, and where the final results are returned.\n"
9658"/// In general, for each dot product, the four corresponding elements of the\n"
9659"/// input vectors are multiplied; the first two and second two products are\n"
9660"/// summed, then the two sums are added to form the final result.\n"
9661"///\n"
9662"/// \\headerfile <x86intrin.h>\n"
9663"///\n"
9664"/// \\code\n"
9665"/// __m256 _mm256_dp_ps(__m256 V1, __m256 V2, const int M);\n"
9666"/// \\endcode\n"
9667"///\n"
9668"/// This intrinsic corresponds to the <c> VDPPS </c> instruction.\n"
9669"///\n"
9670"/// \\param V1\n"
9671"/// A vector of [8 x float] values, treated as two [4 x float] vectors.\n"
9672"/// \\param V2\n"
9673"/// A vector of [8 x float] values, treated as two [4 x float] vectors.\n"
9674"/// \\param M\n"
9675"/// An immediate integer argument. Bits [7:4] determine which elements of\n"
9676"/// the input vectors are used, with bit [4] corresponding to the lowest\n"
9677"/// element and bit [7] corresponding to the highest element of each [4 x\n"
9678"/// float] subvector. If a bit is set, the corresponding elements from the\n"
9679"/// two input vectors are used as an input for dot product; otherwise that\n"
9680"/// input is treated as zero. Bits [3:0] determine which elements of the\n"
9681"/// result will receive a copy of the final dot product, with bit [0]\n"
9682"/// corresponding to the lowest element and bit [3] corresponding to the\n"
9683"/// highest element of each [4 x float] subvector. If a bit is set, the dot\n"
9684"/// product is returned in the corresponding element; otherwise that element\n"
9685"/// is set to zero. The bitmask is applied in the same way to each of the\n"
9686"/// two parallel dot product computations.\n"
9687"/// \\returns A 256-bit vector of [8 x float] containing the two dot products.\n"
9688"#define _mm256_dp_ps(V1, V2, M) \\\n"
9689" (__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \\\n"
9690" (__v8sf)(__m256)(V2), (M))\n"
9691"\n"
9692"/* Vector shuffle */\n"
9693"/// Selects 8 float values from the 256-bit operands of [8 x float], as\n"
9694"/// specified by the immediate value operand.\n"
9695"///\n"
9696"/// The four selected elements in each operand are copied to the destination\n"
9697"/// according to the bits specified in the immediate operand. The selected\n"
9698"/// elements from the first 256-bit operand are copied to bits [63:0] and\n"
9699"/// bits [191:128] of the destination, and the selected elements from the\n"
9700"/// second 256-bit operand are copied to bits [127:64] and bits [255:192] of\n"
9701"/// the destination. For example, if bits [7:0] of the immediate operand\n"
9702"/// contain a value of 0xFF, the 256-bit destination vector would contain the\n"
9703"/// following values: b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3].\n"
9704"///\n"
9705"/// \\headerfile <x86intrin.h>\n"
9706"///\n"
9707"/// \\code\n"
9708"/// __m256 _mm256_shuffle_ps(__m256 a, __m256 b, const int mask);\n"
9709"/// \\endcode\n"
9710"///\n"
9711"/// This intrinsic corresponds to the <c> VSHUFPS </c> instruction.\n"
9712"///\n"
9713"/// \\param a\n"
9714"/// A 256-bit vector of [8 x float]. The four selected elements in this\n"
9715"/// operand are copied to bits [63:0] and bits [191:128] in the destination,\n"
9716"/// according to the bits specified in the immediate operand.\n"
9717"/// \\param b\n"
9718"/// A 256-bit vector of [8 x float]. The four selected elements in this\n"
9719"/// operand are copied to bits [127:64] and bits [255:192] in the\n"
9720"/// destination, according to the bits specified in the immediate operand.\n"
9721"/// \\param mask\n"
9722"/// An immediate value containing an 8-bit value specifying which elements to\n"
9723"/// copy from \\a a and \\a b \\n.\n"
9724"/// Bits [3:0] specify the values copied from operand \\a a. \\n\n"
9725"/// Bits [7:4] specify the values copied from operand \\a b. \\n\n"
9726"/// The destinations within the 256-bit destination are assigned values as\n"
9727"/// follows, according to the bit value assignments described below: \\n\n"
9728"/// Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the\n"
9729"/// destination. \\n\n"
9730"/// Bits [3:2] are used to assign values to bits [63:32] and [191:160] in the\n"
9731"/// destination. \\n\n"
9732"/// Bits [5:4] are used to assign values to bits [95:64] and [223:192] in the\n"
9733"/// destination. \\n\n"
9734"/// Bits [7:6] are used to assign values to bits [127:96] and [255:224] in\n"
9735"/// the destination. \\n\n"
9736"/// Bit value assignments: \\n\n"
9737"/// 00: Bits [31:0] and [159:128] are copied from the selected operand. \\n\n"
9738"/// 01: Bits [63:32] and [191:160] are copied from the selected operand. \\n\n"
9739"/// 10: Bits [95:64] and [223:192] are copied from the selected operand. \\n\n"
9740"/// 11: Bits [127:96] and [255:224] are copied from the selected operand.\n"
9741"/// \\returns A 256-bit vector of [8 x float] containing the shuffled values.\n"
9742"#define _mm256_shuffle_ps(a, b, mask) \\\n"
9743" (__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \\\n"
9744" (__v8sf)(__m256)(b), (int)(mask))\n"
9745"\n"
9746"/// Selects four double-precision values from the 256-bit operands of\n"
9747"/// [4 x double], as specified by the immediate value operand.\n"
9748"///\n"
9749"/// The selected elements from the first 256-bit operand are copied to bits\n"
9750"/// [63:0] and bits [191:128] in the destination, and the selected elements\n"
9751"/// from the second 256-bit operand are copied to bits [127:64] and bits\n"
9752"/// [255:192] in the destination. For example, if bits [3:0] of the immediate\n"
9753"/// operand contain a value of 0xF, the 256-bit destination vector would\n"
9754"/// contain the following values: b[3], a[3], b[1], a[1].\n"
9755"///\n"
9756"/// \\headerfile <x86intrin.h>\n"
9757"///\n"
9758"/// \\code\n"
9759"/// __m256d _mm256_shuffle_pd(__m256d a, __m256d b, const int mask);\n"
9760"/// \\endcode\n"
9761"///\n"
9762"/// This intrinsic corresponds to the <c> VSHUFPD </c> instruction.\n"
9763"///\n"
9764"/// \\param a\n"
9765"/// A 256-bit vector of [4 x double].\n"
9766"/// \\param b\n"
9767"/// A 256-bit vector of [4 x double].\n"
9768"/// \\param mask\n"
9769"/// An immediate value containing 8-bit values specifying which elements to\n"
9770"/// copy from \\a a and \\a b: \\n\n"
9771"/// Bit [0]=0: Bits [63:0] are copied from \\a a to bits [63:0] of the\n"
9772"/// destination. \\n\n"
9773"/// Bit [0]=1: Bits [127:64] are copied from \\a a to bits [63:0] of the\n"
9774"/// destination. \\n\n"
9775"/// Bit [1]=0: Bits [63:0] are copied from \\a b to bits [127:64] of the\n"
9776"/// destination. \\n\n"
9777"/// Bit [1]=1: Bits [127:64] are copied from \\a b to bits [127:64] of the\n"
9778"/// destination. \\n\n"
9779"/// Bit [2]=0: Bits [191:128] are copied from \\a a to bits [191:128] of the\n"
9780"/// destination. \\n\n"
9781"/// Bit [2]=1: Bits [255:192] are copied from \\a a to bits [191:128] of the\n"
9782"/// destination. \\n\n"
9783"/// Bit [3]=0: Bits [191:128] are copied from \\a b to bits [255:192] of the\n"
9784"/// destination. \\n\n"
9785"/// Bit [3]=1: Bits [255:192] are copied from \\a b to bits [255:192] of the\n"
9786"/// destination.\n"
9787"/// \\returns A 256-bit vector of [4 x double] containing the shuffled values.\n"
9788"#define _mm256_shuffle_pd(a, b, mask) \\\n"
9789" (__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \\\n"
9790" (__v4df)(__m256d)(b), (int)(mask))\n"
9791"\n"
9792"/* Compare */\n"
9793"#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */\n"
9794"#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */\n"
9795"#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */\n"
9796"#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */\n"
9797"#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */\n"
9798"#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */\n"
9799"#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */\n"
9800"#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */\n"
9801"#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */\n"
9802"#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */\n"
9803"#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */\n"
9804"#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */\n"
9805"#define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */\n"
9806"#define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */\n"
9807"#define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */\n"
9808"#define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */\n"
9809"#define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */\n"
9810"#define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */\n"
9811"#define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */\n"
9812"#define _CMP_UNORD_S 0x13 /* Unordered (signaling) */\n"
9813"#define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */\n"
9814"#define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */\n"
9815"#define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unordered, non-signaling) */\n"
9816"#define _CMP_ORD_S 0x17 /* Ordered (signaling) */\n"
9817"#define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */\n"
9818"#define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */\n"
9819"#define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */\n"
9820"#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */\n"
9821"#define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */\n"
9822"#define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */\n"
9823"#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */\n"
9824"#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */\n"
9825"\n"
9826"/// Compares each of the corresponding double-precision values of two\n"
9827"/// 128-bit vectors of [2 x double], using the operation specified by the\n"
9828"/// immediate integer operand.\n"
9829"///\n"
9830"/// Returns a [2 x double] vector consisting of two doubles corresponding to\n"
9831"/// the two comparison results: zero if the comparison is false, and all 1's\n"
9832"/// if the comparison is true.\n"
9833"///\n"
9834"/// \\headerfile <x86intrin.h>\n"
9835"///\n"
9836"/// \\code\n"
9837"/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);\n"
9838"/// \\endcode\n"
9839"///\n"
9840"/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.\n"
9841"///\n"
9842"/// \\param a\n"
9843"/// A 128-bit vector of [2 x double].\n"
9844"/// \\param b\n"
9845"/// A 128-bit vector of [2 x double].\n"
9846"/// \\param c\n"
9847"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
9848"/// operation to use: \\n\n"
9849"/// 0x00: Equal (ordered, non-signaling) \\n\n"
9850"/// 0x01: Less-than (ordered, signaling) \\n\n"
9851"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
9852"/// 0x03: Unordered (non-signaling) \\n\n"
9853"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
9854"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
9855"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
9856"/// 0x07: Ordered (non-signaling) \\n\n"
9857"/// 0x08: Equal (unordered, non-signaling) \\n\n"
9858"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
9859"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
9860"/// 0x0B: False (ordered, non-signaling) \\n\n"
9861"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
9862"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
9863"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
9864"/// 0x0F: True (unordered, non-signaling) \\n\n"
9865"/// 0x10: Equal (ordered, signaling) \\n\n"
9866"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
9867"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
9868"/// 0x13: Unordered (signaling) \\n\n"
9869"/// 0x14: Not-equal (unordered, signaling) \\n\n"
9870"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
9871"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
9872"/// 0x17: Ordered (signaling) \\n\n"
9873"/// 0x18: Equal (unordered, signaling) \\n\n"
9874"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
9875"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
9876"/// 0x1B: False (ordered, signaling) \\n\n"
9877"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
9878"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
9879"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
9880"/// 0x1F: True (unordered, signaling)\n"
9881"/// \\returns A 128-bit vector of [2 x double] containing the comparison results.\n"
9882"#define _mm_cmp_pd(a, b, c) \\\n"
9883" (__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \\\n"
9884" (__v2df)(__m128d)(b), (c))\n"
9885"\n"
9886"/// Compares each of the corresponding values of two 128-bit vectors of\n"
9887"/// [4 x float], using the operation specified by the immediate integer\n"
9888"/// operand.\n"
9889"///\n"
9890"/// Returns a [4 x float] vector consisting of four floats corresponding to\n"
9891"/// the four comparison results: zero if the comparison is false, and all 1's\n"
9892"/// if the comparison is true.\n"
9893"///\n"
9894"/// \\headerfile <x86intrin.h>\n"
9895"///\n"
9896"/// \\code\n"
9897"/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);\n"
9898"/// \\endcode\n"
9899"///\n"
9900"/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.\n"
9901"///\n"
9902"/// \\param a\n"
9903"/// A 128-bit vector of [4 x float].\n"
9904"/// \\param b\n"
9905"/// A 128-bit vector of [4 x float].\n"
9906"/// \\param c\n"
9907"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
9908"/// operation to use: \\n\n"
9909"/// 0x00: Equal (ordered, non-signaling) \\n\n"
9910"/// 0x01: Less-than (ordered, signaling) \\n\n"
9911"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
9912"/// 0x03: Unordered (non-signaling) \\n\n"
9913"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
9914"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
9915"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
9916"/// 0x07: Ordered (non-signaling) \\n\n"
9917"/// 0x08: Equal (unordered, non-signaling) \\n\n"
9918"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
9919"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
9920"/// 0x0B: False (ordered, non-signaling) \\n\n"
9921"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
9922"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
9923"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
9924"/// 0x0F: True (unordered, non-signaling) \\n\n"
9925"/// 0x10: Equal (ordered, signaling) \\n\n"
9926"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
9927"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
9928"/// 0x13: Unordered (signaling) \\n\n"
9929"/// 0x14: Not-equal (unordered, signaling) \\n\n"
9930"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
9931"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
9932"/// 0x17: Ordered (signaling) \\n\n"
9933"/// 0x18: Equal (unordered, signaling) \\n\n"
9934"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
9935"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
9936"/// 0x1B: False (ordered, signaling) \\n\n"
9937"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
9938"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
9939"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
9940"/// 0x1F: True (unordered, signaling)\n"
9941"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
9942"#define _mm_cmp_ps(a, b, c) \\\n"
9943" (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \\\n"
9944" (__v4sf)(__m128)(b), (c))\n"
9945"\n"
9946"/// Compares each of the corresponding double-precision values of two\n"
9947"/// 256-bit vectors of [4 x double], using the operation specified by the\n"
9948"/// immediate integer operand.\n"
9949"///\n"
9950"/// Returns a [4 x double] vector consisting of four doubles corresponding to\n"
9951"/// the four comparison results: zero if the comparison is false, and all 1's\n"
9952"/// if the comparison is true.\n"
9953"///\n"
9954"/// \\headerfile <x86intrin.h>\n"
9955"///\n"
9956"/// \\code\n"
9957"/// __m256d _mm256_cmp_pd(__m256d a, __m256d b, const int c);\n"
9958"/// \\endcode\n"
9959"///\n"
9960"/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.\n"
9961"///\n"
9962"/// \\param a\n"
9963"/// A 256-bit vector of [4 x double].\n"
9964"/// \\param b\n"
9965"/// A 256-bit vector of [4 x double].\n"
9966"/// \\param c\n"
9967"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
9968"/// operation to use: \\n\n"
9969"/// 0x00: Equal (ordered, non-signaling) \\n\n"
9970"/// 0x01: Less-than (ordered, signaling) \\n\n"
9971"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
9972"/// 0x03: Unordered (non-signaling) \\n\n"
9973"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
9974"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
9975"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
9976"/// 0x07: Ordered (non-signaling) \\n\n"
9977"/// 0x08: Equal (unordered, non-signaling) \\n\n"
9978"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
9979"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
9980"/// 0x0B: False (ordered, non-signaling) \\n\n"
9981"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
9982"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
9983"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
9984"/// 0x0F: True (unordered, non-signaling) \\n\n"
9985"/// 0x10: Equal (ordered, signaling) \\n\n"
9986"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
9987"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
9988"/// 0x13: Unordered (signaling) \\n\n"
9989"/// 0x14: Not-equal (unordered, signaling) \\n\n"
9990"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
9991"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
9992"/// 0x17: Ordered (signaling) \\n\n"
9993"/// 0x18: Equal (unordered, signaling) \\n\n"
9994"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
9995"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
9996"/// 0x1B: False (ordered, signaling) \\n\n"
9997"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
9998"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
9999"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
10000"/// 0x1F: True (unordered, signaling)\n"
10001"/// \\returns A 256-bit vector of [4 x double] containing the comparison results.\n"
10002"#define _mm256_cmp_pd(a, b, c) \\\n"
10003" (__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \\\n"
10004" (__v4df)(__m256d)(b), (c))\n"
10005"\n"
10006"/// Compares each of the corresponding values of two 256-bit vectors of\n"
10007"/// [8 x float], using the operation specified by the immediate integer\n"
10008"/// operand.\n"
10009"///\n"
10010"/// Returns a [8 x float] vector consisting of eight floats corresponding to\n"
10011"/// the eight comparison results: zero if the comparison is false, and all\n"
10012"/// 1's if the comparison is true.\n"
10013"///\n"
10014"/// \\headerfile <x86intrin.h>\n"
10015"///\n"
10016"/// \\code\n"
10017"/// __m256 _mm256_cmp_ps(__m256 a, __m256 b, const int c);\n"
10018"/// \\endcode\n"
10019"///\n"
10020"/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.\n"
10021"///\n"
10022"/// \\param a\n"
10023"/// A 256-bit vector of [8 x float].\n"
10024"/// \\param b\n"
10025"/// A 256-bit vector of [8 x float].\n"
10026"/// \\param c\n"
10027"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
10028"/// operation to use: \\n\n"
10029"/// 0x00: Equal (ordered, non-signaling) \\n\n"
10030"/// 0x01: Less-than (ordered, signaling) \\n\n"
10031"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
10032"/// 0x03: Unordered (non-signaling) \\n\n"
10033"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
10034"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
10035"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
10036"/// 0x07: Ordered (non-signaling) \\n\n"
10037"/// 0x08: Equal (unordered, non-signaling) \\n\n"
10038"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
10039"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
10040"/// 0x0B: False (ordered, non-signaling) \\n\n"
10041"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
10042"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
10043"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
10044"/// 0x0F: True (unordered, non-signaling) \\n\n"
10045"/// 0x10: Equal (ordered, signaling) \\n\n"
10046"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
10047"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
10048"/// 0x13: Unordered (signaling) \\n\n"
10049"/// 0x14: Not-equal (unordered, signaling) \\n\n"
10050"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
10051"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
10052"/// 0x17: Ordered (signaling) \\n\n"
10053"/// 0x18: Equal (unordered, signaling) \\n\n"
10054"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
10055"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
10056"/// 0x1B: False (ordered, signaling) \\n\n"
10057"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
10058"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
10059"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
10060"/// 0x1F: True (unordered, signaling)\n"
10061"/// \\returns A 256-bit vector of [8 x float] containing the comparison results.\n"
10062"#define _mm256_cmp_ps(a, b, c) \\\n"
10063" (__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \\\n"
10064" (__v8sf)(__m256)(b), (c))\n"
10065"\n"
10066"/// Compares each of the corresponding scalar double-precision values of\n"
10067"/// two 128-bit vectors of [2 x double], using the operation specified by the\n"
10068"/// immediate integer operand.\n"
10069"///\n"
10070"/// If the result is true, all 64 bits of the destination vector are set;\n"
10071"/// otherwise they are cleared.\n"
10072"///\n"
10073"/// \\headerfile <x86intrin.h>\n"
10074"///\n"
10075"/// \\code\n"
10076"/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);\n"
10077"/// \\endcode\n"
10078"///\n"
10079"/// This intrinsic corresponds to the <c> VCMPSD </c> instruction.\n"
10080"///\n"
10081"/// \\param a\n"
10082"/// A 128-bit vector of [2 x double].\n"
10083"/// \\param b\n"
10084"/// A 128-bit vector of [2 x double].\n"
10085"/// \\param c\n"
10086"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
10087"/// operation to use: \\n\n"
10088"/// 0x00: Equal (ordered, non-signaling) \\n\n"
10089"/// 0x01: Less-than (ordered, signaling) \\n\n"
10090"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
10091"/// 0x03: Unordered (non-signaling) \\n\n"
10092"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
10093"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
10094"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
10095"/// 0x07: Ordered (non-signaling) \\n\n"
10096"/// 0x08: Equal (unordered, non-signaling) \\n\n"
10097"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
10098"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
10099"/// 0x0B: False (ordered, non-signaling) \\n\n"
10100"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
10101"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
10102"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
10103"/// 0x0F: True (unordered, non-signaling) \\n\n"
10104"/// 0x10: Equal (ordered, signaling) \\n\n"
10105"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
10106"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
10107"/// 0x13: Unordered (signaling) \\n\n"
10108"/// 0x14: Not-equal (unordered, signaling) \\n\n"
10109"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
10110"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
10111"/// 0x17: Ordered (signaling) \\n\n"
10112"/// 0x18: Equal (unordered, signaling) \\n\n"
10113"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
10114"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
10115"/// 0x1B: False (ordered, signaling) \\n\n"
10116"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
10117"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
10118"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
10119"/// 0x1F: True (unordered, signaling)\n"
10120"/// \\returns A 128-bit vector of [2 x double] containing the comparison results.\n"
10121"#define _mm_cmp_sd(a, b, c) \\\n"
10122" (__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \\\n"
10123" (__v2df)(__m128d)(b), (c))\n"
10124"\n"
10125"/// Compares each of the corresponding scalar values of two 128-bit\n"
10126"/// vectors of [4 x float], using the operation specified by the immediate\n"
10127"/// integer operand.\n"
10128"///\n"
10129"/// If the result is true, all 32 bits of the destination vector are set;\n"
10130"/// otherwise they are cleared.\n"
10131"///\n"
10132"/// \\headerfile <x86intrin.h>\n"
10133"///\n"
10134"/// \\code\n"
10135"/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);\n"
10136"/// \\endcode\n"
10137"///\n"
10138"/// This intrinsic corresponds to the <c> VCMPSS </c> instruction.\n"
10139"///\n"
10140"/// \\param a\n"
10141"/// A 128-bit vector of [4 x float].\n"
10142"/// \\param b\n"
10143"/// A 128-bit vector of [4 x float].\n"
10144"/// \\param c\n"
10145"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
10146"/// operation to use: \\n\n"
10147"/// 0x00: Equal (ordered, non-signaling) \\n\n"
10148"/// 0x01: Less-than (ordered, signaling) \\n\n"
10149"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
10150"/// 0x03: Unordered (non-signaling) \\n\n"
10151"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
10152"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
10153"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
10154"/// 0x07: Ordered (non-signaling) \\n\n"
10155"/// 0x08: Equal (unordered, non-signaling) \\n\n"
10156"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
10157"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
10158"/// 0x0B: False (ordered, non-signaling) \\n\n"
10159"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
10160"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
10161"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
10162"/// 0x0F: True (unordered, non-signaling) \\n\n"
10163"/// 0x10: Equal (ordered, signaling) \\n\n"
10164"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
10165"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
10166"/// 0x13: Unordered (signaling) \\n\n"
10167"/// 0x14: Not-equal (unordered, signaling) \\n\n"
10168"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
10169"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
10170"/// 0x17: Ordered (signaling) \\n\n"
10171"/// 0x18: Equal (unordered, signaling) \\n\n"
10172"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
10173"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
10174"/// 0x1B: False (ordered, signaling) \\n\n"
10175"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
10176"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
10177"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
10178"/// 0x1F: True (unordered, signaling)\n"
10179"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
10180"#define _mm_cmp_ss(a, b, c) \\\n"
10181" (__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \\\n"
10182" (__v4sf)(__m128)(b), (c))\n"
10183"\n"
10184"/// Takes a [8 x i32] vector and returns the vector element value\n"
10185"/// indexed by the immediate constant operand.\n"
10186"///\n"
10187"/// \\headerfile <x86intrin.h>\n"
10188"///\n"
10189"/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n"
10190"/// instruction.\n"
10191"///\n"
10192"/// \\param __a\n"
10193"/// A 256-bit vector of [8 x i32].\n"
10194"/// \\param __imm\n"
10195"/// An immediate integer operand with bits [2:0] determining which vector\n"
10196"/// element is extracted and returned.\n"
10197"/// \\returns A 32-bit integer containing the extracted 32 bits of extended\n"
10198"/// packed data.\n"
10199"#define _mm256_extract_epi32(X, N) \\\n"
10200" (int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N))\n"
10201"\n"
10202"/// Takes a [16 x i16] vector and returns the vector element value\n"
10203"/// indexed by the immediate constant operand.\n"
10204"///\n"
10205"/// \\headerfile <x86intrin.h>\n"
10206"///\n"
10207"/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n"
10208"/// instruction.\n"
10209"///\n"
10210"/// \\param __a\n"
10211"/// A 256-bit integer vector of [16 x i16].\n"
10212"/// \\param __imm\n"
10213"/// An immediate integer operand with bits [3:0] determining which vector\n"
10214"/// element is extracted and returned.\n"
10215"/// \\returns A 32-bit integer containing the extracted 16 bits of zero extended\n"
10216"/// packed data.\n"
10217"#define _mm256_extract_epi16(X, N) \\\n"
10218" (int)(unsigned short)__builtin_ia32_vec_ext_v16hi((__v16hi)(__m256i)(X), \\\n"
10219" (int)(N))\n"
10220"\n"
10221"/// Takes a [32 x i8] vector and returns the vector element value\n"
10222"/// indexed by the immediate constant operand.\n"
10223"///\n"
10224"/// \\headerfile <x86intrin.h>\n"
10225"///\n"
10226"/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n"
10227"/// instruction.\n"
10228"///\n"
10229"/// \\param __a\n"
10230"/// A 256-bit integer vector of [32 x i8].\n"
10231"/// \\param __imm\n"
10232"/// An immediate integer operand with bits [4:0] determining which vector\n"
10233"/// element is extracted and returned.\n"
10234"/// \\returns A 32-bit integer containing the extracted 8 bits of zero extended\n"
10235"/// packed data.\n"
10236"#define _mm256_extract_epi8(X, N) \\\n"
10237" (int)(unsigned char)__builtin_ia32_vec_ext_v32qi((__v32qi)(__m256i)(X), \\\n"
10238" (int)(N))\n"
10239"\n"
10240"#ifdef __x86_64__\n"
10241"/// Takes a [4 x i64] vector and returns the vector element value\n"
10242"/// indexed by the immediate constant operand.\n"
10243"///\n"
10244"/// \\headerfile <x86intrin.h>\n"
10245"///\n"
10246"/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n"
10247"/// instruction.\n"
10248"///\n"
10249"/// \\param __a\n"
10250"/// A 256-bit integer vector of [4 x i64].\n"
10251"/// \\param __imm\n"
10252"/// An immediate integer operand with bits [1:0] determining which vector\n"
10253"/// element is extracted and returned.\n"
10254"/// \\returns A 64-bit integer containing the extracted 64 bits of extended\n"
10255"/// packed data.\n"
10256"#define _mm256_extract_epi64(X, N) \\\n"
10257" (long long)__builtin_ia32_vec_ext_v4di((__v4di)(__m256i)(X), (int)(N))\n"
10258"#endif\n"
10259"\n"
10260"/// Takes a [8 x i32] vector and replaces the vector element value\n"
10261"/// indexed by the immediate constant operand by a new value. Returns the\n"
10262"/// modified vector.\n"
10263"///\n"
10264"/// \\headerfile <x86intrin.h>\n"
10265"///\n"
10266"/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n"
10267"/// instruction.\n"
10268"///\n"
10269"/// \\param __a\n"
10270"/// A vector of [8 x i32] to be used by the insert operation.\n"
10271"/// \\param __b\n"
10272"/// An integer value. The replacement value for the insert operation.\n"
10273"/// \\param __imm\n"
10274"/// An immediate integer specifying the index of the vector element to be\n"
10275"/// replaced.\n"
10276"/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n"
10277"/// \\a __imm with \\a __b.\n"
10278"#define _mm256_insert_epi32(X, I, N) \\\n"
10279" (__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \\\n"
10280" (int)(I), (int)(N))\n"
10281"\n"
10282"\n"
10283"/// Takes a [16 x i16] vector and replaces the vector element value\n"
10284"/// indexed by the immediate constant operand with a new value. Returns the\n"
10285"/// modified vector.\n"
10286"///\n"
10287"/// \\headerfile <x86intrin.h>\n"
10288"///\n"
10289"/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n"
10290"/// instruction.\n"
10291"///\n"
10292"/// \\param __a\n"
10293"/// A vector of [16 x i16] to be used by the insert operation.\n"
10294"/// \\param __b\n"
10295"/// An i16 integer value. The replacement value for the insert operation.\n"
10296"/// \\param __imm\n"
10297"/// An immediate integer specifying the index of the vector element to be\n"
10298"/// replaced.\n"
10299"/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n"
10300"/// \\a __imm with \\a __b.\n"
10301"#define _mm256_insert_epi16(X, I, N) \\\n"
10302" (__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \\\n"
10303" (int)(I), (int)(N))\n"
10304"\n"
10305"/// Takes a [32 x i8] vector and replaces the vector element value\n"
10306"/// indexed by the immediate constant operand with a new value. Returns the\n"
10307"/// modified vector.\n"
10308"///\n"
10309"/// \\headerfile <x86intrin.h>\n"
10310"///\n"
10311"/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n"
10312"/// instruction.\n"
10313"///\n"
10314"/// \\param __a\n"
10315"/// A vector of [32 x i8] to be used by the insert operation.\n"
10316"/// \\param __b\n"
10317"/// An i8 integer value. The replacement value for the insert operation.\n"
10318"/// \\param __imm\n"
10319"/// An immediate integer specifying the index of the vector element to be\n"
10320"/// replaced.\n"
10321"/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n"
10322"/// \\a __imm with \\a __b.\n"
10323"#define _mm256_insert_epi8(X, I, N) \\\n"
10324" (__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \\\n"
10325" (int)(I), (int)(N))\n"
10326"\n"
10327"#ifdef __x86_64__\n"
10328"/// Takes a [4 x i64] vector and replaces the vector element value\n"
10329"/// indexed by the immediate constant operand with a new value. Returns the\n"
10330"/// modified vector.\n"
10331"///\n"
10332"/// \\headerfile <x86intrin.h>\n"
10333"///\n"
10334"/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n"
10335"/// instruction.\n"
10336"///\n"
10337"/// \\param __a\n"
10338"/// A vector of [4 x i64] to be used by the insert operation.\n"
10339"/// \\param __b\n"
10340"/// A 64-bit integer value. The replacement value for the insert operation.\n"
10341"/// \\param __imm\n"
10342"/// An immediate integer specifying the index of the vector element to be\n"
10343"/// replaced.\n"
10344"/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n"
10345"/// \\a __imm with \\a __b.\n"
10346"#define _mm256_insert_epi64(X, I, N) \\\n"
10347" (__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \\\n"
10348" (long long)(I), (int)(N))\n"
10349"#endif\n"
10350"\n"
10351"/* Conversion */\n"
10352"/// Converts a vector of [4 x i32] into a vector of [4 x double].\n"
10353"///\n"
10354"/// \\headerfile <x86intrin.h>\n"
10355"///\n"
10356"/// This intrinsic corresponds to the <c> VCVTDQ2PD </c> instruction.\n"
10357"///\n"
10358"/// \\param __a\n"
10359"/// A 128-bit integer vector of [4 x i32].\n"
10360"/// \\returns A 256-bit vector of [4 x double] containing the converted values.\n"
10361"static __inline __m256d __DEFAULT_FN_ATTRS\n"
10362"_mm256_cvtepi32_pd(__m128i __a)\n"
10363"{\n"
10364" return (__m256d)__builtin_convertvector((__v4si)__a, __v4df);\n"
10365"}\n"
10366"\n"
10367"/// Converts a vector of [8 x i32] into a vector of [8 x float].\n"
10368"///\n"
10369"/// \\headerfile <x86intrin.h>\n"
10370"///\n"
10371"/// This intrinsic corresponds to the <c> VCVTDQ2PS </c> instruction.\n"
10372"///\n"
10373"/// \\param __a\n"
10374"/// A 256-bit integer vector.\n"
10375"/// \\returns A 256-bit vector of [8 x float] containing the converted values.\n"
10376"static __inline __m256 __DEFAULT_FN_ATTRS\n"
10377"_mm256_cvtepi32_ps(__m256i __a)\n"
10378"{\n"
10379" return (__m256)__builtin_convertvector((__v8si)__a, __v8sf);\n"
10380"}\n"
10381"\n"
10382"/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of\n"
10383"/// [4 x float].\n"
10384"///\n"
10385"/// \\headerfile <x86intrin.h>\n"
10386"///\n"
10387"/// This intrinsic corresponds to the <c> VCVTPD2PS </c> instruction.\n"
10388"///\n"
10389"/// \\param __a\n"
10390"/// A 256-bit vector of [4 x double].\n"
10391"/// \\returns A 128-bit vector of [4 x float] containing the converted values.\n"
10392"static __inline __m128 __DEFAULT_FN_ATTRS\n"
10393"_mm256_cvtpd_ps(__m256d __a)\n"
10394"{\n"
10395" return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);\n"
10396"}\n"
10397"\n"
10398"/// Converts a vector of [8 x float] into a vector of [8 x i32].\n"
10399"///\n"
10400"/// \\headerfile <x86intrin.h>\n"
10401"///\n"
10402"/// This intrinsic corresponds to the <c> VCVTPS2DQ </c> instruction.\n"
10403"///\n"
10404"/// \\param __a\n"
10405"/// A 256-bit vector of [8 x float].\n"
10406"/// \\returns A 256-bit integer vector containing the converted values.\n"
10407"static __inline __m256i __DEFAULT_FN_ATTRS\n"
10408"_mm256_cvtps_epi32(__m256 __a)\n"
10409"{\n"
10410" return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);\n"
10411"}\n"
10412"\n"
10413"/// Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4\n"
10414"/// x double].\n"
10415"///\n"
10416"/// \\headerfile <x86intrin.h>\n"
10417"///\n"
10418"/// This intrinsic corresponds to the <c> VCVTPS2PD </c> instruction.\n"
10419"///\n"
10420"/// \\param __a\n"
10421"/// A 128-bit vector of [4 x float].\n"
10422"/// \\returns A 256-bit vector of [4 x double] containing the converted values.\n"
10423"static __inline __m256d __DEFAULT_FN_ATTRS\n"
10424"_mm256_cvtps_pd(__m128 __a)\n"
10425"{\n"
10426" return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);\n"
10427"}\n"
10428"\n"
10429"/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4\n"
10430"/// x i32], truncating the result by rounding towards zero when it is\n"
10431"/// inexact.\n"
10432"///\n"
10433"/// \\headerfile <x86intrin.h>\n"
10434"///\n"
10435"/// This intrinsic corresponds to the <c> VCVTTPD2DQ </c> instruction.\n"
10436"///\n"
10437"/// \\param __a\n"
10438"/// A 256-bit vector of [4 x double].\n"
10439"/// \\returns A 128-bit integer vector containing the converted values.\n"
10440"static __inline __m128i __DEFAULT_FN_ATTRS\n"
10441"_mm256_cvttpd_epi32(__m256d __a)\n"
10442"{\n"
10443" return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);\n"
10444"}\n"
10445"\n"
10446"/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4\n"
10447"/// x i32]. When a conversion is inexact, the value returned is rounded\n"
10448"/// according to the rounding control bits in the MXCSR register.\n"
10449"///\n"
10450"/// \\headerfile <x86intrin.h>\n"
10451"///\n"
10452"/// This intrinsic corresponds to the <c> VCVTPD2DQ </c> instruction.\n"
10453"///\n"
10454"/// \\param __a\n"
10455"/// A 256-bit vector of [4 x double].\n"
10456"/// \\returns A 128-bit integer vector containing the converted values.\n"
10457"static __inline __m128i __DEFAULT_FN_ATTRS\n"
10458"_mm256_cvtpd_epi32(__m256d __a)\n"
10459"{\n"
10460" return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);\n"
10461"}\n"
10462"\n"
10463"/// Converts a vector of [8 x float] into a vector of [8 x i32],\n"
10464"/// truncating the result by rounding towards zero when it is inexact.\n"
10465"///\n"
10466"/// \\headerfile <x86intrin.h>\n"
10467"///\n"
10468"/// This intrinsic corresponds to the <c> VCVTTPS2DQ </c> instruction.\n"
10469"///\n"
10470"/// \\param __a\n"
10471"/// A 256-bit vector of [8 x float].\n"
10472"/// \\returns A 256-bit integer vector containing the converted values.\n"
10473"static __inline __m256i __DEFAULT_FN_ATTRS\n"
10474"_mm256_cvttps_epi32(__m256 __a)\n"
10475"{\n"
10476" return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);\n"
10477"}\n"
10478"\n"
10479"/// Returns the first element of the input vector of [4 x double].\n"
10480"///\n"
10481"/// \\headerfile <avxintrin.h>\n"
10482"///\n"
10483"/// This intrinsic is a utility function and does not correspond to a specific\n"
10484"/// instruction.\n"
10485"///\n"
10486"/// \\param __a\n"
10487"/// A 256-bit vector of [4 x double].\n"
10488"/// \\returns A 64 bit double containing the first element of the input vector.\n"
10489"static __inline double __DEFAULT_FN_ATTRS\n"
10490"_mm256_cvtsd_f64(__m256d __a)\n"
10491"{\n"
10492" return __a[0];\n"
10493"}\n"
10494"\n"
10495"/// Returns the first element of the input vector of [8 x i32].\n"
10496"///\n"
10497"/// \\headerfile <avxintrin.h>\n"
10498"///\n"
10499"/// This intrinsic is a utility function and does not correspond to a specific\n"
10500"/// instruction.\n"
10501"///\n"
10502"/// \\param __a\n"
10503"/// A 256-bit vector of [8 x i32].\n"
10504"/// \\returns A 32 bit integer containing the first element of the input vector.\n"
10505"static __inline int __DEFAULT_FN_ATTRS\n"
10506"_mm256_cvtsi256_si32(__m256i __a)\n"
10507"{\n"
10508" __v8si __b = (__v8si)__a;\n"
10509" return __b[0];\n"
10510"}\n"
10511"\n"
10512"/// Returns the first element of the input vector of [8 x float].\n"
10513"///\n"
10514"/// \\headerfile <avxintrin.h>\n"
10515"///\n"
10516"/// This intrinsic is a utility function and does not correspond to a specific\n"
10517"/// instruction.\n"
10518"///\n"
10519"/// \\param __a\n"
10520"/// A 256-bit vector of [8 x float].\n"
10521"/// \\returns A 32 bit float containing the first element of the input vector.\n"
10522"static __inline float __DEFAULT_FN_ATTRS\n"
10523"_mm256_cvtss_f32(__m256 __a)\n"
10524"{\n"
10525" return __a[0];\n"
10526"}\n"
10527"\n"
10528"/* Vector replicate */\n"
10529"/// Moves and duplicates odd-indexed values from a 256-bit vector of\n"
10530"/// [8 x float] to float values in a 256-bit vector of [8 x float].\n"
10531"///\n"
10532"/// \\headerfile <x86intrin.h>\n"
10533"///\n"
10534"/// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.\n"
10535"///\n"
10536"/// \\param __a\n"
10537"/// A 256-bit vector of [8 x float]. \\n\n"
10538"/// Bits [255:224] of \\a __a are written to bits [255:224] and [223:192] of\n"
10539"/// the return value. \\n\n"
10540"/// Bits [191:160] of \\a __a are written to bits [191:160] and [159:128] of\n"
10541"/// the return value. \\n\n"
10542"/// Bits [127:96] of \\a __a are written to bits [127:96] and [95:64] of the\n"
10543"/// return value. \\n\n"
10544"/// Bits [63:32] of \\a __a are written to bits [63:32] and [31:0] of the\n"
10545"/// return value.\n"
10546"/// \\returns A 256-bit vector of [8 x float] containing the moved and duplicated\n"
10547"/// values.\n"
10548"static __inline __m256 __DEFAULT_FN_ATTRS\n"
10549"_mm256_movehdup_ps(__m256 __a)\n"
10550"{\n"
10551" return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);\n"
10552"}\n"
10553"\n"
10554"/// Moves and duplicates even-indexed values from a 256-bit vector of\n"
10555"/// [8 x float] to float values in a 256-bit vector of [8 x float].\n"
10556"///\n"
10557"/// \\headerfile <x86intrin.h>\n"
10558"///\n"
10559"/// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.\n"
10560"///\n"
10561"/// \\param __a\n"
10562"/// A 256-bit vector of [8 x float]. \\n\n"
10563"/// Bits [223:192] of \\a __a are written to bits [255:224] and [223:192] of\n"
10564"/// the return value. \\n\n"
10565"/// Bits [159:128] of \\a __a are written to bits [191:160] and [159:128] of\n"
10566"/// the return value. \\n\n"
10567"/// Bits [95:64] of \\a __a are written to bits [127:96] and [95:64] of the\n"
10568"/// return value. \\n\n"
10569"/// Bits [31:0] of \\a __a are written to bits [63:32] and [31:0] of the\n"
10570"/// return value.\n"
10571"/// \\returns A 256-bit vector of [8 x float] containing the moved and duplicated\n"
10572"/// values.\n"
10573"static __inline __m256 __DEFAULT_FN_ATTRS\n"
10574"_mm256_moveldup_ps(__m256 __a)\n"
10575"{\n"
10576" return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);\n"
10577"}\n"
10578"\n"
10579"/// Moves and duplicates double-precision floating point values from a\n"
10580"/// 256-bit vector of [4 x double] to double-precision values in a 256-bit\n"
10581"/// vector of [4 x double].\n"
10582"///\n"
10583"/// \\headerfile <x86intrin.h>\n"
10584"///\n"
10585"/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.\n"
10586"///\n"
10587"/// \\param __a\n"
10588"/// A 256-bit vector of [4 x double]. \\n\n"
10589"/// Bits [63:0] of \\a __a are written to bits [127:64] and [63:0] of the\n"
10590"/// return value. \\n\n"
10591"/// Bits [191:128] of \\a __a are written to bits [255:192] and [191:128] of\n"
10592"/// the return value.\n"
10593"/// \\returns A 256-bit vector of [4 x double] containing the moved and\n"
10594"/// duplicated values.\n"
10595"static __inline __m256d __DEFAULT_FN_ATTRS\n"
10596"_mm256_movedup_pd(__m256d __a)\n"
10597"{\n"
10598" return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);\n"
10599"}\n"
10600"\n"
10601"/* Unpack and Interleave */\n"
10602"/// Unpacks the odd-indexed vector elements from two 256-bit vectors of\n"
10603"/// [4 x double] and interleaves them into a 256-bit vector of [4 x double].\n"
10604"///\n"
10605"/// \\headerfile <x86intrin.h>\n"
10606"///\n"
10607"/// This intrinsic corresponds to the <c> VUNPCKHPD </c> instruction.\n"
10608"///\n"
10609"/// \\param __a\n"
10610"/// A 256-bit floating-point vector of [4 x double]. \\n\n"
10611"/// Bits [127:64] are written to bits [63:0] of the return value. \\n\n"
10612"/// Bits [255:192] are written to bits [191:128] of the return value. \\n\n"
10613"/// \\param __b\n"
10614"/// A 256-bit floating-point vector of [4 x double]. \\n\n"
10615"/// Bits [127:64] are written to bits [127:64] of the return value. \\n\n"
10616"/// Bits [255:192] are written to bits [255:192] of the return value. \\n\n"
10617"/// \\returns A 256-bit vector of [4 x double] containing the interleaved values.\n"
10618"static __inline __m256d __DEFAULT_FN_ATTRS\n"
10619"_mm256_unpackhi_pd(__m256d __a, __m256d __b)\n"
10620"{\n"
10621" return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2);\n"
10622"}\n"
10623"\n"
10624"/// Unpacks the even-indexed vector elements from two 256-bit vectors of\n"
10625"/// [4 x double] and interleaves them into a 256-bit vector of [4 x double].\n"
10626"///\n"
10627"/// \\headerfile <x86intrin.h>\n"
10628"///\n"
10629"/// This intrinsic corresponds to the <c> VUNPCKLPD </c> instruction.\n"
10630"///\n"
10631"/// \\param __a\n"
10632"/// A 256-bit floating-point vector of [4 x double]. \\n\n"
10633"/// Bits [63:0] are written to bits [63:0] of the return value. \\n\n"
10634"/// Bits [191:128] are written to bits [191:128] of the return value.\n"
10635"/// \\param __b\n"
10636"/// A 256-bit floating-point vector of [4 x double]. \\n\n"
10637"/// Bits [63:0] are written to bits [127:64] of the return value. \\n\n"
10638"/// Bits [191:128] are written to bits [255:192] of the return value. \\n\n"
10639"/// \\returns A 256-bit vector of [4 x double] containing the interleaved values.\n"
10640"static __inline __m256d __DEFAULT_FN_ATTRS\n"
10641"_mm256_unpacklo_pd(__m256d __a, __m256d __b)\n"
10642"{\n"
10643" return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2);\n"
10644"}\n"
10645"\n"
10646"/// Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the\n"
10647"/// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit\n"
10648"/// vector of [8 x float].\n"
10649"///\n"
10650"/// \\headerfile <x86intrin.h>\n"
10651"///\n"
10652"/// This intrinsic corresponds to the <c> VUNPCKHPS </c> instruction.\n"
10653"///\n"
10654"/// \\param __a\n"
10655"/// A 256-bit vector of [8 x float]. \\n\n"
10656"/// Bits [95:64] are written to bits [31:0] of the return value. \\n\n"
10657"/// Bits [127:96] are written to bits [95:64] of the return value. \\n\n"
10658"/// Bits [223:192] are written to bits [159:128] of the return value. \\n\n"
10659"/// Bits [255:224] are written to bits [223:192] of the return value.\n"
10660"/// \\param __b\n"
10661"/// A 256-bit vector of [8 x float]. \\n\n"
10662"/// Bits [95:64] are written to bits [63:32] of the return value. \\n\n"
10663"/// Bits [127:96] are written to bits [127:96] of the return value. \\n\n"
10664"/// Bits [223:192] are written to bits [191:160] of the return value. \\n\n"
10665"/// Bits [255:224] are written to bits [255:224] of the return value.\n"
10666"/// \\returns A 256-bit vector of [8 x float] containing the interleaved values.\n"
10667"static __inline __m256 __DEFAULT_FN_ATTRS\n"
10668"_mm256_unpackhi_ps(__m256 __a, __m256 __b)\n"
10669"{\n"
10670" return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);\n"
10671"}\n"
10672"\n"
10673"/// Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the\n"
10674"/// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit\n"
10675"/// vector of [8 x float].\n"
10676"///\n"
10677"/// \\headerfile <x86intrin.h>\n"
10678"///\n"
10679"/// This intrinsic corresponds to the <c> VUNPCKLPS </c> instruction.\n"
10680"///\n"
10681"/// \\param __a\n"
10682"/// A 256-bit vector of [8 x float]. \\n\n"
10683"/// Bits [31:0] are written to bits [31:0] of the return value. \\n\n"
10684"/// Bits [63:32] are written to bits [95:64] of the return value. \\n\n"
10685"/// Bits [159:128] are written to bits [159:128] of the return value. \\n\n"
10686"/// Bits [191:160] are written to bits [223:192] of the return value.\n"
10687"/// \\param __b\n"
10688"/// A 256-bit vector of [8 x float]. \\n\n"
10689"/// Bits [31:0] are written to bits [63:32] of the return value. \\n\n"
10690"/// Bits [63:32] are written to bits [127:96] of the return value. \\n\n"
10691"/// Bits [159:128] are written to bits [191:160] of the return value. \\n\n"
10692"/// Bits [191:160] are written to bits [255:224] of the return value.\n"
10693"/// \\returns A 256-bit vector of [8 x float] containing the interleaved values.\n"
10694"static __inline __m256 __DEFAULT_FN_ATTRS\n"
10695"_mm256_unpacklo_ps(__m256 __a, __m256 __b)\n"
10696"{\n"
10697" return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);\n"
10698"}\n"
10699"\n"
10700"/* Bit Test */\n"
10701"/// Given two 128-bit floating-point vectors of [2 x double], perform an\n"
10702"/// element-by-element comparison of the double-precision element in the\n"
10703"/// first source vector and the corresponding element in the second source\n"
10704"/// vector.\n"
10705"///\n"
10706"/// The EFLAGS register is updated as follows: \\n\n"
10707"/// If there is at least one pair of double-precision elements where the\n"
10708"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10709"/// ZF flag is set to 1. \\n\n"
10710"/// If there is at least one pair of double-precision elements where the\n"
10711"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10712"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10713"/// This intrinsic returns the value of the ZF flag.\n"
10714"///\n"
10715"/// \\headerfile <x86intrin.h>\n"
10716"///\n"
10717"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10718"///\n"
10719"/// \\param __a\n"
10720"/// A 128-bit vector of [2 x double].\n"
10721"/// \\param __b\n"
10722"/// A 128-bit vector of [2 x double].\n"
10723"/// \\returns the ZF flag in the EFLAGS register.\n"
10724"static __inline int __DEFAULT_FN_ATTRS128\n"
10725"_mm_testz_pd(__m128d __a, __m128d __b)\n"
10726"{\n"
10727" return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);\n"
10728"}\n"
10729"\n"
10730"/// Given two 128-bit floating-point vectors of [2 x double], perform an\n"
10731"/// element-by-element comparison of the double-precision element in the\n"
10732"/// first source vector and the corresponding element in the second source\n"
10733"/// vector.\n"
10734"///\n"
10735"/// The EFLAGS register is updated as follows: \\n\n"
10736"/// If there is at least one pair of double-precision elements where the\n"
10737"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10738"/// ZF flag is set to 1. \\n\n"
10739"/// If there is at least one pair of double-precision elements where the\n"
10740"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10741"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10742"/// This intrinsic returns the value of the CF flag.\n"
10743"///\n"
10744"/// \\headerfile <x86intrin.h>\n"
10745"///\n"
10746"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10747"///\n"
10748"/// \\param __a\n"
10749"/// A 128-bit vector of [2 x double].\n"
10750"/// \\param __b\n"
10751"/// A 128-bit vector of [2 x double].\n"
10752"/// \\returns the CF flag in the EFLAGS register.\n"
10753"static __inline int __DEFAULT_FN_ATTRS128\n"
10754"_mm_testc_pd(__m128d __a, __m128d __b)\n"
10755"{\n"
10756" return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);\n"
10757"}\n"
10758"\n"
10759"/// Given two 128-bit floating-point vectors of [2 x double], perform an\n"
10760"/// element-by-element comparison of the double-precision element in the\n"
10761"/// first source vector and the corresponding element in the second source\n"
10762"/// vector.\n"
10763"///\n"
10764"/// The EFLAGS register is updated as follows: \\n\n"
10765"/// If there is at least one pair of double-precision elements where the\n"
10766"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10767"/// ZF flag is set to 1. \\n\n"
10768"/// If there is at least one pair of double-precision elements where the\n"
10769"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10770"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10771"/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n"
10772"/// otherwise it returns 0.\n"
10773"///\n"
10774"/// \\headerfile <x86intrin.h>\n"
10775"///\n"
10776"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10777"///\n"
10778"/// \\param __a\n"
10779"/// A 128-bit vector of [2 x double].\n"
10780"/// \\param __b\n"
10781"/// A 128-bit vector of [2 x double].\n"
10782"/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n"
10783"static __inline int __DEFAULT_FN_ATTRS128\n"
10784"_mm_testnzc_pd(__m128d __a, __m128d __b)\n"
10785"{\n"
10786" return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);\n"
10787"}\n"
10788"\n"
10789"/// Given two 128-bit floating-point vectors of [4 x float], perform an\n"
10790"/// element-by-element comparison of the single-precision element in the\n"
10791"/// first source vector and the corresponding element in the second source\n"
10792"/// vector.\n"
10793"///\n"
10794"/// The EFLAGS register is updated as follows: \\n\n"
10795"/// If there is at least one pair of single-precision elements where the\n"
10796"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10797"/// ZF flag is set to 1. \\n\n"
10798"/// If there is at least one pair of single-precision elements where the\n"
10799"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10800"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10801"/// This intrinsic returns the value of the ZF flag.\n"
10802"///\n"
10803"/// \\headerfile <x86intrin.h>\n"
10804"///\n"
10805"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
10806"///\n"
10807"/// \\param __a\n"
10808"/// A 128-bit vector of [4 x float].\n"
10809"/// \\param __b\n"
10810"/// A 128-bit vector of [4 x float].\n"
10811"/// \\returns the ZF flag.\n"
10812"static __inline int __DEFAULT_FN_ATTRS128\n"
10813"_mm_testz_ps(__m128 __a, __m128 __b)\n"
10814"{\n"
10815" return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);\n"
10816"}\n"
10817"\n"
10818"/// Given two 128-bit floating-point vectors of [4 x float], perform an\n"
10819"/// element-by-element comparison of the single-precision element in the\n"
10820"/// first source vector and the corresponding element in the second source\n"
10821"/// vector.\n"
10822"///\n"
10823"/// The EFLAGS register is updated as follows: \\n\n"
10824"/// If there is at least one pair of single-precision elements where the\n"
10825"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10826"/// ZF flag is set to 1. \\n\n"
10827"/// If there is at least one pair of single-precision elements where the\n"
10828"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10829"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10830"/// This intrinsic returns the value of the CF flag.\n"
10831"///\n"
10832"/// \\headerfile <x86intrin.h>\n"
10833"///\n"
10834"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
10835"///\n"
10836"/// \\param __a\n"
10837"/// A 128-bit vector of [4 x float].\n"
10838"/// \\param __b\n"
10839"/// A 128-bit vector of [4 x float].\n"
10840"/// \\returns the CF flag.\n"
10841"static __inline int __DEFAULT_FN_ATTRS128\n"
10842"_mm_testc_ps(__m128 __a, __m128 __b)\n"
10843"{\n"
10844" return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);\n"
10845"}\n"
10846"\n"
10847"/// Given two 128-bit floating-point vectors of [4 x float], perform an\n"
10848"/// element-by-element comparison of the single-precision element in the\n"
10849"/// first source vector and the corresponding element in the second source\n"
10850"/// vector.\n"
10851"///\n"
10852"/// The EFLAGS register is updated as follows: \\n\n"
10853"/// If there is at least one pair of single-precision elements where the\n"
10854"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10855"/// ZF flag is set to 1. \\n\n"
10856"/// If there is at least one pair of single-precision elements where the\n"
10857"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10858"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10859"/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n"
10860"/// otherwise it returns 0.\n"
10861"///\n"
10862"/// \\headerfile <x86intrin.h>\n"
10863"///\n"
10864"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
10865"///\n"
10866"/// \\param __a\n"
10867"/// A 128-bit vector of [4 x float].\n"
10868"/// \\param __b\n"
10869"/// A 128-bit vector of [4 x float].\n"
10870"/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n"
10871"static __inline int __DEFAULT_FN_ATTRS128\n"
10872"_mm_testnzc_ps(__m128 __a, __m128 __b)\n"
10873"{\n"
10874" return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);\n"
10875"}\n"
10876"\n"
10877"/// Given two 256-bit floating-point vectors of [4 x double], perform an\n"
10878"/// element-by-element comparison of the double-precision elements in the\n"
10879"/// first source vector and the corresponding elements in the second source\n"
10880"/// vector.\n"
10881"///\n"
10882"/// The EFLAGS register is updated as follows: \\n\n"
10883"/// If there is at least one pair of double-precision elements where the\n"
10884"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10885"/// ZF flag is set to 1. \\n\n"
10886"/// If there is at least one pair of double-precision elements where the\n"
10887"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10888"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10889"/// This intrinsic returns the value of the ZF flag.\n"
10890"///\n"
10891"/// \\headerfile <x86intrin.h>\n"
10892"///\n"
10893"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10894"///\n"
10895"/// \\param __a\n"
10896"/// A 256-bit vector of [4 x double].\n"
10897"/// \\param __b\n"
10898"/// A 256-bit vector of [4 x double].\n"
10899"/// \\returns the ZF flag.\n"
10900"static __inline int __DEFAULT_FN_ATTRS\n"
10901"_mm256_testz_pd(__m256d __a, __m256d __b)\n"
10902"{\n"
10903" return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);\n"
10904"}\n"
10905"\n"
10906"/// Given two 256-bit floating-point vectors of [4 x double], perform an\n"
10907"/// element-by-element comparison of the double-precision elements in the\n"
10908"/// first source vector and the corresponding elements in the second source\n"
10909"/// vector.\n"
10910"///\n"
10911"/// The EFLAGS register is updated as follows: \\n\n"
10912"/// If there is at least one pair of double-precision elements where the\n"
10913"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10914"/// ZF flag is set to 1. \\n\n"
10915"/// If there is at least one pair of double-precision elements where the\n"
10916"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10917"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10918"/// This intrinsic returns the value of the CF flag.\n"
10919"///\n"
10920"/// \\headerfile <x86intrin.h>\n"
10921"///\n"
10922"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10923"///\n"
10924"/// \\param __a\n"
10925"/// A 256-bit vector of [4 x double].\n"
10926"/// \\param __b\n"
10927"/// A 256-bit vector of [4 x double].\n"
10928"/// \\returns the CF flag.\n"
10929"static __inline int __DEFAULT_FN_ATTRS\n"
10930"_mm256_testc_pd(__m256d __a, __m256d __b)\n"
10931"{\n"
10932" return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);\n"
10933"}\n"
10934"\n"
10935"/// Given two 256-bit floating-point vectors of [4 x double], perform an\n"
10936"/// element-by-element comparison of the double-precision elements in the\n"
10937"/// first source vector and the corresponding elements in the second source\n"
10938"/// vector.\n"
10939"///\n"
10940"/// The EFLAGS register is updated as follows: \\n\n"
10941"/// If there is at least one pair of double-precision elements where the\n"
10942"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10943"/// ZF flag is set to 1. \\n\n"
10944"/// If there is at least one pair of double-precision elements where the\n"
10945"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10946"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10947"/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n"
10948"/// otherwise it returns 0.\n"
10949"///\n"
10950"/// \\headerfile <x86intrin.h>\n"
10951"///\n"
10952"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10953"///\n"
10954"/// \\param __a\n"
10955"/// A 256-bit vector of [4 x double].\n"
10956"/// \\param __b\n"
10957"/// A 256-bit vector of [4 x double].\n"
10958"/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n"
10959"static __inline int __DEFAULT_FN_ATTRS\n"
10960"_mm256_testnzc_pd(__m256d __a, __m256d __b)\n"
10961"{\n"
10962" return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);\n"
10963"}\n"
10964"\n"
10965"/// Given two 256-bit floating-point vectors of [8 x float], perform an\n"
10966"/// element-by-element comparison of the single-precision element in the\n"
10967"/// first source vector and the corresponding element in the second source\n"
10968"/// vector.\n"
10969"///\n"
10970"/// The EFLAGS register is updated as follows: \\n\n"
10971"/// If there is at least one pair of single-precision elements where the\n"
10972"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10973"/// ZF flag is set to 1. \\n\n"
10974"/// If there is at least one pair of single-precision elements where the\n"
10975"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10976"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10977"/// This intrinsic returns the value of the ZF flag.\n"
10978"///\n"
10979"/// \\headerfile <x86intrin.h>\n"
10980"///\n"
10981"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
10982"///\n"
10983"/// \\param __a\n"
10984"/// A 256-bit vector of [8 x float].\n"
10985"/// \\param __b\n"
10986"/// A 256-bit vector of [8 x float].\n"
10987"/// \\returns the ZF flag.\n"
10988"static __inline int __DEFAULT_FN_ATTRS\n"
10989"_mm256_testz_ps(__m256 __a, __m256 __b)\n"
10990"{\n"
10991" return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);\n"
10992"}\n"
10993"\n"
10994"/// Given two 256-bit floating-point vectors of [8 x float], perform an\n"
10995"/// element-by-element comparison of the single-precision element in the\n"
10996"/// first source vector and the corresponding element in the second source\n"
10997"/// vector.\n"
10998"///\n"
10999"/// The EFLAGS register is updated as follows: \\n\n"
11000"/// If there is at least one pair of single-precision elements where the\n"
11001"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
11002"/// ZF flag is set to 1. \\n\n"
11003"/// If there is at least one pair of single-precision elements where the\n"
11004"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
11005"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
11006"/// This intrinsic returns the value of the CF flag.\n"
11007"///\n"
11008"/// \\headerfile <x86intrin.h>\n"
11009"///\n"
11010"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
11011"///\n"
11012"/// \\param __a\n"
11013"/// A 256-bit vector of [8 x float].\n"
11014"/// \\param __b\n"
11015"/// A 256-bit vector of [8 x float].\n"
11016"/// \\returns the CF flag.\n"
11017"static __inline int __DEFAULT_FN_ATTRS\n"
11018"_mm256_testc_ps(__m256 __a, __m256 __b)\n"
11019"{\n"
11020" return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);\n"
11021"}\n"
11022"\n"
11023"/// Given two 256-bit floating-point vectors of [8 x float], perform an\n"
11024"/// element-by-element comparison of the single-precision elements in the\n"
11025"/// first source vector and the corresponding elements in the second source\n"
11026"/// vector.\n"
11027"///\n"
11028"/// The EFLAGS register is updated as follows: \\n\n"
11029"/// If there is at least one pair of single-precision elements where the\n"
11030"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
11031"/// ZF flag is set to 1. \\n\n"
11032"/// If there is at least one pair of single-precision elements where the\n"
11033"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
11034"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
11035"/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n"
11036"/// otherwise it returns 0.\n"
11037"///\n"
11038"/// \\headerfile <x86intrin.h>\n"
11039"///\n"
11040"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
11041"///\n"
11042"/// \\param __a\n"
11043"/// A 256-bit vector of [8 x float].\n"
11044"/// \\param __b\n"
11045"/// A 256-bit vector of [8 x float].\n"
11046"/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n"
11047"static __inline int __DEFAULT_FN_ATTRS\n"
11048"_mm256_testnzc_ps(__m256 __a, __m256 __b)\n"
11049"{\n"
11050" return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);\n"
11051"}\n"
11052"\n"
11053"/// Given two 256-bit integer vectors, perform a bit-by-bit comparison\n"
11054"/// of the two source vectors.\n"
11055"///\n"
11056"/// The EFLAGS register is updated as follows: \\n\n"
11057"/// If there is at least one pair of bits where both bits are 1, the ZF flag\n"
11058"/// is set to 0. Otherwise the ZF flag is set to 1. \\n\n"
11059"/// If there is at least one pair of bits where the bit from the first source\n"
11060"/// vector is 0 and the bit from the second source vector is 1, the CF flag\n"
11061"/// is set to 0. Otherwise the CF flag is set to 1. \\n\n"
11062"/// This intrinsic returns the value of the ZF flag.\n"
11063"///\n"
11064"/// \\headerfile <x86intrin.h>\n"
11065"///\n"
11066"/// This intrinsic corresponds to the <c> VPTEST </c> instruction.\n"
11067"///\n"
11068"/// \\param __a\n"
11069"/// A 256-bit integer vector.\n"
11070"/// \\param __b\n"
11071"/// A 256-bit integer vector.\n"
11072"/// \\returns the ZF flag.\n"
11073"static __inline int __DEFAULT_FN_ATTRS\n"
11074"_mm256_testz_si256(__m256i __a, __m256i __b)\n"
11075"{\n"
11076" return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);\n"
11077"}\n"
11078"\n"
11079"/// Given two 256-bit integer vectors, perform a bit-by-bit comparison\n"
11080"/// of the two source vectors.\n"
11081"///\n"
11082"/// The EFLAGS register is updated as follows: \\n\n"
11083"/// If there is at least one pair of bits where both bits are 1, the ZF flag\n"
11084"/// is set to 0. Otherwise the ZF flag is set to 1. \\n\n"
11085"/// If there is at least one pair of bits where the bit from the first source\n"
11086"/// vector is 0 and the bit from the second source vector is 1, the CF flag\n"
11087"/// is set to 0. Otherwise the CF flag is set to 1. \\n\n"
11088"/// This intrinsic returns the value of the CF flag.\n"
11089"///\n"
11090"/// \\headerfile <x86intrin.h>\n"
11091"///\n"
11092"/// This intrinsic corresponds to the <c> VPTEST </c> instruction.\n"
11093"///\n"
11094"/// \\param __a\n"
11095"/// A 256-bit integer vector.\n"
11096"/// \\param __b\n"
11097"/// A 256-bit integer vector.\n"
11098"/// \\returns the CF flag.\n"
11099"static __inline int __DEFAULT_FN_ATTRS\n"
11100"_mm256_testc_si256(__m256i __a, __m256i __b)\n"
11101"{\n"
11102" return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);\n"
11103"}\n"
11104"\n"
11105"/// Given two 256-bit integer vectors, perform a bit-by-bit comparison\n"
11106"/// of the two source vectors.\n"
11107"///\n"
11108"/// The EFLAGS register is updated as follows: \\n\n"
11109"/// If there is at least one pair of bits where both bits are 1, the ZF flag\n"
11110"/// is set to 0. Otherwise the ZF flag is set to 1. \\n\n"
11111"/// If there is at least one pair of bits where the bit from the first source\n"
11112"/// vector is 0 and the bit from the second source vector is 1, the CF flag\n"
11113"/// is set to 0. Otherwise the CF flag is set to 1. \\n\n"
11114"/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n"
11115"/// otherwise it returns 0.\n"
11116"///\n"
11117"/// \\headerfile <x86intrin.h>\n"
11118"///\n"
11119"/// This intrinsic corresponds to the <c> VPTEST </c> instruction.\n"
11120"///\n"
11121"/// \\param __a\n"
11122"/// A 256-bit integer vector.\n"
11123"/// \\param __b\n"
11124"/// A 256-bit integer vector.\n"
11125"/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n"
11126"static __inline int __DEFAULT_FN_ATTRS\n"
11127"_mm256_testnzc_si256(__m256i __a, __m256i __b)\n"
11128"{\n"
11129" return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);\n"
11130"}\n"
11131"\n"
11132"/* Vector extract sign mask */\n"
11133"/// Extracts the sign bits of double-precision floating point elements\n"
11134"/// in a 256-bit vector of [4 x double] and writes them to the lower order\n"
11135"/// bits of the return value.\n"
11136"///\n"
11137"/// \\headerfile <x86intrin.h>\n"
11138"///\n"
11139"/// This intrinsic corresponds to the <c> VMOVMSKPD </c> instruction.\n"
11140"///\n"
11141"/// \\param __a\n"
11142"/// A 256-bit vector of [4 x double] containing the double-precision\n"
11143"/// floating point values with sign bits to be extracted.\n"
11144"/// \\returns The sign bits from the operand, written to bits [3:0].\n"
11145"static __inline int __DEFAULT_FN_ATTRS\n"
11146"_mm256_movemask_pd(__m256d __a)\n"
11147"{\n"
11148" return __builtin_ia32_movmskpd256((__v4df)__a);\n"
11149"}\n"
11150"\n"
11151"/// Extracts the sign bits of single-precision floating point elements\n"
11152"/// in a 256-bit vector of [8 x float] and writes them to the lower order\n"
11153"/// bits of the return value.\n"
11154"///\n"
11155"/// \\headerfile <x86intrin.h>\n"
11156"///\n"
11157"/// This intrinsic corresponds to the <c> VMOVMSKPS </c> instruction.\n"
11158"///\n"
11159"/// \\param __a\n"
11160"/// A 256-bit vector of [8 x float] containing the single-precision floating\n"
11161"/// point values with sign bits to be extracted.\n"
11162"/// \\returns The sign bits from the operand, written to bits [7:0].\n"
11163"static __inline int __DEFAULT_FN_ATTRS\n"
11164"_mm256_movemask_ps(__m256 __a)\n"
11165"{\n"
11166" return __builtin_ia32_movmskps256((__v8sf)__a);\n"
11167"}\n"
11168"\n"
11169"/* Vector __zero */\n"
11170"/// Zeroes the contents of all XMM or YMM registers.\n"
11171"///\n"
11172"/// \\headerfile <x86intrin.h>\n"
11173"///\n"
11174"/// This intrinsic corresponds to the <c> VZEROALL </c> instruction.\n"
11175"static __inline void __attribute__((__always_inline__, __nodebug__, __target__(\"avx\")))\n"
11176"_mm256_zeroall(void)\n"
11177"{\n"
11178" __builtin_ia32_vzeroall();\n"
11179"}\n"
11180"\n"
11181"/// Zeroes the upper 128 bits (bits 255:128) of all YMM registers.\n"
11182"///\n"
11183"/// \\headerfile <x86intrin.h>\n"
11184"///\n"
11185"/// This intrinsic corresponds to the <c> VZEROUPPER </c> instruction.\n"
11186"static __inline void __attribute__((__always_inline__, __nodebug__, __target__(\"avx\")))\n"
11187"_mm256_zeroupper(void)\n"
11188"{\n"
11189" __builtin_ia32_vzeroupper();\n"
11190"}\n"
11191"\n"
11192"/* Vector load with broadcast */\n"
11193"/// Loads a scalar single-precision floating point value from the\n"
11194"/// specified address pointed to by \\a __a and broadcasts it to the elements\n"
11195"/// of a [4 x float] vector.\n"
11196"///\n"
11197"/// \\headerfile <x86intrin.h>\n"
11198"///\n"
11199"/// This intrinsic corresponds to the <c> VBROADCASTSS </c> instruction.\n"
11200"///\n"
11201"/// \\param __a\n"
11202"/// The single-precision floating point value to be broadcast.\n"
11203"/// \\returns A 128-bit vector of [4 x float] whose 32-bit elements are set\n"
11204"/// equal to the broadcast value.\n"
11205"static __inline __m128 __DEFAULT_FN_ATTRS128\n"
11206"_mm_broadcast_ss(float const *__a)\n"
11207"{\n"
11208" float __f = *__a;\n"
11209" return __extension__ (__m128)(__v4sf){ __f, __f, __f, __f };\n"
11210"}\n"
11211"\n"
11212"/// Loads a scalar double-precision floating point value from the\n"
11213"/// specified address pointed to by \\a __a and broadcasts it to the elements\n"
11214"/// of a [4 x double] vector.\n"
11215"///\n"
11216"/// \\headerfile <x86intrin.h>\n"
11217"///\n"
11218"/// This intrinsic corresponds to the <c> VBROADCASTSD </c> instruction.\n"
11219"///\n"
11220"/// \\param __a\n"
11221"/// The double-precision floating point value to be broadcast.\n"
11222"/// \\returns A 256-bit vector of [4 x double] whose 64-bit elements are set\n"
11223"/// equal to the broadcast value.\n"
11224"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11225"_mm256_broadcast_sd(double const *__a)\n"
11226"{\n"
11227" double __d = *__a;\n"
11228" return __extension__ (__m256d)(__v4df){ __d, __d, __d, __d };\n"
11229"}\n"
11230"\n"
11231"/// Loads a scalar single-precision floating point value from the\n"
11232"/// specified address pointed to by \\a __a and broadcasts it to the elements\n"
11233"/// of a [8 x float] vector.\n"
11234"///\n"
11235"/// \\headerfile <x86intrin.h>\n"
11236"///\n"
11237"/// This intrinsic corresponds to the <c> VBROADCASTSS </c> instruction.\n"
11238"///\n"
11239"/// \\param __a\n"
11240"/// The single-precision floating point value to be broadcast.\n"
11241"/// \\returns A 256-bit vector of [8 x float] whose 32-bit elements are set\n"
11242"/// equal to the broadcast value.\n"
11243"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11244"_mm256_broadcast_ss(float const *__a)\n"
11245"{\n"
11246" float __f = *__a;\n"
11247" return __extension__ (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };\n"
11248"}\n"
11249"\n"
11250"/// Loads the data from a 128-bit vector of [2 x double] from the\n"
11251"/// specified address pointed to by \\a __a and broadcasts it to 128-bit\n"
11252"/// elements in a 256-bit vector of [4 x double].\n"
11253"///\n"
11254"/// \\headerfile <x86intrin.h>\n"
11255"///\n"
11256"/// This intrinsic corresponds to the <c> VBROADCASTF128 </c> instruction.\n"
11257"///\n"
11258"/// \\param __a\n"
11259"/// The 128-bit vector of [2 x double] to be broadcast.\n"
11260"/// \\returns A 256-bit vector of [4 x double] whose 128-bit elements are set\n"
11261"/// equal to the broadcast value.\n"
11262"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11263"_mm256_broadcast_pd(__m128d const *__a)\n"
11264"{\n"
11265" __m128d __b = _mm_loadu_pd((const double *)__a);\n"
11266" return (__m256d)__builtin_shufflevector((__v2df)__b, (__v2df)__b,\n"
11267" 0, 1, 0, 1);\n"
11268"}\n"
11269"\n"
11270"/// Loads the data from a 128-bit vector of [4 x float] from the\n"
11271"/// specified address pointed to by \\a __a and broadcasts it to 128-bit\n"
11272"/// elements in a 256-bit vector of [8 x float].\n"
11273"///\n"
11274"/// \\headerfile <x86intrin.h>\n"
11275"///\n"
11276"/// This intrinsic corresponds to the <c> VBROADCASTF128 </c> instruction.\n"
11277"///\n"
11278"/// \\param __a\n"
11279"/// The 128-bit vector of [4 x float] to be broadcast.\n"
11280"/// \\returns A 256-bit vector of [8 x float] whose 128-bit elements are set\n"
11281"/// equal to the broadcast value.\n"
11282"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11283"_mm256_broadcast_ps(__m128 const *__a)\n"
11284"{\n"
11285" __m128 __b = _mm_loadu_ps((const float *)__a);\n"
11286" return (__m256)__builtin_shufflevector((__v4sf)__b, (__v4sf)__b,\n"
11287" 0, 1, 2, 3, 0, 1, 2, 3);\n"
11288"}\n"
11289"\n"
11290"/* SIMD load ops */\n"
11291"/// Loads 4 double-precision floating point values from a 32-byte aligned\n"
11292"/// memory location pointed to by \\a __p into a vector of [4 x double].\n"
11293"///\n"
11294"/// \\headerfile <x86intrin.h>\n"
11295"///\n"
11296"/// This intrinsic corresponds to the <c> VMOVAPD </c> instruction.\n"
11297"///\n"
11298"/// \\param __p\n"
11299"/// A 32-byte aligned pointer to a memory location containing\n"
11300"/// double-precision floating point values.\n"
11301"/// \\returns A 256-bit vector of [4 x double] containing the moved values.\n"
11302"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11303"_mm256_load_pd(double const *__p)\n"
11304"{\n"
11305" return *(__m256d *)__p;\n"
11306"}\n"
11307"\n"
11308"/// Loads 8 single-precision floating point values from a 32-byte aligned\n"
11309"/// memory location pointed to by \\a __p into a vector of [8 x float].\n"
11310"///\n"
11311"/// \\headerfile <x86intrin.h>\n"
11312"///\n"
11313"/// This intrinsic corresponds to the <c> VMOVAPS </c> instruction.\n"
11314"///\n"
11315"/// \\param __p\n"
11316"/// A 32-byte aligned pointer to a memory location containing float values.\n"
11317"/// \\returns A 256-bit vector of [8 x float] containing the moved values.\n"
11318"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11319"_mm256_load_ps(float const *__p)\n"
11320"{\n"
11321" return *(__m256 *)__p;\n"
11322"}\n"
11323"\n"
11324"/// Loads 4 double-precision floating point values from an unaligned\n"
11325"/// memory location pointed to by \\a __p into a vector of [4 x double].\n"
11326"///\n"
11327"/// \\headerfile <x86intrin.h>\n"
11328"///\n"
11329"/// This intrinsic corresponds to the <c> VMOVUPD </c> instruction.\n"
11330"///\n"
11331"/// \\param __p\n"
11332"/// A pointer to a memory location containing double-precision floating\n"
11333"/// point values.\n"
11334"/// \\returns A 256-bit vector of [4 x double] containing the moved values.\n"
11335"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11336"_mm256_loadu_pd(double const *__p)\n"
11337"{\n"
11338" struct __loadu_pd {\n"
11339" __m256d __v;\n"
11340" } __attribute__((__packed__, __may_alias__));\n"
11341" return ((struct __loadu_pd*)__p)->__v;\n"
11342"}\n"
11343"\n"
11344"/// Loads 8 single-precision floating point values from an unaligned\n"
11345"/// memory location pointed to by \\a __p into a vector of [8 x float].\n"
11346"///\n"
11347"/// \\headerfile <x86intrin.h>\n"
11348"///\n"
11349"/// This intrinsic corresponds to the <c> VMOVUPS </c> instruction.\n"
11350"///\n"
11351"/// \\param __p\n"
11352"/// A pointer to a memory location containing single-precision floating\n"
11353"/// point values.\n"
11354"/// \\returns A 256-bit vector of [8 x float] containing the moved values.\n"
11355"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11356"_mm256_loadu_ps(float const *__p)\n"
11357"{\n"
11358" struct __loadu_ps {\n"
11359" __m256 __v;\n"
11360" } __attribute__((__packed__, __may_alias__));\n"
11361" return ((struct __loadu_ps*)__p)->__v;\n"
11362"}\n"
11363"\n"
11364"/// Loads 256 bits of integer data from a 32-byte aligned memory\n"
11365"/// location pointed to by \\a __p into elements of a 256-bit integer vector.\n"
11366"///\n"
11367"/// \\headerfile <x86intrin.h>\n"
11368"///\n"
11369"/// This intrinsic corresponds to the <c> VMOVDQA </c> instruction.\n"
11370"///\n"
11371"/// \\param __p\n"
11372"/// A 32-byte aligned pointer to a 256-bit integer vector containing integer\n"
11373"/// values.\n"
11374"/// \\returns A 256-bit integer vector containing the moved values.\n"
11375"static __inline __m256i __DEFAULT_FN_ATTRS\n"
11376"_mm256_load_si256(__m256i const *__p)\n"
11377"{\n"
11378" return *__p;\n"
11379"}\n"
11380"\n"
11381"/// Loads 256 bits of integer data from an unaligned memory location\n"
11382"/// pointed to by \\a __p into a 256-bit integer vector.\n"
11383"///\n"
11384"/// \\headerfile <x86intrin.h>\n"
11385"///\n"
11386"/// This intrinsic corresponds to the <c> VMOVDQU </c> instruction.\n"
11387"///\n"
11388"/// \\param __p\n"
11389"/// A pointer to a 256-bit integer vector containing integer values.\n"
11390"/// \\returns A 256-bit integer vector containing the moved values.\n"
11391"static __inline __m256i __DEFAULT_FN_ATTRS\n"
11392"_mm256_loadu_si256(__m256i const *__p)\n"
11393"{\n"
11394" struct __loadu_si256 {\n"
11395" __m256i __v;\n"
11396" } __attribute__((__packed__, __may_alias__));\n"
11397" return ((struct __loadu_si256*)__p)->__v;\n"
11398"}\n"
11399"\n"
11400"/// Loads 256 bits of integer data from an unaligned memory location\n"
11401"/// pointed to by \\a __p into a 256-bit integer vector. This intrinsic may\n"
11402"/// perform better than \\c _mm256_loadu_si256 when the data crosses a cache\n"
11403"/// line boundary.\n"
11404"///\n"
11405"/// \\headerfile <x86intrin.h>\n"
11406"///\n"
11407"/// This intrinsic corresponds to the <c> VLDDQU </c> instruction.\n"
11408"///\n"
11409"/// \\param __p\n"
11410"/// A pointer to a 256-bit integer vector containing integer values.\n"
11411"/// \\returns A 256-bit integer vector containing the moved values.\n"
11412"static __inline __m256i __DEFAULT_FN_ATTRS\n"
11413"_mm256_lddqu_si256(__m256i const *__p)\n"
11414"{\n"
11415" return (__m256i)__builtin_ia32_lddqu256((char const *)__p);\n"
11416"}\n"
11417"\n"
11418"/* SIMD store ops */\n"
11419"/// Stores double-precision floating point values from a 256-bit vector\n"
11420"/// of [4 x double] to a 32-byte aligned memory location pointed to by\n"
11421"/// \\a __p.\n"
11422"///\n"
11423"/// \\headerfile <x86intrin.h>\n"
11424"///\n"
11425"/// This intrinsic corresponds to the <c> VMOVAPD </c> instruction.\n"
11426"///\n"
11427"/// \\param __p\n"
11428"/// A 32-byte aligned pointer to a memory location that will receive the\n"
11429"/// double-precision floaing point values.\n"
11430"/// \\param __a\n"
11431"/// A 256-bit vector of [4 x double] containing the values to be moved.\n"
11432"static __inline void __DEFAULT_FN_ATTRS\n"
11433"_mm256_store_pd(double *__p, __m256d __a)\n"
11434"{\n"
11435" *(__m256d *)__p = __a;\n"
11436"}\n"
11437"\n"
11438"/// Stores single-precision floating point values from a 256-bit vector\n"
11439"/// of [8 x float] to a 32-byte aligned memory location pointed to by \\a __p.\n"
11440"///\n"
11441"/// \\headerfile <x86intrin.h>\n"
11442"///\n"
11443"/// This intrinsic corresponds to the <c> VMOVAPS </c> instruction.\n"
11444"///\n"
11445"/// \\param __p\n"
11446"/// A 32-byte aligned pointer to a memory location that will receive the\n"
11447"/// float values.\n"
11448"/// \\param __a\n"
11449"/// A 256-bit vector of [8 x float] containing the values to be moved.\n"
11450"static __inline void __DEFAULT_FN_ATTRS\n"
11451"_mm256_store_ps(float *__p, __m256 __a)\n"
11452"{\n"
11453" *(__m256 *)__p = __a;\n"
11454"}\n"
11455"\n"
11456"/// Stores double-precision floating point values from a 256-bit vector\n"
11457"/// of [4 x double] to an unaligned memory location pointed to by \\a __p.\n"
11458"///\n"
11459"/// \\headerfile <x86intrin.h>\n"
11460"///\n"
11461"/// This intrinsic corresponds to the <c> VMOVUPD </c> instruction.\n"
11462"///\n"
11463"/// \\param __p\n"
11464"/// A pointer to a memory location that will receive the double-precision\n"
11465"/// floating point values.\n"
11466"/// \\param __a\n"
11467"/// A 256-bit vector of [4 x double] containing the values to be moved.\n"
11468"static __inline void __DEFAULT_FN_ATTRS\n"
11469"_mm256_storeu_pd(double *__p, __m256d __a)\n"
11470"{\n"
11471" struct __storeu_pd {\n"
11472" __m256d __v;\n"
11473" } __attribute__((__packed__, __may_alias__));\n"
11474" ((struct __storeu_pd*)__p)->__v = __a;\n"
11475"}\n"
11476"\n"
11477"/// Stores single-precision floating point values from a 256-bit vector\n"
11478"/// of [8 x float] to an unaligned memory location pointed to by \\a __p.\n"
11479"///\n"
11480"/// \\headerfile <x86intrin.h>\n"
11481"///\n"
11482"/// This intrinsic corresponds to the <c> VMOVUPS </c> instruction.\n"
11483"///\n"
11484"/// \\param __p\n"
11485"/// A pointer to a memory location that will receive the float values.\n"
11486"/// \\param __a\n"
11487"/// A 256-bit vector of [8 x float] containing the values to be moved.\n"
11488"static __inline void __DEFAULT_FN_ATTRS\n"
11489"_mm256_storeu_ps(float *__p, __m256 __a)\n"
11490"{\n"
11491" struct __storeu_ps {\n"
11492" __m256 __v;\n"
11493" } __attribute__((__packed__, __may_alias__));\n"
11494" ((struct __storeu_ps*)__p)->__v = __a;\n"
11495"}\n"
11496"\n"
11497"/// Stores integer values from a 256-bit integer vector to a 32-byte\n"
11498"/// aligned memory location pointed to by \\a __p.\n"
11499"///\n"
11500"/// \\headerfile <x86intrin.h>\n"
11501"///\n"
11502"/// This intrinsic corresponds to the <c> VMOVDQA </c> instruction.\n"
11503"///\n"
11504"/// \\param __p\n"
11505"/// A 32-byte aligned pointer to a memory location that will receive the\n"
11506"/// integer values.\n"
11507"/// \\param __a\n"
11508"/// A 256-bit integer vector containing the values to be moved.\n"
11509"static __inline void __DEFAULT_FN_ATTRS\n"
11510"_mm256_store_si256(__m256i *__p, __m256i __a)\n"
11511"{\n"
11512" *__p = __a;\n"
11513"}\n"
11514"\n"
11515"/// Stores integer values from a 256-bit integer vector to an unaligned\n"
11516"/// memory location pointed to by \\a __p.\n"
11517"///\n"
11518"/// \\headerfile <x86intrin.h>\n"
11519"///\n"
11520"/// This intrinsic corresponds to the <c> VMOVDQU </c> instruction.\n"
11521"///\n"
11522"/// \\param __p\n"
11523"/// A pointer to a memory location that will receive the integer values.\n"
11524"/// \\param __a\n"
11525"/// A 256-bit integer vector containing the values to be moved.\n"
11526"static __inline void __DEFAULT_FN_ATTRS\n"
11527"_mm256_storeu_si256(__m256i *__p, __m256i __a)\n"
11528"{\n"
11529" struct __storeu_si256 {\n"
11530" __m256i __v;\n"
11531" } __attribute__((__packed__, __may_alias__));\n"
11532" ((struct __storeu_si256*)__p)->__v = __a;\n"
11533"}\n"
11534"\n"
11535"/* Conditional load ops */\n"
11536"/// Conditionally loads double-precision floating point elements from a\n"
11537"/// memory location pointed to by \\a __p into a 128-bit vector of\n"
11538"/// [2 x double], depending on the mask bits associated with each data\n"
11539"/// element.\n"
11540"///\n"
11541"/// \\headerfile <x86intrin.h>\n"
11542"///\n"
11543"/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n"
11544"///\n"
11545"/// \\param __p\n"
11546"/// A pointer to a memory location that contains the double-precision\n"
11547"/// floating point values.\n"
11548"/// \\param __m\n"
11549"/// A 128-bit integer vector containing the mask. The most significant bit of\n"
11550"/// each data element represents the mask bits. If a mask bit is zero, the\n"
11551"/// corresponding value in the memory location is not loaded and the\n"
11552"/// corresponding field in the return value is set to zero.\n"
11553"/// \\returns A 128-bit vector of [2 x double] containing the loaded values.\n"
11554"static __inline __m128d __DEFAULT_FN_ATTRS128\n"
11555"_mm_maskload_pd(double const *__p, __m128i __m)\n"
11556"{\n"
11557" return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m);\n"
11558"}\n"
11559"\n"
11560"/// Conditionally loads double-precision floating point elements from a\n"
11561"/// memory location pointed to by \\a __p into a 256-bit vector of\n"
11562"/// [4 x double], depending on the mask bits associated with each data\n"
11563"/// element.\n"
11564"///\n"
11565"/// \\headerfile <x86intrin.h>\n"
11566"///\n"
11567"/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n"
11568"///\n"
11569"/// \\param __p\n"
11570"/// A pointer to a memory location that contains the double-precision\n"
11571"/// floating point values.\n"
11572"/// \\param __m\n"
11573"/// A 256-bit integer vector of [4 x quadword] containing the mask. The most\n"
11574"/// significant bit of each quadword element represents the mask bits. If a\n"
11575"/// mask bit is zero, the corresponding value in the memory location is not\n"
11576"/// loaded and the corresponding field in the return value is set to zero.\n"
11577"/// \\returns A 256-bit vector of [4 x double] containing the loaded values.\n"
11578"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11579"_mm256_maskload_pd(double const *__p, __m256i __m)\n"
11580"{\n"
11581" return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,\n"
11582" (__v4di)__m);\n"
11583"}\n"
11584"\n"
11585"/// Conditionally loads single-precision floating point elements from a\n"
11586"/// memory location pointed to by \\a __p into a 128-bit vector of\n"
11587"/// [4 x float], depending on the mask bits associated with each data\n"
11588"/// element.\n"
11589"///\n"
11590"/// \\headerfile <x86intrin.h>\n"
11591"///\n"
11592"/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n"
11593"///\n"
11594"/// \\param __p\n"
11595"/// A pointer to a memory location that contains the single-precision\n"
11596"/// floating point values.\n"
11597"/// \\param __m\n"
11598"/// A 128-bit integer vector containing the mask. The most significant bit of\n"
11599"/// each data element represents the mask bits. If a mask bit is zero, the\n"
11600"/// corresponding value in the memory location is not loaded and the\n"
11601"/// corresponding field in the return value is set to zero.\n"
11602"/// \\returns A 128-bit vector of [4 x float] containing the loaded values.\n"
11603"static __inline __m128 __DEFAULT_FN_ATTRS128\n"
11604"_mm_maskload_ps(float const *__p, __m128i __m)\n"
11605"{\n"
11606" return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m);\n"
11607"}\n"
11608"\n"
11609"/// Conditionally loads single-precision floating point elements from a\n"
11610"/// memory location pointed to by \\a __p into a 256-bit vector of\n"
11611"/// [8 x float], depending on the mask bits associated with each data\n"
11612"/// element.\n"
11613"///\n"
11614"/// \\headerfile <x86intrin.h>\n"
11615"///\n"
11616"/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n"
11617"///\n"
11618"/// \\param __p\n"
11619"/// A pointer to a memory location that contains the single-precision\n"
11620"/// floating point values.\n"
11621"/// \\param __m\n"
11622"/// A 256-bit integer vector of [8 x dword] containing the mask. The most\n"
11623"/// significant bit of each dword element represents the mask bits. If a mask\n"
11624"/// bit is zero, the corresponding value in the memory location is not loaded\n"
11625"/// and the corresponding field in the return value is set to zero.\n"
11626"/// \\returns A 256-bit vector of [8 x float] containing the loaded values.\n"
11627"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11628"_mm256_maskload_ps(float const *__p, __m256i __m)\n"
11629"{\n"
11630" return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8si)__m);\n"
11631"}\n"
11632"\n"
11633"/* Conditional store ops */\n"
11634"/// Moves single-precision floating point values from a 256-bit vector\n"
11635"/// of [8 x float] to a memory location pointed to by \\a __p, according to\n"
11636"/// the specified mask.\n"
11637"///\n"
11638"/// \\headerfile <x86intrin.h>\n"
11639"///\n"
11640"/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n"
11641"///\n"
11642"/// \\param __p\n"
11643"/// A pointer to a memory location that will receive the float values.\n"
11644"/// \\param __m\n"
11645"/// A 256-bit integer vector of [8 x dword] containing the mask. The most\n"
11646"/// significant bit of each dword element in the mask vector represents the\n"
11647"/// mask bits. If a mask bit is zero, the corresponding value from vector\n"
11648"/// \\a __a is not stored and the corresponding field in the memory location\n"
11649"/// pointed to by \\a __p is not changed.\n"
11650"/// \\param __a\n"
11651"/// A 256-bit vector of [8 x float] containing the values to be stored.\n"
11652"static __inline void __DEFAULT_FN_ATTRS\n"
11653"_mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a)\n"
11654"{\n"
11655" __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a);\n"
11656"}\n"
11657"\n"
11658"/// Moves double-precision values from a 128-bit vector of [2 x double]\n"
11659"/// to a memory location pointed to by \\a __p, according to the specified\n"
11660"/// mask.\n"
11661"///\n"
11662"/// \\headerfile <x86intrin.h>\n"
11663"///\n"
11664"/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n"
11665"///\n"
11666"/// \\param __p\n"
11667"/// A pointer to a memory location that will receive the float values.\n"
11668"/// \\param __m\n"
11669"/// A 128-bit integer vector containing the mask. The most significant bit of\n"
11670"/// each field in the mask vector represents the mask bits. If a mask bit is\n"
11671"/// zero, the corresponding value from vector \\a __a is not stored and the\n"
11672"/// corresponding field in the memory location pointed to by \\a __p is not\n"
11673"/// changed.\n"
11674"/// \\param __a\n"
11675"/// A 128-bit vector of [2 x double] containing the values to be stored.\n"
11676"static __inline void __DEFAULT_FN_ATTRS128\n"
11677"_mm_maskstore_pd(double *__p, __m128i __m, __m128d __a)\n"
11678"{\n"
11679" __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a);\n"
11680"}\n"
11681"\n"
11682"/// Moves double-precision values from a 256-bit vector of [4 x double]\n"
11683"/// to a memory location pointed to by \\a __p, according to the specified\n"
11684"/// mask.\n"
11685"///\n"
11686"/// \\headerfile <x86intrin.h>\n"
11687"///\n"
11688"/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n"
11689"///\n"
11690"/// \\param __p\n"
11691"/// A pointer to a memory location that will receive the float values.\n"
11692"/// \\param __m\n"
11693"/// A 256-bit integer vector of [4 x quadword] containing the mask. The most\n"
11694"/// significant bit of each quadword element in the mask vector represents\n"
11695"/// the mask bits. If a mask bit is zero, the corresponding value from vector\n"
11696"/// __a is not stored and the corresponding field in the memory location\n"
11697"/// pointed to by \\a __p is not changed.\n"
11698"/// \\param __a\n"
11699"/// A 256-bit vector of [4 x double] containing the values to be stored.\n"
11700"static __inline void __DEFAULT_FN_ATTRS\n"
11701"_mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a)\n"
11702"{\n"
11703" __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a);\n"
11704"}\n"
11705"\n"
11706"/// Moves single-precision floating point values from a 128-bit vector\n"
11707"/// of [4 x float] to a memory location pointed to by \\a __p, according to\n"
11708"/// the specified mask.\n"
11709"///\n"
11710"/// \\headerfile <x86intrin.h>\n"
11711"///\n"
11712"/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n"
11713"///\n"
11714"/// \\param __p\n"
11715"/// A pointer to a memory location that will receive the float values.\n"
11716"/// \\param __m\n"
11717"/// A 128-bit integer vector containing the mask. The most significant bit of\n"
11718"/// each field in the mask vector represents the mask bits. If a mask bit is\n"
11719"/// zero, the corresponding value from vector __a is not stored and the\n"
11720"/// corresponding field in the memory location pointed to by \\a __p is not\n"
11721"/// changed.\n"
11722"/// \\param __a\n"
11723"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
11724"static __inline void __DEFAULT_FN_ATTRS128\n"
11725"_mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)\n"
11726"{\n"
11727" __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a);\n"
11728"}\n"
11729"\n"
11730"/* Cacheability support ops */\n"
11731"/// Moves integer data from a 256-bit integer vector to a 32-byte\n"
11732"/// aligned memory location. To minimize caching, the data is flagged as\n"
11733"/// non-temporal (unlikely to be used again soon).\n"
11734"///\n"
11735"/// \\headerfile <x86intrin.h>\n"
11736"///\n"
11737"/// This intrinsic corresponds to the <c> VMOVNTDQ </c> instruction.\n"
11738"///\n"
11739"/// \\param __a\n"
11740"/// A pointer to a 32-byte aligned memory location that will receive the\n"
11741"/// integer values.\n"
11742"/// \\param __b\n"
11743"/// A 256-bit integer vector containing the values to be moved.\n"
11744"static __inline void __DEFAULT_FN_ATTRS\n"
11745"_mm256_stream_si256(__m256i *__a, __m256i __b)\n"
11746"{\n"
11747" typedef __v4di __v4di_aligned __attribute__((aligned(32)));\n"
11748" __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);\n"
11749"}\n"
11750"\n"
11751"/// Moves double-precision values from a 256-bit vector of [4 x double]\n"
11752"/// to a 32-byte aligned memory location. To minimize caching, the data is\n"
11753"/// flagged as non-temporal (unlikely to be used again soon).\n"
11754"///\n"
11755"/// \\headerfile <x86intrin.h>\n"
11756"///\n"
11757"/// This intrinsic corresponds to the <c> VMOVNTPD </c> instruction.\n"
11758"///\n"
11759"/// \\param __a\n"
11760"/// A pointer to a 32-byte aligned memory location that will receive the\n"
11761"/// double-precision floating-point values.\n"
11762"/// \\param __b\n"
11763"/// A 256-bit vector of [4 x double] containing the values to be moved.\n"
11764"static __inline void __DEFAULT_FN_ATTRS\n"
11765"_mm256_stream_pd(double *__a, __m256d __b)\n"
11766"{\n"
11767" typedef __v4df __v4df_aligned __attribute__((aligned(32)));\n"
11768" __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);\n"
11769"}\n"
11770"\n"
11771"/// Moves single-precision floating point values from a 256-bit vector\n"
11772"/// of [8 x float] to a 32-byte aligned memory location. To minimize\n"
11773"/// caching, the data is flagged as non-temporal (unlikely to be used again\n"
11774"/// soon).\n"
11775"///\n"
11776"/// \\headerfile <x86intrin.h>\n"
11777"///\n"
11778"/// This intrinsic corresponds to the <c> VMOVNTPS </c> instruction.\n"
11779"///\n"
11780"/// \\param __p\n"
11781"/// A pointer to a 32-byte aligned memory location that will receive the\n"
11782"/// single-precision floating point values.\n"
11783"/// \\param __a\n"
11784"/// A 256-bit vector of [8 x float] containing the values to be moved.\n"
11785"static __inline void __DEFAULT_FN_ATTRS\n"
11786"_mm256_stream_ps(float *__p, __m256 __a)\n"
11787"{\n"
11788" typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));\n"
11789" __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);\n"
11790"}\n"
11791"\n"
11792"/* Create vectors */\n"
11793"/// Create a 256-bit vector of [4 x double] with undefined values.\n"
11794"///\n"
11795"/// \\headerfile <x86intrin.h>\n"
11796"///\n"
11797"/// This intrinsic has no corresponding instruction.\n"
11798"///\n"
11799"/// \\returns A 256-bit vector of [4 x double] containing undefined values.\n"
11800"static __inline__ __m256d __DEFAULT_FN_ATTRS\n"
11801"_mm256_undefined_pd(void)\n"
11802"{\n"
11803" return (__m256d)__builtin_ia32_undef256();\n"
11804"}\n"
11805"\n"
11806"/// Create a 256-bit vector of [8 x float] with undefined values.\n"
11807"///\n"
11808"/// \\headerfile <x86intrin.h>\n"
11809"///\n"
11810"/// This intrinsic has no corresponding instruction.\n"
11811"///\n"
11812"/// \\returns A 256-bit vector of [8 x float] containing undefined values.\n"
11813"static __inline__ __m256 __DEFAULT_FN_ATTRS\n"
11814"_mm256_undefined_ps(void)\n"
11815"{\n"
11816" return (__m256)__builtin_ia32_undef256();\n"
11817"}\n"
11818"\n"
11819"/// Create a 256-bit integer vector with undefined values.\n"
11820"///\n"
11821"/// \\headerfile <x86intrin.h>\n"
11822"///\n"
11823"/// This intrinsic has no corresponding instruction.\n"
11824"///\n"
11825"/// \\returns A 256-bit integer vector containing undefined values.\n"
11826"static __inline__ __m256i __DEFAULT_FN_ATTRS\n"
11827"_mm256_undefined_si256(void)\n"
11828"{\n"
11829" return (__m256i)__builtin_ia32_undef256();\n"
11830"}\n"
11831"\n"
11832"/// Constructs a 256-bit floating-point vector of [4 x double]\n"
11833"/// initialized with the specified double-precision floating-point values.\n"
11834"///\n"
11835"/// \\headerfile <x86intrin.h>\n"
11836"///\n"
11837"/// This intrinsic corresponds to the <c> VUNPCKLPD+VINSERTF128 </c>\n"
11838"/// instruction.\n"
11839"///\n"
11840"/// \\param __a\n"
11841"/// A double-precision floating-point value used to initialize bits [255:192]\n"
11842"/// of the result.\n"
11843"/// \\param __b\n"
11844"/// A double-precision floating-point value used to initialize bits [191:128]\n"
11845"/// of the result.\n"
11846"/// \\param __c\n"
11847"/// A double-precision floating-point value used to initialize bits [127:64]\n"
11848"/// of the result.\n"
11849"/// \\param __d\n"
11850"/// A double-precision floating-point value used to initialize bits [63:0]\n"
11851"/// of the result.\n"
11852"/// \\returns An initialized 256-bit floating-point vector of [4 x double].\n"
11853"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11854"_mm256_set_pd(double __a, double __b, double __c, double __d)\n"
11855"{\n"
11856" return __extension__ (__m256d){ __d, __c, __b, __a };\n"
11857"}\n"
11858"\n"
11859"/// Constructs a 256-bit floating-point vector of [8 x float] initialized\n"
11860"/// with the specified single-precision floating-point values.\n"
11861"///\n"
11862"/// \\headerfile <x86intrin.h>\n"
11863"///\n"
11864"/// This intrinsic is a utility function and does not correspond to a specific\n"
11865"/// instruction.\n"
11866"///\n"
11867"/// \\param __a\n"
11868"/// A single-precision floating-point value used to initialize bits [255:224]\n"
11869"/// of the result.\n"
11870"/// \\param __b\n"
11871"/// A single-precision floating-point value used to initialize bits [223:192]\n"
11872"/// of the result.\n"
11873"/// \\param __c\n"
11874"/// A single-precision floating-point value used to initialize bits [191:160]\n"
11875"/// of the result.\n"
11876"/// \\param __d\n"
11877"/// A single-precision floating-point value used to initialize bits [159:128]\n"
11878"/// of the result.\n"
11879"/// \\param __e\n"
11880"/// A single-precision floating-point value used to initialize bits [127:96]\n"
11881"/// of the result.\n"
11882"/// \\param __f\n"
11883"/// A single-precision floating-point value used to initialize bits [95:64]\n"
11884"/// of the result.\n"
11885"/// \\param __g\n"
11886"/// A single-precision floating-point value used to initialize bits [63:32]\n"
11887"/// of the result.\n"
11888"/// \\param __h\n"
11889"/// A single-precision floating-point value used to initialize bits [31:0]\n"
11890"/// of the result.\n"
11891"/// \\returns An initialized 256-bit floating-point vector of [8 x float].\n"
11892"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11893"_mm256_set_ps(float __a, float __b, float __c, float __d,\n"
11894" float __e, float __f, float __g, float __h)\n"
11895"{\n"
11896" return __extension__ (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };\n"
11897"}\n"
11898"\n"
11899"/// Constructs a 256-bit integer vector initialized with the specified\n"
11900"/// 32-bit integral values.\n"
11901"///\n"
11902"/// \\headerfile <x86intrin.h>\n"
11903"///\n"
11904"/// This intrinsic is a utility function and does not correspond to a specific\n"
11905"/// instruction.\n"
11906"///\n"
11907"/// \\param __i0\n"
11908"/// A 32-bit integral value used to initialize bits [255:224] of the result.\n"
11909"/// \\param __i1\n"
11910"/// A 32-bit integral value used to initialize bits [223:192] of the result.\n"
11911"/// \\param __i2\n"
11912"/// A 32-bit integral value used to initialize bits [191:160] of the result.\n"
11913"/// \\param __i3\n"
11914"/// A 32-bit integral value used to initialize bits [159:128] of the result.\n"
11915"/// \\param __i4\n"
11916"/// A 32-bit integral value used to initialize bits [127:96] of the result.\n"
11917"/// \\param __i5\n"
11918"/// A 32-bit integral value used to initialize bits [95:64] of the result.\n"
11919"/// \\param __i6\n"
11920"/// A 32-bit integral value used to initialize bits [63:32] of the result.\n"
11921"/// \\param __i7\n"
11922"/// A 32-bit integral value used to initialize bits [31:0] of the result.\n"
11923"/// \\returns An initialized 256-bit integer vector.\n"
11924"static __inline __m256i __DEFAULT_FN_ATTRS\n"
11925"_mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,\n"
11926" int __i4, int __i5, int __i6, int __i7)\n"
11927"{\n"
11928" return __extension__ (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };\n"
11929"}\n"
11930"\n"
11931"/// Constructs a 256-bit integer vector initialized with the specified\n"
11932"/// 16-bit integral values.\n"
11933"///\n"
11934"/// \\headerfile <x86intrin.h>\n"
11935"///\n"
11936"/// This intrinsic is a utility function and does not correspond to a specific\n"
11937"/// instruction.\n"
11938"///\n"
11939"/// \\param __w15\n"
11940"/// A 16-bit integral value used to initialize bits [255:240] of the result.\n"
11941"/// \\param __w14\n"
11942"/// A 16-bit integral value used to initialize bits [239:224] of the result.\n"
11943"/// \\param __w13\n"
11944"/// A 16-bit integral value used to initialize bits [223:208] of the result.\n"
11945"/// \\param __w12\n"
11946"/// A 16-bit integral value used to initialize bits [207:192] of the result.\n"
11947"/// \\param __w11\n"
11948"/// A 16-bit integral value used to initialize bits [191:176] of the result.\n"
11949"/// \\param __w10\n"
11950"/// A 16-bit integral value used to initialize bits [175:160] of the result.\n"
11951"/// \\param __w09\n"
11952"/// A 16-bit integral value used to initialize bits [159:144] of the result.\n"
11953"/// \\param __w08\n"
11954"/// A 16-bit integral value used to initialize bits [143:128] of the result.\n"
11955"/// \\param __w07\n"
11956"/// A 16-bit integral value used to initialize bits [127:112] of the result.\n"
11957"/// \\param __w06\n"
11958"/// A 16-bit integral value used to initialize bits [111:96] of the result.\n"
11959"/// \\param __w05\n"
11960"/// A 16-bit integral value used to initialize bits [95:80] of the result.\n"
11961"/// \\param __w04\n"
11962"/// A 16-bit integral value used to initialize bits [79:64] of the result.\n"
11963"/// \\param __w03\n"
11964"/// A 16-bit integral value used to initialize bits [63:48] of the result.\n"
11965"/// \\param __w02\n"
11966"/// A 16-bit integral value used to initialize bits [47:32] of the result.\n"
11967"/// \\param __w01\n"
11968"/// A 16-bit integral value used to initialize bits [31:16] of the result.\n"
11969"/// \\param __w00\n"
11970"/// A 16-bit integral value used to initialize bits [15:0] of the result.\n"
11971"/// \\returns An initialized 256-bit integer vector.\n"
11972"static __inline __m256i __DEFAULT_FN_ATTRS\n"
11973"_mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,\n"
11974" short __w11, short __w10, short __w09, short __w08,\n"
11975" short __w07, short __w06, short __w05, short __w04,\n"
11976" short __w03, short __w02, short __w01, short __w00)\n"
11977"{\n"
11978" return __extension__ (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06,\n"
11979" __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };\n"
11980"}\n"
11981"\n"
11982"/// Constructs a 256-bit integer vector initialized with the specified\n"
11983"/// 8-bit integral values.\n"
11984"///\n"
11985"/// \\headerfile <x86intrin.h>\n"
11986"///\n"
11987"/// This intrinsic is a utility function and does not correspond to a specific\n"
11988"/// instruction.\n"
11989"///\n"
11990"/// \\param __b31\n"
11991"/// An 8-bit integral value used to initialize bits [255:248] of the result.\n"
11992"/// \\param __b30\n"
11993"/// An 8-bit integral value used to initialize bits [247:240] of the result.\n"
11994"/// \\param __b29\n"
11995"/// An 8-bit integral value used to initialize bits [239:232] of the result.\n"
11996"/// \\param __b28\n"
11997"/// An 8-bit integral value used to initialize bits [231:224] of the result.\n"
11998"/// \\param __b27\n"
11999"/// An 8-bit integral value used to initialize bits [223:216] of the result.\n"
12000"/// \\param __b26\n"
12001"/// An 8-bit integral value used to initialize bits [215:208] of the result.\n"
12002"/// \\param __b25\n"
12003"/// An 8-bit integral value used to initialize bits [207:200] of the result.\n"
12004"/// \\param __b24\n"
12005"/// An 8-bit integral value used to initialize bits [199:192] of the result.\n"
12006"/// \\param __b23\n"
12007"/// An 8-bit integral value used to initialize bits [191:184] of the result.\n"
12008"/// \\param __b22\n"
12009"/// An 8-bit integral value used to initialize bits [183:176] of the result.\n"
12010"/// \\param __b21\n"
12011"/// An 8-bit integral value used to initialize bits [175:168] of the result.\n"
12012"/// \\param __b20\n"
12013"/// An 8-bit integral value used to initialize bits [167:160] of the result.\n"
12014"/// \\param __b19\n"
12015"/// An 8-bit integral value used to initialize bits [159:152] of the result.\n"
12016"/// \\param __b18\n"
12017"/// An 8-bit integral value used to initialize bits [151:144] of the result.\n"
12018"/// \\param __b17\n"
12019"/// An 8-bit integral value used to initialize bits [143:136] of the result.\n"
12020"/// \\param __b16\n"
12021"/// An 8-bit integral value used to initialize bits [135:128] of the result.\n"
12022"/// \\param __b15\n"
12023"/// An 8-bit integral value used to initialize bits [127:120] of the result.\n"
12024"/// \\param __b14\n"
12025"/// An 8-bit integral value used to initialize bits [119:112] of the result.\n"
12026"/// \\param __b13\n"
12027"/// An 8-bit integral value used to initialize bits [111:104] of the result.\n"
12028"/// \\param __b12\n"
12029"/// An 8-bit integral value used to initialize bits [103:96] of the result.\n"
12030"/// \\param __b11\n"
12031"/// An 8-bit integral value used to initialize bits [95:88] of the result.\n"
12032"/// \\param __b10\n"
12033"/// An 8-bit integral value used to initialize bits [87:80] of the result.\n"
12034"/// \\param __b09\n"
12035"/// An 8-bit integral value used to initialize bits [79:72] of the result.\n"
12036"/// \\param __b08\n"
12037"/// An 8-bit integral value used to initialize bits [71:64] of the result.\n"
12038"/// \\param __b07\n"
12039"/// An 8-bit integral value used to initialize bits [63:56] of the result.\n"
12040"/// \\param __b06\n"
12041"/// An 8-bit integral value used to initialize bits [55:48] of the result.\n"
12042"/// \\param __b05\n"
12043"/// An 8-bit integral value used to initialize bits [47:40] of the result.\n"
12044"/// \\param __b04\n"
12045"/// An 8-bit integral value used to initialize bits [39:32] of the result.\n"
12046"/// \\param __b03\n"
12047"/// An 8-bit integral value used to initialize bits [31:24] of the result.\n"
12048"/// \\param __b02\n"
12049"/// An 8-bit integral value used to initialize bits [23:16] of the result.\n"
12050"/// \\param __b01\n"
12051"/// An 8-bit integral value used to initialize bits [15:8] of the result.\n"
12052"/// \\param __b00\n"
12053"/// An 8-bit integral value used to initialize bits [7:0] of the result.\n"
12054"/// \\returns An initialized 256-bit integer vector.\n"
12055"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12056"_mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,\n"
12057" char __b27, char __b26, char __b25, char __b24,\n"
12058" char __b23, char __b22, char __b21, char __b20,\n"
12059" char __b19, char __b18, char __b17, char __b16,\n"
12060" char __b15, char __b14, char __b13, char __b12,\n"
12061" char __b11, char __b10, char __b09, char __b08,\n"
12062" char __b07, char __b06, char __b05, char __b04,\n"
12063" char __b03, char __b02, char __b01, char __b00)\n"
12064"{\n"
12065" return __extension__ (__m256i)(__v32qi){\n"
12066" __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,\n"
12067" __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,\n"
12068" __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,\n"
12069" __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31\n"
12070" };\n"
12071"}\n"
12072"\n"
12073"/// Constructs a 256-bit integer vector initialized with the specified\n"
12074"/// 64-bit integral values.\n"
12075"///\n"
12076"/// \\headerfile <x86intrin.h>\n"
12077"///\n"
12078"/// This intrinsic corresponds to the <c> VPUNPCKLQDQ+VINSERTF128 </c>\n"
12079"/// instruction.\n"
12080"///\n"
12081"/// \\param __a\n"
12082"/// A 64-bit integral value used to initialize bits [255:192] of the result.\n"
12083"/// \\param __b\n"
12084"/// A 64-bit integral value used to initialize bits [191:128] of the result.\n"
12085"/// \\param __c\n"
12086"/// A 64-bit integral value used to initialize bits [127:64] of the result.\n"
12087"/// \\param __d\n"
12088"/// A 64-bit integral value used to initialize bits [63:0] of the result.\n"
12089"/// \\returns An initialized 256-bit integer vector.\n"
12090"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12091"_mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)\n"
12092"{\n"
12093" return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a };\n"
12094"}\n"
12095"\n"
12096"/* Create vectors with elements in reverse order */\n"
12097"/// Constructs a 256-bit floating-point vector of [4 x double],\n"
12098"/// initialized in reverse order with the specified double-precision\n"
12099"/// floating-point values.\n"
12100"///\n"
12101"/// \\headerfile <x86intrin.h>\n"
12102"///\n"
12103"/// This intrinsic corresponds to the <c> VUNPCKLPD+VINSERTF128 </c>\n"
12104"/// instruction.\n"
12105"///\n"
12106"/// \\param __a\n"
12107"/// A double-precision floating-point value used to initialize bits [63:0]\n"
12108"/// of the result.\n"
12109"/// \\param __b\n"
12110"/// A double-precision floating-point value used to initialize bits [127:64]\n"
12111"/// of the result.\n"
12112"/// \\param __c\n"
12113"/// A double-precision floating-point value used to initialize bits [191:128]\n"
12114"/// of the result.\n"
12115"/// \\param __d\n"
12116"/// A double-precision floating-point value used to initialize bits [255:192]\n"
12117"/// of the result.\n"
12118"/// \\returns An initialized 256-bit floating-point vector of [4 x double].\n"
12119"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12120"_mm256_setr_pd(double __a, double __b, double __c, double __d)\n"
12121"{\n"
12122" return _mm256_set_pd(__d, __c, __b, __a);\n"
12123"}\n"
12124"\n"
12125"/// Constructs a 256-bit floating-point vector of [8 x float],\n"
12126"/// initialized in reverse order with the specified single-precision\n"
12127"/// float-point values.\n"
12128"///\n"
12129"/// \\headerfile <x86intrin.h>\n"
12130"///\n"
12131"/// This intrinsic is a utility function and does not correspond to a specific\n"
12132"/// instruction.\n"
12133"///\n"
12134"/// \\param __a\n"
12135"/// A single-precision floating-point value used to initialize bits [31:0]\n"
12136"/// of the result.\n"
12137"/// \\param __b\n"
12138"/// A single-precision floating-point value used to initialize bits [63:32]\n"
12139"/// of the result.\n"
12140"/// \\param __c\n"
12141"/// A single-precision floating-point value used to initialize bits [95:64]\n"
12142"/// of the result.\n"
12143"/// \\param __d\n"
12144"/// A single-precision floating-point value used to initialize bits [127:96]\n"
12145"/// of the result.\n"
12146"/// \\param __e\n"
12147"/// A single-precision floating-point value used to initialize bits [159:128]\n"
12148"/// of the result.\n"
12149"/// \\param __f\n"
12150"/// A single-precision floating-point value used to initialize bits [191:160]\n"
12151"/// of the result.\n"
12152"/// \\param __g\n"
12153"/// A single-precision floating-point value used to initialize bits [223:192]\n"
12154"/// of the result.\n"
12155"/// \\param __h\n"
12156"/// A single-precision floating-point value used to initialize bits [255:224]\n"
12157"/// of the result.\n"
12158"/// \\returns An initialized 256-bit floating-point vector of [8 x float].\n"
12159"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12160"_mm256_setr_ps(float __a, float __b, float __c, float __d,\n"
12161" float __e, float __f, float __g, float __h)\n"
12162"{\n"
12163" return _mm256_set_ps(__h, __g, __f, __e, __d, __c, __b, __a);\n"
12164"}\n"
12165"\n"
12166"/// Constructs a 256-bit integer vector, initialized in reverse order\n"
12167"/// with the specified 32-bit integral values.\n"
12168"///\n"
12169"/// \\headerfile <x86intrin.h>\n"
12170"///\n"
12171"/// This intrinsic is a utility function and does not correspond to a specific\n"
12172"/// instruction.\n"
12173"///\n"
12174"/// \\param __i0\n"
12175"/// A 32-bit integral value used to initialize bits [31:0] of the result.\n"
12176"/// \\param __i1\n"
12177"/// A 32-bit integral value used to initialize bits [63:32] of the result.\n"
12178"/// \\param __i2\n"
12179"/// A 32-bit integral value used to initialize bits [95:64] of the result.\n"
12180"/// \\param __i3\n"
12181"/// A 32-bit integral value used to initialize bits [127:96] of the result.\n"
12182"/// \\param __i4\n"
12183"/// A 32-bit integral value used to initialize bits [159:128] of the result.\n"
12184"/// \\param __i5\n"
12185"/// A 32-bit integral value used to initialize bits [191:160] of the result.\n"
12186"/// \\param __i6\n"
12187"/// A 32-bit integral value used to initialize bits [223:192] of the result.\n"
12188"/// \\param __i7\n"
12189"/// A 32-bit integral value used to initialize bits [255:224] of the result.\n"
12190"/// \\returns An initialized 256-bit integer vector.\n"
12191"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12192"_mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,\n"
12193" int __i4, int __i5, int __i6, int __i7)\n"
12194"{\n"
12195" return _mm256_set_epi32(__i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0);\n"
12196"}\n"
12197"\n"
12198"/// Constructs a 256-bit integer vector, initialized in reverse order\n"
12199"/// with the specified 16-bit integral values.\n"
12200"///\n"
12201"/// \\headerfile <x86intrin.h>\n"
12202"///\n"
12203"/// This intrinsic is a utility function and does not correspond to a specific\n"
12204"/// instruction.\n"
12205"///\n"
12206"/// \\param __w15\n"
12207"/// A 16-bit integral value used to initialize bits [15:0] of the result.\n"
12208"/// \\param __w14\n"
12209"/// A 16-bit integral value used to initialize bits [31:16] of the result.\n"
12210"/// \\param __w13\n"
12211"/// A 16-bit integral value used to initialize bits [47:32] of the result.\n"
12212"/// \\param __w12\n"
12213"/// A 16-bit integral value used to initialize bits [63:48] of the result.\n"
12214"/// \\param __w11\n"
12215"/// A 16-bit integral value used to initialize bits [79:64] of the result.\n"
12216"/// \\param __w10\n"
12217"/// A 16-bit integral value used to initialize bits [95:80] of the result.\n"
12218"/// \\param __w09\n"
12219"/// A 16-bit integral value used to initialize bits [111:96] of the result.\n"
12220"/// \\param __w08\n"
12221"/// A 16-bit integral value used to initialize bits [127:112] of the result.\n"
12222"/// \\param __w07\n"
12223"/// A 16-bit integral value used to initialize bits [143:128] of the result.\n"
12224"/// \\param __w06\n"
12225"/// A 16-bit integral value used to initialize bits [159:144] of the result.\n"
12226"/// \\param __w05\n"
12227"/// A 16-bit integral value used to initialize bits [175:160] of the result.\n"
12228"/// \\param __w04\n"
12229"/// A 16-bit integral value used to initialize bits [191:176] of the result.\n"
12230"/// \\param __w03\n"
12231"/// A 16-bit integral value used to initialize bits [207:192] of the result.\n"
12232"/// \\param __w02\n"
12233"/// A 16-bit integral value used to initialize bits [223:208] of the result.\n"
12234"/// \\param __w01\n"
12235"/// A 16-bit integral value used to initialize bits [239:224] of the result.\n"
12236"/// \\param __w00\n"
12237"/// A 16-bit integral value used to initialize bits [255:240] of the result.\n"
12238"/// \\returns An initialized 256-bit integer vector.\n"
12239"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12240"_mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,\n"
12241" short __w11, short __w10, short __w09, short __w08,\n"
12242" short __w07, short __w06, short __w05, short __w04,\n"
12243" short __w03, short __w02, short __w01, short __w00)\n"
12244"{\n"
12245" return _mm256_set_epi16(__w00, __w01, __w02, __w03,\n"
12246" __w04, __w05, __w06, __w07,\n"
12247" __w08, __w09, __w10, __w11,\n"
12248" __w12, __w13, __w14, __w15);\n"
12249"}\n"
12250"\n"
12251"/// Constructs a 256-bit integer vector, initialized in reverse order\n"
12252"/// with the specified 8-bit integral values.\n"
12253"///\n"
12254"/// \\headerfile <x86intrin.h>\n"
12255"///\n"
12256"/// This intrinsic is a utility function and does not correspond to a specific\n"
12257"/// instruction.\n"
12258"///\n"
12259"/// \\param __b31\n"
12260"/// An 8-bit integral value used to initialize bits [7:0] of the result.\n"
12261"/// \\param __b30\n"
12262"/// An 8-bit integral value used to initialize bits [15:8] of the result.\n"
12263"/// \\param __b29\n"
12264"/// An 8-bit integral value used to initialize bits [23:16] of the result.\n"
12265"/// \\param __b28\n"
12266"/// An 8-bit integral value used to initialize bits [31:24] of the result.\n"
12267"/// \\param __b27\n"
12268"/// An 8-bit integral value used to initialize bits [39:32] of the result.\n"
12269"/// \\param __b26\n"
12270"/// An 8-bit integral value used to initialize bits [47:40] of the result.\n"
12271"/// \\param __b25\n"
12272"/// An 8-bit integral value used to initialize bits [55:48] of the result.\n"
12273"/// \\param __b24\n"
12274"/// An 8-bit integral value used to initialize bits [63:56] of the result.\n"
12275"/// \\param __b23\n"
12276"/// An 8-bit integral value used to initialize bits [71:64] of the result.\n"
12277"/// \\param __b22\n"
12278"/// An 8-bit integral value used to initialize bits [79:72] of the result.\n"
12279"/// \\param __b21\n"
12280"/// An 8-bit integral value used to initialize bits [87:80] of the result.\n"
12281"/// \\param __b20\n"
12282"/// An 8-bit integral value used to initialize bits [95:88] of the result.\n"
12283"/// \\param __b19\n"
12284"/// An 8-bit integral value used to initialize bits [103:96] of the result.\n"
12285"/// \\param __b18\n"
12286"/// An 8-bit integral value used to initialize bits [111:104] of the result.\n"
12287"/// \\param __b17\n"
12288"/// An 8-bit integral value used to initialize bits [119:112] of the result.\n"
12289"/// \\param __b16\n"
12290"/// An 8-bit integral value used to initialize bits [127:120] of the result.\n"
12291"/// \\param __b15\n"
12292"/// An 8-bit integral value used to initialize bits [135:128] of the result.\n"
12293"/// \\param __b14\n"
12294"/// An 8-bit integral value used to initialize bits [143:136] of the result.\n"
12295"/// \\param __b13\n"
12296"/// An 8-bit integral value used to initialize bits [151:144] of the result.\n"
12297"/// \\param __b12\n"
12298"/// An 8-bit integral value used to initialize bits [159:152] of the result.\n"
12299"/// \\param __b11\n"
12300"/// An 8-bit integral value used to initialize bits [167:160] of the result.\n"
12301"/// \\param __b10\n"
12302"/// An 8-bit integral value used to initialize bits [175:168] of the result.\n"
12303"/// \\param __b09\n"
12304"/// An 8-bit integral value used to initialize bits [183:176] of the result.\n"
12305"/// \\param __b08\n"
12306"/// An 8-bit integral value used to initialize bits [191:184] of the result.\n"
12307"/// \\param __b07\n"
12308"/// An 8-bit integral value used to initialize bits [199:192] of the result.\n"
12309"/// \\param __b06\n"
12310"/// An 8-bit integral value used to initialize bits [207:200] of the result.\n"
12311"/// \\param __b05\n"
12312"/// An 8-bit integral value used to initialize bits [215:208] of the result.\n"
12313"/// \\param __b04\n"
12314"/// An 8-bit integral value used to initialize bits [223:216] of the result.\n"
12315"/// \\param __b03\n"
12316"/// An 8-bit integral value used to initialize bits [231:224] of the result.\n"
12317"/// \\param __b02\n"
12318"/// An 8-bit integral value used to initialize bits [239:232] of the result.\n"
12319"/// \\param __b01\n"
12320"/// An 8-bit integral value used to initialize bits [247:240] of the result.\n"
12321"/// \\param __b00\n"
12322"/// An 8-bit integral value used to initialize bits [255:248] of the result.\n"
12323"/// \\returns An initialized 256-bit integer vector.\n"
12324"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12325"_mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,\n"
12326" char __b27, char __b26, char __b25, char __b24,\n"
12327" char __b23, char __b22, char __b21, char __b20,\n"
12328" char __b19, char __b18, char __b17, char __b16,\n"
12329" char __b15, char __b14, char __b13, char __b12,\n"
12330" char __b11, char __b10, char __b09, char __b08,\n"
12331" char __b07, char __b06, char __b05, char __b04,\n"
12332" char __b03, char __b02, char __b01, char __b00)\n"
12333"{\n"
12334" return _mm256_set_epi8(__b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,\n"
12335" __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,\n"
12336" __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,\n"
12337" __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31);\n"
12338"}\n"
12339"\n"
12340"/// Constructs a 256-bit integer vector, initialized in reverse order\n"
12341"/// with the specified 64-bit integral values.\n"
12342"///\n"
12343"/// \\headerfile <x86intrin.h>\n"
12344"///\n"
12345"/// This intrinsic corresponds to the <c> VPUNPCKLQDQ+VINSERTF128 </c>\n"
12346"/// instruction.\n"
12347"///\n"
12348"/// \\param __a\n"
12349"/// A 64-bit integral value used to initialize bits [63:0] of the result.\n"
12350"/// \\param __b\n"
12351"/// A 64-bit integral value used to initialize bits [127:64] of the result.\n"
12352"/// \\param __c\n"
12353"/// A 64-bit integral value used to initialize bits [191:128] of the result.\n"
12354"/// \\param __d\n"
12355"/// A 64-bit integral value used to initialize bits [255:192] of the result.\n"
12356"/// \\returns An initialized 256-bit integer vector.\n"
12357"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12358"_mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)\n"
12359"{\n"
12360" return _mm256_set_epi64x(__d, __c, __b, __a);\n"
12361"}\n"
12362"\n"
12363"/* Create vectors with repeated elements */\n"
12364"/// Constructs a 256-bit floating-point vector of [4 x double], with each\n"
12365"/// of the four double-precision floating-point vector elements set to the\n"
12366"/// specified double-precision floating-point value.\n"
12367"///\n"
12368"/// \\headerfile <x86intrin.h>\n"
12369"///\n"
12370"/// This intrinsic corresponds to the <c> VMOVDDUP+VINSERTF128 </c> instruction.\n"
12371"///\n"
12372"/// \\param __w\n"
12373"/// A double-precision floating-point value used to initialize each vector\n"
12374"/// element of the result.\n"
12375"/// \\returns An initialized 256-bit floating-point vector of [4 x double].\n"
12376"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12377"_mm256_set1_pd(double __w)\n"
12378"{\n"
12379" return _mm256_set_pd(__w, __w, __w, __w);\n"
12380"}\n"
12381"\n"
12382"/// Constructs a 256-bit floating-point vector of [8 x float], with each\n"
12383"/// of the eight single-precision floating-point vector elements set to the\n"
12384"/// specified single-precision floating-point value.\n"
12385"///\n"
12386"/// \\headerfile <x86intrin.h>\n"
12387"///\n"
12388"/// This intrinsic corresponds to the <c> VPERMILPS+VINSERTF128 </c>\n"
12389"/// instruction.\n"
12390"///\n"
12391"/// \\param __w\n"
12392"/// A single-precision floating-point value used to initialize each vector\n"
12393"/// element of the result.\n"
12394"/// \\returns An initialized 256-bit floating-point vector of [8 x float].\n"
12395"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12396"_mm256_set1_ps(float __w)\n"
12397"{\n"
12398" return _mm256_set_ps(__w, __w, __w, __w, __w, __w, __w, __w);\n"
12399"}\n"
12400"\n"
12401"/// Constructs a 256-bit integer vector of [8 x i32], with each of the\n"
12402"/// 32-bit integral vector elements set to the specified 32-bit integral\n"
12403"/// value.\n"
12404"///\n"
12405"/// \\headerfile <x86intrin.h>\n"
12406"///\n"
12407"/// This intrinsic corresponds to the <c> VPERMILPS+VINSERTF128 </c>\n"
12408"/// instruction.\n"
12409"///\n"
12410"/// \\param __i\n"
12411"/// A 32-bit integral value used to initialize each vector element of the\n"
12412"/// result.\n"
12413"/// \\returns An initialized 256-bit integer vector of [8 x i32].\n"
12414"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12415"_mm256_set1_epi32(int __i)\n"
12416"{\n"
12417" return _mm256_set_epi32(__i, __i, __i, __i, __i, __i, __i, __i);\n"
12418"}\n"
12419"\n"
12420"/// Constructs a 256-bit integer vector of [16 x i16], with each of the\n"
12421"/// 16-bit integral vector elements set to the specified 16-bit integral\n"
12422"/// value.\n"
12423"///\n"
12424"/// \\headerfile <x86intrin.h>\n"
12425"///\n"
12426"/// This intrinsic corresponds to the <c> VPSHUFB+VINSERTF128 </c> instruction.\n"
12427"///\n"
12428"/// \\param __w\n"
12429"/// A 16-bit integral value used to initialize each vector element of the\n"
12430"/// result.\n"
12431"/// \\returns An initialized 256-bit integer vector of [16 x i16].\n"
12432"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12433"_mm256_set1_epi16(short __w)\n"
12434"{\n"
12435" return _mm256_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w,\n"
12436" __w, __w, __w, __w, __w, __w, __w, __w);\n"
12437"}\n"
12438"\n"
12439"/// Constructs a 256-bit integer vector of [32 x i8], with each of the\n"
12440"/// 8-bit integral vector elements set to the specified 8-bit integral value.\n"
12441"///\n"
12442"/// \\headerfile <x86intrin.h>\n"
12443"///\n"
12444"/// This intrinsic corresponds to the <c> VPSHUFB+VINSERTF128 </c> instruction.\n"
12445"///\n"
12446"/// \\param __b\n"
12447"/// An 8-bit integral value used to initialize each vector element of the\n"
12448"/// result.\n"
12449"/// \\returns An initialized 256-bit integer vector of [32 x i8].\n"
12450"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12451"_mm256_set1_epi8(char __b)\n"
12452"{\n"
12453" return _mm256_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b,\n"
12454" __b, __b, __b, __b, __b, __b, __b, __b,\n"
12455" __b, __b, __b, __b, __b, __b, __b, __b,\n"
12456" __b, __b, __b, __b, __b, __b, __b, __b);\n"
12457"}\n"
12458"\n"
12459"/// Constructs a 256-bit integer vector of [4 x i64], with each of the\n"
12460"/// 64-bit integral vector elements set to the specified 64-bit integral\n"
12461"/// value.\n"
12462"///\n"
12463"/// \\headerfile <x86intrin.h>\n"
12464"///\n"
12465"/// This intrinsic corresponds to the <c> VMOVDDUP+VINSERTF128 </c> instruction.\n"
12466"///\n"
12467"/// \\param __q\n"
12468"/// A 64-bit integral value used to initialize each vector element of the\n"
12469"/// result.\n"
12470"/// \\returns An initialized 256-bit integer vector of [4 x i64].\n"
12471"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12472"_mm256_set1_epi64x(long long __q)\n"
12473"{\n"
12474" return _mm256_set_epi64x(__q, __q, __q, __q);\n"
12475"}\n"
12476"\n"
12477"/* Create __zeroed vectors */\n"
12478"/// Constructs a 256-bit floating-point vector of [4 x double] with all\n"
12479"/// vector elements initialized to zero.\n"
12480"///\n"
12481"/// \\headerfile <x86intrin.h>\n"
12482"///\n"
12483"/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n"
12484"///\n"
12485"/// \\returns A 256-bit vector of [4 x double] with all elements set to zero.\n"
12486"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12487"_mm256_setzero_pd(void)\n"
12488"{\n"
12489" return __extension__ (__m256d){ 0, 0, 0, 0 };\n"
12490"}\n"
12491"\n"
12492"/// Constructs a 256-bit floating-point vector of [8 x float] with all\n"
12493"/// vector elements initialized to zero.\n"
12494"///\n"
12495"/// \\headerfile <x86intrin.h>\n"
12496"///\n"
12497"/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n"
12498"///\n"
12499"/// \\returns A 256-bit vector of [8 x float] with all elements set to zero.\n"
12500"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12501"_mm256_setzero_ps(void)\n"
12502"{\n"
12503" return __extension__ (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };\n"
12504"}\n"
12505"\n"
12506"/// Constructs a 256-bit integer vector initialized to zero.\n"
12507"///\n"
12508"/// \\headerfile <x86intrin.h>\n"
12509"///\n"
12510"/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n"
12511"///\n"
12512"/// \\returns A 256-bit integer vector initialized to zero.\n"
12513"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12514"_mm256_setzero_si256(void)\n"
12515"{\n"
12516" return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };\n"
12517"}\n"
12518"\n"
12519"/* Cast between vector types */\n"
12520"/// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit\n"
12521"/// floating-point vector of [8 x float].\n"
12522"///\n"
12523"/// \\headerfile <x86intrin.h>\n"
12524"///\n"
12525"/// This intrinsic has no corresponding instruction.\n"
12526"///\n"
12527"/// \\param __a\n"
12528"/// A 256-bit floating-point vector of [4 x double].\n"
12529"/// \\returns A 256-bit floating-point vector of [8 x float] containing the same\n"
12530"/// bitwise pattern as the parameter.\n"
12531"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12532"_mm256_castpd_ps(__m256d __a)\n"
12533"{\n"
12534" return (__m256)__a;\n"
12535"}\n"
12536"\n"
12537"/// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit\n"
12538"/// integer vector.\n"
12539"///\n"
12540"/// \\headerfile <x86intrin.h>\n"
12541"///\n"
12542"/// This intrinsic has no corresponding instruction.\n"
12543"///\n"
12544"/// \\param __a\n"
12545"/// A 256-bit floating-point vector of [4 x double].\n"
12546"/// \\returns A 256-bit integer vector containing the same bitwise pattern as the\n"
12547"/// parameter.\n"
12548"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12549"_mm256_castpd_si256(__m256d __a)\n"
12550"{\n"
12551" return (__m256i)__a;\n"
12552"}\n"
12553"\n"
12554"/// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit\n"
12555"/// floating-point vector of [4 x double].\n"
12556"///\n"
12557"/// \\headerfile <x86intrin.h>\n"
12558"///\n"
12559"/// This intrinsic has no corresponding instruction.\n"
12560"///\n"
12561"/// \\param __a\n"
12562"/// A 256-bit floating-point vector of [8 x float].\n"
12563"/// \\returns A 256-bit floating-point vector of [4 x double] containing the same\n"
12564"/// bitwise pattern as the parameter.\n"
12565"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12566"_mm256_castps_pd(__m256 __a)\n"
12567"{\n"
12568" return (__m256d)__a;\n"
12569"}\n"
12570"\n"
12571"/// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit\n"
12572"/// integer vector.\n"
12573"///\n"
12574"/// \\headerfile <x86intrin.h>\n"
12575"///\n"
12576"/// This intrinsic has no corresponding instruction.\n"
12577"///\n"
12578"/// \\param __a\n"
12579"/// A 256-bit floating-point vector of [8 x float].\n"
12580"/// \\returns A 256-bit integer vector containing the same bitwise pattern as the\n"
12581"/// parameter.\n"
12582"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12583"_mm256_castps_si256(__m256 __a)\n"
12584"{\n"
12585" return (__m256i)__a;\n"
12586"}\n"
12587"\n"
12588"/// Casts a 256-bit integer vector into a 256-bit floating-point vector\n"
12589"/// of [8 x float].\n"
12590"///\n"
12591"/// \\headerfile <x86intrin.h>\n"
12592"///\n"
12593"/// This intrinsic has no corresponding instruction.\n"
12594"///\n"
12595"/// \\param __a\n"
12596"/// A 256-bit integer vector.\n"
12597"/// \\returns A 256-bit floating-point vector of [8 x float] containing the same\n"
12598"/// bitwise pattern as the parameter.\n"
12599"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12600"_mm256_castsi256_ps(__m256i __a)\n"
12601"{\n"
12602" return (__m256)__a;\n"
12603"}\n"
12604"\n"
12605"/// Casts a 256-bit integer vector into a 256-bit floating-point vector\n"
12606"/// of [4 x double].\n"
12607"///\n"
12608"/// \\headerfile <x86intrin.h>\n"
12609"///\n"
12610"/// This intrinsic has no corresponding instruction.\n"
12611"///\n"
12612"/// \\param __a\n"
12613"/// A 256-bit integer vector.\n"
12614"/// \\returns A 256-bit floating-point vector of [4 x double] containing the same\n"
12615"/// bitwise pattern as the parameter.\n"
12616"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12617"_mm256_castsi256_pd(__m256i __a)\n"
12618"{\n"
12619" return (__m256d)__a;\n"
12620"}\n"
12621"\n"
12622"/// Returns the lower 128 bits of a 256-bit floating-point vector of\n"
12623"/// [4 x double] as a 128-bit floating-point vector of [2 x double].\n"
12624"///\n"
12625"/// \\headerfile <x86intrin.h>\n"
12626"///\n"
12627"/// This intrinsic has no corresponding instruction.\n"
12628"///\n"
12629"/// \\param __a\n"
12630"/// A 256-bit floating-point vector of [4 x double].\n"
12631"/// \\returns A 128-bit floating-point vector of [2 x double] containing the\n"
12632"/// lower 128 bits of the parameter.\n"
12633"static __inline __m128d __DEFAULT_FN_ATTRS\n"
12634"_mm256_castpd256_pd128(__m256d __a)\n"
12635"{\n"
12636" return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1);\n"
12637"}\n"
12638"\n"
12639"/// Returns the lower 128 bits of a 256-bit floating-point vector of\n"
12640"/// [8 x float] as a 128-bit floating-point vector of [4 x float].\n"
12641"///\n"
12642"/// \\headerfile <x86intrin.h>\n"
12643"///\n"
12644"/// This intrinsic has no corresponding instruction.\n"
12645"///\n"
12646"/// \\param __a\n"
12647"/// A 256-bit floating-point vector of [8 x float].\n"
12648"/// \\returns A 128-bit floating-point vector of [4 x float] containing the\n"
12649"/// lower 128 bits of the parameter.\n"
12650"static __inline __m128 __DEFAULT_FN_ATTRS\n"
12651"_mm256_castps256_ps128(__m256 __a)\n"
12652"{\n"
12653" return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3);\n"
12654"}\n"
12655"\n"
12656"/// Truncates a 256-bit integer vector into a 128-bit integer vector.\n"
12657"///\n"
12658"/// \\headerfile <x86intrin.h>\n"
12659"///\n"
12660"/// This intrinsic has no corresponding instruction.\n"
12661"///\n"
12662"/// \\param __a\n"
12663"/// A 256-bit integer vector.\n"
12664"/// \\returns A 128-bit integer vector containing the lower 128 bits of the\n"
12665"/// parameter.\n"
12666"static __inline __m128i __DEFAULT_FN_ATTRS\n"
12667"_mm256_castsi256_si128(__m256i __a)\n"
12668"{\n"
12669" return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1);\n"
12670"}\n"
12671"\n"
12672"/// Constructs a 256-bit floating-point vector of [4 x double] from a\n"
12673"/// 128-bit floating-point vector of [2 x double].\n"
12674"///\n"
12675"/// The lower 128 bits contain the value of the source vector. The contents\n"
12676"/// of the upper 128 bits are undefined.\n"
12677"///\n"
12678"/// \\headerfile <x86intrin.h>\n"
12679"///\n"
12680"/// This intrinsic has no corresponding instruction.\n"
12681"///\n"
12682"/// \\param __a\n"
12683"/// A 128-bit vector of [2 x double].\n"
12684"/// \\returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits\n"
12685"/// contain the value of the parameter. The contents of the upper 128 bits\n"
12686"/// are undefined.\n"
12687"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12688"_mm256_castpd128_pd256(__m128d __a)\n"
12689"{\n"
12690" return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 1, -1, -1);\n"
12691"}\n"
12692"\n"
12693"/// Constructs a 256-bit floating-point vector of [8 x float] from a\n"
12694"/// 128-bit floating-point vector of [4 x float].\n"
12695"///\n"
12696"/// The lower 128 bits contain the value of the source vector. The contents\n"
12697"/// of the upper 128 bits are undefined.\n"
12698"///\n"
12699"/// \\headerfile <x86intrin.h>\n"
12700"///\n"
12701"/// This intrinsic has no corresponding instruction.\n"
12702"///\n"
12703"/// \\param __a\n"
12704"/// A 128-bit vector of [4 x float].\n"
12705"/// \\returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits\n"
12706"/// contain the value of the parameter. The contents of the upper 128 bits\n"
12707"/// are undefined.\n"
12708"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12709"_mm256_castps128_ps256(__m128 __a)\n"
12710"{\n"
12711" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1, 2, 3, -1, -1, -1, -1);\n"
12712"}\n"
12713"\n"
12714"/// Constructs a 256-bit integer vector from a 128-bit integer vector.\n"
12715"///\n"
12716"/// The lower 128 bits contain the value of the source vector. The contents\n"
12717"/// of the upper 128 bits are undefined.\n"
12718"///\n"
12719"/// \\headerfile <x86intrin.h>\n"
12720"///\n"
12721"/// This intrinsic has no corresponding instruction.\n"
12722"///\n"
12723"/// \\param __a\n"
12724"/// A 128-bit integer vector.\n"
12725"/// \\returns A 256-bit integer vector. The lower 128 bits contain the value of\n"
12726"/// the parameter. The contents of the upper 128 bits are undefined.\n"
12727"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12728"_mm256_castsi128_si256(__m128i __a)\n"
12729"{\n"
12730" return __builtin_shufflevector((__v2di)__a, (__v2di)__a, 0, 1, -1, -1);\n"
12731"}\n"
12732"\n"
12733"/// Constructs a 256-bit floating-point vector of [4 x double] from a\n"
12734"/// 128-bit floating-point vector of [2 x double]. The lower 128 bits\n"
12735"/// contain the value of the source vector. The upper 128 bits are set\n"
12736"/// to zero.\n"
12737"///\n"
12738"/// \\headerfile <x86intrin.h>\n"
12739"///\n"
12740"/// This intrinsic has no corresponding instruction.\n"
12741"///\n"
12742"/// \\param __a\n"
12743"/// A 128-bit vector of [2 x double].\n"
12744"/// \\returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits\n"
12745"/// contain the value of the parameter. The upper 128 bits are set to zero.\n"
12746"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12747"_mm256_zextpd128_pd256(__m128d __a)\n"
12748"{\n"
12749" return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3);\n"
12750"}\n"
12751"\n"
12752"/// Constructs a 256-bit floating-point vector of [8 x float] from a\n"
12753"/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain\n"
12754"/// the value of the source vector. The upper 128 bits are set to zero.\n"
12755"///\n"
12756"/// \\headerfile <x86intrin.h>\n"
12757"///\n"
12758"/// This intrinsic has no corresponding instruction.\n"
12759"///\n"
12760"/// \\param __a\n"
12761"/// A 128-bit vector of [4 x float].\n"
12762"/// \\returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits\n"
12763"/// contain the value of the parameter. The upper 128 bits are set to zero.\n"
12764"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12765"_mm256_zextps128_ps256(__m128 __a)\n"
12766"{\n"
12767" return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7);\n"
12768"}\n"
12769"\n"
12770"/// Constructs a 256-bit integer vector from a 128-bit integer vector.\n"
12771"/// The lower 128 bits contain the value of the source vector. The upper\n"
12772"/// 128 bits are set to zero.\n"
12773"///\n"
12774"/// \\headerfile <x86intrin.h>\n"
12775"///\n"
12776"/// This intrinsic has no corresponding instruction.\n"
12777"///\n"
12778"/// \\param __a\n"
12779"/// A 128-bit integer vector.\n"
12780"/// \\returns A 256-bit integer vector. The lower 128 bits contain the value of\n"
12781"/// the parameter. The upper 128 bits are set to zero.\n"
12782"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12783"_mm256_zextsi128_si256(__m128i __a)\n"
12784"{\n"
12785" return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3);\n"
12786"}\n"
12787"\n"
12788"/*\n"
12789" Vector insert.\n"
12790" We use macros rather than inlines because we only want to accept\n"
12791" invocations where the immediate M is a constant expression.\n"
12792"*/\n"
12793"/// Constructs a new 256-bit vector of [8 x float] by first duplicating\n"
12794"/// a 256-bit vector of [8 x float] given in the first parameter, and then\n"
12795"/// replacing either the upper or the lower 128 bits with the contents of a\n"
12796"/// 128-bit vector of [4 x float] in the second parameter.\n"
12797"///\n"
12798"/// The immediate integer parameter determines between the upper or the lower\n"
12799"/// 128 bits.\n"
12800"///\n"
12801"/// \\headerfile <x86intrin.h>\n"
12802"///\n"
12803"/// \\code\n"
12804"/// __m256 _mm256_insertf128_ps(__m256 V1, __m128 V2, const int M);\n"
12805"/// \\endcode\n"
12806"///\n"
12807"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
12808"///\n"
12809"/// \\param V1\n"
12810"/// A 256-bit vector of [8 x float]. This vector is copied to the result\n"
12811"/// first, and then either the upper or the lower 128 bits of the result will\n"
12812"/// be replaced by the contents of \\a V2.\n"
12813"/// \\param V2\n"
12814"/// A 128-bit vector of [4 x float]. The contents of this parameter are\n"
12815"/// written to either the upper or the lower 128 bits of the result depending\n"
12816"/// on the value of parameter \\a M.\n"
12817"/// \\param M\n"
12818"/// An immediate integer. The least significant bit determines how the values\n"
12819"/// from the two parameters are interleaved: \\n\n"
12820"/// If bit [0] of \\a M is 0, \\a V2 are copied to bits [127:0] of the result,\n"
12821"/// and bits [255:128] of \\a V1 are copied to bits [255:128] of the\n"
12822"/// result. \\n\n"
12823"/// If bit [0] of \\a M is 1, \\a V2 are copied to bits [255:128] of the\n"
12824"/// result, and bits [127:0] of \\a V1 are copied to bits [127:0] of the\n"
12825"/// result.\n"
12826"/// \\returns A 256-bit vector of [8 x float] containing the interleaved values.\n"
12827"#define _mm256_insertf128_ps(V1, V2, M) \\\n"
12828" (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \\\n"
12829" (__v4sf)(__m128)(V2), (int)(M))\n"
12830"\n"
12831"/// Constructs a new 256-bit vector of [4 x double] by first duplicating\n"
12832"/// a 256-bit vector of [4 x double] given in the first parameter, and then\n"
12833"/// replacing either the upper or the lower 128 bits with the contents of a\n"
12834"/// 128-bit vector of [2 x double] in the second parameter.\n"
12835"///\n"
12836"/// The immediate integer parameter determines between the upper or the lower\n"
12837"/// 128 bits.\n"
12838"///\n"
12839"/// \\headerfile <x86intrin.h>\n"
12840"///\n"
12841"/// \\code\n"
12842"/// __m256d _mm256_insertf128_pd(__m256d V1, __m128d V2, const int M);\n"
12843"/// \\endcode\n"
12844"///\n"
12845"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
12846"///\n"
12847"/// \\param V1\n"
12848"/// A 256-bit vector of [4 x double]. This vector is copied to the result\n"
12849"/// first, and then either the upper or the lower 128 bits of the result will\n"
12850"/// be replaced by the contents of \\a V2.\n"
12851"/// \\param V2\n"
12852"/// A 128-bit vector of [2 x double]. The contents of this parameter are\n"
12853"/// written to either the upper or the lower 128 bits of the result depending\n"
12854"/// on the value of parameter \\a M.\n"
12855"/// \\param M\n"
12856"/// An immediate integer. The least significant bit determines how the values\n"
12857"/// from the two parameters are interleaved: \\n\n"
12858"/// If bit [0] of \\a M is 0, \\a V2 are copied to bits [127:0] of the result,\n"
12859"/// and bits [255:128] of \\a V1 are copied to bits [255:128] of the\n"
12860"/// result. \\n\n"
12861"/// If bit [0] of \\a M is 1, \\a V2 are copied to bits [255:128] of the\n"
12862"/// result, and bits [127:0] of \\a V1 are copied to bits [127:0] of the\n"
12863"/// result.\n"
12864"/// \\returns A 256-bit vector of [4 x double] containing the interleaved values.\n"
12865"#define _mm256_insertf128_pd(V1, V2, M) \\\n"
12866" (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \\\n"
12867" (__v2df)(__m128d)(V2), (int)(M))\n"
12868"\n"
12869"/// Constructs a new 256-bit integer vector by first duplicating a\n"
12870"/// 256-bit integer vector given in the first parameter, and then replacing\n"
12871"/// either the upper or the lower 128 bits with the contents of a 128-bit\n"
12872"/// integer vector in the second parameter.\n"
12873"///\n"
12874"/// The immediate integer parameter determines between the upper or the lower\n"
12875"/// 128 bits.\n"
12876"///\n"
12877"/// \\headerfile <x86intrin.h>\n"
12878"///\n"
12879"/// \\code\n"
12880"/// __m256i _mm256_insertf128_si256(__m256i V1, __m128i V2, const int M);\n"
12881"/// \\endcode\n"
12882"///\n"
12883"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
12884"///\n"
12885"/// \\param V1\n"
12886"/// A 256-bit integer vector. This vector is copied to the result first, and\n"
12887"/// then either the upper or the lower 128 bits of the result will be\n"
12888"/// replaced by the contents of \\a V2.\n"
12889"/// \\param V2\n"
12890"/// A 128-bit integer vector. The contents of this parameter are written to\n"
12891"/// either the upper or the lower 128 bits of the result depending on the\n"
12892"/// value of parameter \\a M.\n"
12893"/// \\param M\n"
12894"/// An immediate integer. The least significant bit determines how the values\n"
12895"/// from the two parameters are interleaved: \\n\n"
12896"/// If bit [0] of \\a M is 0, \\a V2 are copied to bits [127:0] of the result,\n"
12897"/// and bits [255:128] of \\a V1 are copied to bits [255:128] of the\n"
12898"/// result. \\n\n"
12899"/// If bit [0] of \\a M is 1, \\a V2 are copied to bits [255:128] of the\n"
12900"/// result, and bits [127:0] of \\a V1 are copied to bits [127:0] of the\n"
12901"/// result.\n"
12902"/// \\returns A 256-bit integer vector containing the interleaved values.\n"
12903"#define _mm256_insertf128_si256(V1, V2, M) \\\n"
12904" (__m256i)__builtin_ia32_vinsertf128_si256((__v8si)(__m256i)(V1), \\\n"
12905" (__v4si)(__m128i)(V2), (int)(M))\n"
12906"\n"
12907"/*\n"
12908" Vector extract.\n"
12909" We use macros rather than inlines because we only want to accept\n"
12910" invocations where the immediate M is a constant expression.\n"
12911"*/\n"
12912"/// Extracts either the upper or the lower 128 bits from a 256-bit vector\n"
12913"/// of [8 x float], as determined by the immediate integer parameter, and\n"
12914"/// returns the extracted bits as a 128-bit vector of [4 x float].\n"
12915"///\n"
12916"/// \\headerfile <x86intrin.h>\n"
12917"///\n"
12918"/// \\code\n"
12919"/// __m128 _mm256_extractf128_ps(__m256 V, const int M);\n"
12920"/// \\endcode\n"
12921"///\n"
12922"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.\n"
12923"///\n"
12924"/// \\param V\n"
12925"/// A 256-bit vector of [8 x float].\n"
12926"/// \\param M\n"
12927"/// An immediate integer. The least significant bit determines which bits are\n"
12928"/// extracted from the first parameter: \\n\n"
12929"/// If bit [0] of \\a M is 0, bits [127:0] of \\a V are copied to the\n"
12930"/// result. \\n\n"
12931"/// If bit [0] of \\a M is 1, bits [255:128] of \\a V are copied to the result.\n"
12932"/// \\returns A 128-bit vector of [4 x float] containing the extracted bits.\n"
12933"#define _mm256_extractf128_ps(V, M) \\\n"
12934" (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M))\n"
12935"\n"
12936"/// Extracts either the upper or the lower 128 bits from a 256-bit vector\n"
12937"/// of [4 x double], as determined by the immediate integer parameter, and\n"
12938"/// returns the extracted bits as a 128-bit vector of [2 x double].\n"
12939"///\n"
12940"/// \\headerfile <x86intrin.h>\n"
12941"///\n"
12942"/// \\code\n"
12943"/// __m128d _mm256_extractf128_pd(__m256d V, const int M);\n"
12944"/// \\endcode\n"
12945"///\n"
12946"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.\n"
12947"///\n"
12948"/// \\param V\n"
12949"/// A 256-bit vector of [4 x double].\n"
12950"/// \\param M\n"
12951"/// An immediate integer. The least significant bit determines which bits are\n"
12952"/// extracted from the first parameter: \\n\n"
12953"/// If bit [0] of \\a M is 0, bits [127:0] of \\a V are copied to the\n"
12954"/// result. \\n\n"
12955"/// If bit [0] of \\a M is 1, bits [255:128] of \\a V are copied to the result.\n"
12956"/// \\returns A 128-bit vector of [2 x double] containing the extracted bits.\n"
12957"#define _mm256_extractf128_pd(V, M) \\\n"
12958" (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M))\n"
12959"\n"
12960"/// Extracts either the upper or the lower 128 bits from a 256-bit\n"
12961"/// integer vector, as determined by the immediate integer parameter, and\n"
12962"/// returns the extracted bits as a 128-bit integer vector.\n"
12963"///\n"
12964"/// \\headerfile <x86intrin.h>\n"
12965"///\n"
12966"/// \\code\n"
12967"/// __m128i _mm256_extractf128_si256(__m256i V, const int M);\n"
12968"/// \\endcode\n"
12969"///\n"
12970"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.\n"
12971"///\n"
12972"/// \\param V\n"
12973"/// A 256-bit integer vector.\n"
12974"/// \\param M\n"
12975"/// An immediate integer. The least significant bit determines which bits are\n"
12976"/// extracted from the first parameter: \\n\n"
12977"/// If bit [0] of \\a M is 0, bits [127:0] of \\a V are copied to the\n"
12978"/// result. \\n\n"
12979"/// If bit [0] of \\a M is 1, bits [255:128] of \\a V are copied to the result.\n"
12980"/// \\returns A 128-bit integer vector containing the extracted bits.\n"
12981"#define _mm256_extractf128_si256(V, M) \\\n"
12982" (__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M))\n"
12983"\n"
12984"/* SIMD load ops (unaligned) */\n"
12985"/// Loads two 128-bit floating-point vectors of [4 x float] from\n"
12986"/// unaligned memory locations and constructs a 256-bit floating-point vector\n"
12987"/// of [8 x float] by concatenating the two 128-bit vectors.\n"
12988"///\n"
12989"/// \\headerfile <x86intrin.h>\n"
12990"///\n"
12991"/// This intrinsic corresponds to load instructions followed by the\n"
12992"/// <c> VINSERTF128 </c> instruction.\n"
12993"///\n"
12994"/// \\param __addr_hi\n"
12995"/// A pointer to a 128-bit memory location containing 4 consecutive\n"
12996"/// single-precision floating-point values. These values are to be copied to\n"
12997"/// bits[255:128] of the result. The address of the memory location does not\n"
12998"/// have to be aligned.\n"
12999"/// \\param __addr_lo\n"
13000"/// A pointer to a 128-bit memory location containing 4 consecutive\n"
13001"/// single-precision floating-point values. These values are to be copied to\n"
13002"/// bits[127:0] of the result. The address of the memory location does not\n"
13003"/// have to be aligned.\n"
13004"/// \\returns A 256-bit floating-point vector of [8 x float] containing the\n"
13005"/// concatenated result.\n"
13006"static __inline __m256 __DEFAULT_FN_ATTRS\n"
13007"_mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)\n"
13008"{\n"
13009" __m256 __v256 = _mm256_castps128_ps256(_mm_loadu_ps(__addr_lo));\n"
13010" return _mm256_insertf128_ps(__v256, _mm_loadu_ps(__addr_hi), 1);\n"
13011"}\n"
13012"\n"
13013"/// Loads two 128-bit floating-point vectors of [2 x double] from\n"
13014"/// unaligned memory locations and constructs a 256-bit floating-point vector\n"
13015"/// of [4 x double] by concatenating the two 128-bit vectors.\n"
13016"///\n"
13017"/// \\headerfile <x86intrin.h>\n"
13018"///\n"
13019"/// This intrinsic corresponds to load instructions followed by the\n"
13020"/// <c> VINSERTF128 </c> instruction.\n"
13021"///\n"
13022"/// \\param __addr_hi\n"
13023"/// A pointer to a 128-bit memory location containing two consecutive\n"
13024"/// double-precision floating-point values. These values are to be copied to\n"
13025"/// bits[255:128] of the result. The address of the memory location does not\n"
13026"/// have to be aligned.\n"
13027"/// \\param __addr_lo\n"
13028"/// A pointer to a 128-bit memory location containing two consecutive\n"
13029"/// double-precision floating-point values. These values are to be copied to\n"
13030"/// bits[127:0] of the result. The address of the memory location does not\n"
13031"/// have to be aligned.\n"
13032"/// \\returns A 256-bit floating-point vector of [4 x double] containing the\n"
13033"/// concatenated result.\n"
13034"static __inline __m256d __DEFAULT_FN_ATTRS\n"
13035"_mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)\n"
13036"{\n"
13037" __m256d __v256 = _mm256_castpd128_pd256(_mm_loadu_pd(__addr_lo));\n"
13038" return _mm256_insertf128_pd(__v256, _mm_loadu_pd(__addr_hi), 1);\n"
13039"}\n"
13040"\n"
13041"/// Loads two 128-bit integer vectors from unaligned memory locations and\n"
13042"/// constructs a 256-bit integer vector by concatenating the two 128-bit\n"
13043"/// vectors.\n"
13044"///\n"
13045"/// \\headerfile <x86intrin.h>\n"
13046"///\n"
13047"/// This intrinsic corresponds to load instructions followed by the\n"
13048"/// <c> VINSERTF128 </c> instruction.\n"
13049"///\n"
13050"/// \\param __addr_hi\n"
13051"/// A pointer to a 128-bit memory location containing a 128-bit integer\n"
13052"/// vector. This vector is to be copied to bits[255:128] of the result. The\n"
13053"/// address of the memory location does not have to be aligned.\n"
13054"/// \\param __addr_lo\n"
13055"/// A pointer to a 128-bit memory location containing a 128-bit integer\n"
13056"/// vector. This vector is to be copied to bits[127:0] of the result. The\n"
13057"/// address of the memory location does not have to be aligned.\n"
13058"/// \\returns A 256-bit integer vector containing the concatenated result.\n"
13059"static __inline __m256i __DEFAULT_FN_ATTRS\n"
13060"_mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)\n"
13061"{\n"
13062" __m256i __v256 = _mm256_castsi128_si256(_mm_loadu_si128(__addr_lo));\n"
13063" return _mm256_insertf128_si256(__v256, _mm_loadu_si128(__addr_hi), 1);\n"
13064"}\n"
13065"\n"
13066"/* SIMD store ops (unaligned) */\n"
13067"/// Stores the upper and lower 128 bits of a 256-bit floating-point\n"
13068"/// vector of [8 x float] into two different unaligned memory locations.\n"
13069"///\n"
13070"/// \\headerfile <x86intrin.h>\n"
13071"///\n"
13072"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the\n"
13073"/// store instructions.\n"
13074"///\n"
13075"/// \\param __addr_hi\n"
13076"/// A pointer to a 128-bit memory location. Bits[255:128] of \\a __a are to be\n"
13077"/// copied to this memory location. The address of this memory location does\n"
13078"/// not have to be aligned.\n"
13079"/// \\param __addr_lo\n"
13080"/// A pointer to a 128-bit memory location. Bits[127:0] of \\a __a are to be\n"
13081"/// copied to this memory location. The address of this memory location does\n"
13082"/// not have to be aligned.\n"
13083"/// \\param __a\n"
13084"/// A 256-bit floating-point vector of [8 x float].\n"
13085"static __inline void __DEFAULT_FN_ATTRS\n"
13086"_mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)\n"
13087"{\n"
13088" __m128 __v128;\n"
13089"\n"
13090" __v128 = _mm256_castps256_ps128(__a);\n"
13091" _mm_storeu_ps(__addr_lo, __v128);\n"
13092" __v128 = _mm256_extractf128_ps(__a, 1);\n"
13093" _mm_storeu_ps(__addr_hi, __v128);\n"
13094"}\n"
13095"\n"
13096"/// Stores the upper and lower 128 bits of a 256-bit floating-point\n"
13097"/// vector of [4 x double] into two different unaligned memory locations.\n"
13098"///\n"
13099"/// \\headerfile <x86intrin.h>\n"
13100"///\n"
13101"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the\n"
13102"/// store instructions.\n"
13103"///\n"
13104"/// \\param __addr_hi\n"
13105"/// A pointer to a 128-bit memory location. Bits[255:128] of \\a __a are to be\n"
13106"/// copied to this memory location. The address of this memory location does\n"
13107"/// not have to be aligned.\n"
13108"/// \\param __addr_lo\n"
13109"/// A pointer to a 128-bit memory location. Bits[127:0] of \\a __a are to be\n"
13110"/// copied to this memory location. The address of this memory location does\n"
13111"/// not have to be aligned.\n"
13112"/// \\param __a\n"
13113"/// A 256-bit floating-point vector of [4 x double].\n"
13114"static __inline void __DEFAULT_FN_ATTRS\n"
13115"_mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)\n"
13116"{\n"
13117" __m128d __v128;\n"
13118"\n"
13119" __v128 = _mm256_castpd256_pd128(__a);\n"
13120" _mm_storeu_pd(__addr_lo, __v128);\n"
13121" __v128 = _mm256_extractf128_pd(__a, 1);\n"
13122" _mm_storeu_pd(__addr_hi, __v128);\n"
13123"}\n"
13124"\n"
13125"/// Stores the upper and lower 128 bits of a 256-bit integer vector into\n"
13126"/// two different unaligned memory locations.\n"
13127"///\n"
13128"/// \\headerfile <x86intrin.h>\n"
13129"///\n"
13130"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the\n"
13131"/// store instructions.\n"
13132"///\n"
13133"/// \\param __addr_hi\n"
13134"/// A pointer to a 128-bit memory location. Bits[255:128] of \\a __a are to be\n"
13135"/// copied to this memory location. The address of this memory location does\n"
13136"/// not have to be aligned.\n"
13137"/// \\param __addr_lo\n"
13138"/// A pointer to a 128-bit memory location. Bits[127:0] of \\a __a are to be\n"
13139"/// copied to this memory location. The address of this memory location does\n"
13140"/// not have to be aligned.\n"
13141"/// \\param __a\n"
13142"/// A 256-bit integer vector.\n"
13143"static __inline void __DEFAULT_FN_ATTRS\n"
13144"_mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)\n"
13145"{\n"
13146" __m128i __v128;\n"
13147"\n"
13148" __v128 = _mm256_castsi256_si128(__a);\n"
13149" _mm_storeu_si128(__addr_lo, __v128);\n"
13150" __v128 = _mm256_extractf128_si256(__a, 1);\n"
13151" _mm_storeu_si128(__addr_hi, __v128);\n"
13152"}\n"
13153"\n"
13154"/// Constructs a 256-bit floating-point vector of [8 x float] by\n"
13155"/// concatenating two 128-bit floating-point vectors of [4 x float].\n"
13156"///\n"
13157"/// \\headerfile <x86intrin.h>\n"
13158"///\n"
13159"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13160"///\n"
13161"/// \\param __hi\n"
13162"/// A 128-bit floating-point vector of [4 x float] to be copied to the upper\n"
13163"/// 128 bits of the result.\n"
13164"/// \\param __lo\n"
13165"/// A 128-bit floating-point vector of [4 x float] to be copied to the lower\n"
13166"/// 128 bits of the result.\n"
13167"/// \\returns A 256-bit floating-point vector of [8 x float] containing the\n"
13168"/// concatenated result.\n"
13169"static __inline __m256 __DEFAULT_FN_ATTRS\n"
13170"_mm256_set_m128 (__m128 __hi, __m128 __lo)\n"
13171"{\n"
13172" return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);\n"
13173"}\n"
13174"\n"
13175"/// Constructs a 256-bit floating-point vector of [4 x double] by\n"
13176"/// concatenating two 128-bit floating-point vectors of [2 x double].\n"
13177"///\n"
13178"/// \\headerfile <x86intrin.h>\n"
13179"///\n"
13180"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13181"///\n"
13182"/// \\param __hi\n"
13183"/// A 128-bit floating-point vector of [2 x double] to be copied to the upper\n"
13184"/// 128 bits of the result.\n"
13185"/// \\param __lo\n"
13186"/// A 128-bit floating-point vector of [2 x double] to be copied to the lower\n"
13187"/// 128 bits of the result.\n"
13188"/// \\returns A 256-bit floating-point vector of [4 x double] containing the\n"
13189"/// concatenated result.\n"
13190"static __inline __m256d __DEFAULT_FN_ATTRS\n"
13191"_mm256_set_m128d (__m128d __hi, __m128d __lo)\n"
13192"{\n"
13193" return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3);\n"
13194"}\n"
13195"\n"
13196"/// Constructs a 256-bit integer vector by concatenating two 128-bit\n"
13197"/// integer vectors.\n"
13198"///\n"
13199"/// \\headerfile <x86intrin.h>\n"
13200"///\n"
13201"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13202"///\n"
13203"/// \\param __hi\n"
13204"/// A 128-bit integer vector to be copied to the upper 128 bits of the\n"
13205"/// result.\n"
13206"/// \\param __lo\n"
13207"/// A 128-bit integer vector to be copied to the lower 128 bits of the\n"
13208"/// result.\n"
13209"/// \\returns A 256-bit integer vector containing the concatenated result.\n"
13210"static __inline __m256i __DEFAULT_FN_ATTRS\n"
13211"_mm256_set_m128i (__m128i __hi, __m128i __lo)\n"
13212"{\n"
13213" return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3);\n"
13214"}\n"
13215"\n"
13216"/// Constructs a 256-bit floating-point vector of [8 x float] by\n"
13217"/// concatenating two 128-bit floating-point vectors of [4 x float]. This is\n"
13218"/// similar to _mm256_set_m128, but the order of the input parameters is\n"
13219"/// swapped.\n"
13220"///\n"
13221"/// \\headerfile <x86intrin.h>\n"
13222"///\n"
13223"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13224"///\n"
13225"/// \\param __lo\n"
13226"/// A 128-bit floating-point vector of [4 x float] to be copied to the lower\n"
13227"/// 128 bits of the result.\n"
13228"/// \\param __hi\n"
13229"/// A 128-bit floating-point vector of [4 x float] to be copied to the upper\n"
13230"/// 128 bits of the result.\n"
13231"/// \\returns A 256-bit floating-point vector of [8 x float] containing the\n"
13232"/// concatenated result.\n"
13233"static __inline __m256 __DEFAULT_FN_ATTRS\n"
13234"_mm256_setr_m128 (__m128 __lo, __m128 __hi)\n"
13235"{\n"
13236" return _mm256_set_m128(__hi, __lo);\n"
13237"}\n"
13238"\n"
13239"/// Constructs a 256-bit floating-point vector of [4 x double] by\n"
13240"/// concatenating two 128-bit floating-point vectors of [2 x double]. This is\n"
13241"/// similar to _mm256_set_m128d, but the order of the input parameters is\n"
13242"/// swapped.\n"
13243"///\n"
13244"/// \\headerfile <x86intrin.h>\n"
13245"///\n"
13246"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13247"///\n"
13248"/// \\param __lo\n"
13249"/// A 128-bit floating-point vector of [2 x double] to be copied to the lower\n"
13250"/// 128 bits of the result.\n"
13251"/// \\param __hi\n"
13252"/// A 128-bit floating-point vector of [2 x double] to be copied to the upper\n"
13253"/// 128 bits of the result.\n"
13254"/// \\returns A 256-bit floating-point vector of [4 x double] containing the\n"
13255"/// concatenated result.\n"
13256"static __inline __m256d __DEFAULT_FN_ATTRS\n"
13257"_mm256_setr_m128d (__m128d __lo, __m128d __hi)\n"
13258"{\n"
13259" return (__m256d)_mm256_set_m128d(__hi, __lo);\n"
13260"}\n"
13261"\n"
13262"/// Constructs a 256-bit integer vector by concatenating two 128-bit\n"
13263"/// integer vectors. This is similar to _mm256_set_m128i, but the order of\n"
13264"/// the input parameters is swapped.\n"
13265"///\n"
13266"/// \\headerfile <x86intrin.h>\n"
13267"///\n"
13268"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13269"///\n"
13270"/// \\param __lo\n"
13271"/// A 128-bit integer vector to be copied to the lower 128 bits of the\n"
13272"/// result.\n"
13273"/// \\param __hi\n"
13274"/// A 128-bit integer vector to be copied to the upper 128 bits of the\n"
13275"/// result.\n"
13276"/// \\returns A 256-bit integer vector containing the concatenated result.\n"
13277"static __inline __m256i __DEFAULT_FN_ATTRS\n"
13278"_mm256_setr_m128i (__m128i __lo, __m128i __hi)\n"
13279"{\n"
13280" return (__m256i)_mm256_set_m128i(__hi, __lo);\n"
13281"}\n"
13282"\n"
13283"#undef __DEFAULT_FN_ATTRS\n"
13284"#undef __DEFAULT_FN_ATTRS128\n"
13285"\n"
13286"#endif /* __AVXINTRIN_H */\n"
13287"" } ,
13288 { "/builtins/bmi2intrin.h" , "/*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------===\n"
13289" *\n"
13290" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13291" * of this software and associated documentation files (the \"Software\"), to deal\n"
13292" * in the Software without restriction, including without limitation the rights\n"
13293" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13294" * copies of the Software, and to permit persons to whom the Software is\n"
13295" * furnished to do so, subject to the following conditions:\n"
13296" *\n"
13297" * The above copyright notice and this permission notice shall be included in\n"
13298" * all copies or substantial portions of the Software.\n"
13299" *\n"
13300" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13301" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13302" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13303" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13304" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13305" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13306" * THE SOFTWARE.\n"
13307" *\n"
13308" *===-----------------------------------------------------------------------===\n"
13309" */\n"
13310"\n"
13311"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
13312"#error \"Never use <bmi2intrin.h> directly; include <x86intrin.h> instead.\"\n"
13313"#endif\n"
13314"\n"
13315"#ifndef __BMI2INTRIN_H\n"
13316"#define __BMI2INTRIN_H\n"
13317"\n"
13318"/* Define the default attributes for the functions in this file. */\n"
13319"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"bmi2\")))\n"
13320"\n"
13321"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13322"_bzhi_u32(unsigned int __X, unsigned int __Y)\n"
13323"{\n"
13324" return __builtin_ia32_bzhi_si(__X, __Y);\n"
13325"}\n"
13326"\n"
13327"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13328"_pdep_u32(unsigned int __X, unsigned int __Y)\n"
13329"{\n"
13330" return __builtin_ia32_pdep_si(__X, __Y);\n"
13331"}\n"
13332"\n"
13333"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13334"_pext_u32(unsigned int __X, unsigned int __Y)\n"
13335"{\n"
13336" return __builtin_ia32_pext_si(__X, __Y);\n"
13337"}\n"
13338"\n"
13339"#ifdef __x86_64__\n"
13340"\n"
13341"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13342"_bzhi_u64(unsigned long long __X, unsigned long long __Y)\n"
13343"{\n"
13344" return __builtin_ia32_bzhi_di(__X, __Y);\n"
13345"}\n"
13346"\n"
13347"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13348"_pdep_u64(unsigned long long __X, unsigned long long __Y)\n"
13349"{\n"
13350" return __builtin_ia32_pdep_di(__X, __Y);\n"
13351"}\n"
13352"\n"
13353"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13354"_pext_u64(unsigned long long __X, unsigned long long __Y)\n"
13355"{\n"
13356" return __builtin_ia32_pext_di(__X, __Y);\n"
13357"}\n"
13358"\n"
13359"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13360"_mulx_u64 (unsigned long long __X, unsigned long long __Y,\n"
13361" unsigned long long *__P)\n"
13362"{\n"
13363" unsigned __int128 __res = (unsigned __int128) __X * __Y;\n"
13364" *__P = (unsigned long long) (__res >> 64);\n"
13365" return (unsigned long long) __res;\n"
13366"}\n"
13367"\n"
13368"#else /* !__x86_64__ */\n"
13369"\n"
13370"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13371"_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)\n"
13372"{\n"
13373" unsigned long long __res = (unsigned long long) __X * __Y;\n"
13374" *__P = (unsigned int) (__res >> 32);\n"
13375" return (unsigned int) __res;\n"
13376"}\n"
13377"\n"
13378"#endif /* !__x86_64__ */\n"
13379"\n"
13380"#undef __DEFAULT_FN_ATTRS\n"
13381"\n"
13382"#endif /* __BMI2INTRIN_H */\n"
13383"" } ,
13384 { "/builtins/bmiintrin.h" , "/*===---- bmiintrin.h - BMI intrinsics -------------------------------------===\n"
13385" *\n"
13386" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13387" * of this software and associated documentation files (the \"Software\"), to deal\n"
13388" * in the Software without restriction, including without limitation the rights\n"
13389" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13390" * copies of the Software, and to permit persons to whom the Software is\n"
13391" * furnished to do so, subject to the following conditions:\n"
13392" *\n"
13393" * The above copyright notice and this permission notice shall be included in\n"
13394" * all copies or substantial portions of the Software.\n"
13395" *\n"
13396" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13397" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13398" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13399" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13400" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13401" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13402" * THE SOFTWARE.\n"
13403" *\n"
13404" *===-----------------------------------------------------------------------===\n"
13405" */\n"
13406"\n"
13407"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
13408"#error \"Never use <bmiintrin.h> directly; include <x86intrin.h> instead.\"\n"
13409"#endif\n"
13410"\n"
13411"#ifndef __BMIINTRIN_H\n"
13412"#define __BMIINTRIN_H\n"
13413"\n"
13414"#define _tzcnt_u16(a) (__tzcnt_u16((a)))\n"
13415"\n"
13416"#define _andn_u32(a, b) (__andn_u32((a), (b)))\n"
13417"\n"
13418"/* _bextr_u32 != __bextr_u32 */\n"
13419"#define _blsi_u32(a) (__blsi_u32((a)))\n"
13420"\n"
13421"#define _blsmsk_u32(a) (__blsmsk_u32((a)))\n"
13422"\n"
13423"#define _blsr_u32(a) (__blsr_u32((a)))\n"
13424"\n"
13425"#define _tzcnt_u32(a) (__tzcnt_u32((a)))\n"
13426"\n"
13427"/* Define the default attributes for the functions in this file. */\n"
13428"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"bmi\")))\n"
13429"\n"
13430"/* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT\n"
13431" instruction behaves as BSF on non-BMI targets, there is code that expects\n"
13432" to use it as a potentially faster version of BSF. */\n"
13433"#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n"
13434"\n"
13435"/// Counts the number of trailing zero bits in the operand.\n"
13436"///\n"
13437"/// \\headerfile <x86intrin.h>\n"
13438"///\n"
13439"/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n"
13440"///\n"
13441"/// \\param __X\n"
13442"/// An unsigned 16-bit integer whose trailing zeros are to be counted.\n"
13443"/// \\returns An unsigned 16-bit integer containing the number of trailing zero\n"
13444"/// bits in the operand.\n"
13445"static __inline__ unsigned short __RELAXED_FN_ATTRS\n"
13446"__tzcnt_u16(unsigned short __X)\n"
13447"{\n"
13448" return __builtin_ia32_tzcnt_u16(__X);\n"
13449"}\n"
13450"\n"
13451"/// Performs a bitwise AND of the second operand with the one's\n"
13452"/// complement of the first operand.\n"
13453"///\n"
13454"/// \\headerfile <x86intrin.h>\n"
13455"///\n"
13456"/// This intrinsic corresponds to the <c> ANDN </c> instruction.\n"
13457"///\n"
13458"/// \\param __X\n"
13459"/// An unsigned integer containing one of the operands.\n"
13460"/// \\param __Y\n"
13461"/// An unsigned integer containing one of the operands.\n"
13462"/// \\returns An unsigned integer containing the bitwise AND of the second\n"
13463"/// operand with the one's complement of the first operand.\n"
13464"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13465"__andn_u32(unsigned int __X, unsigned int __Y)\n"
13466"{\n"
13467" return ~__X & __Y;\n"
13468"}\n"
13469"\n"
13470"/* AMD-specified, double-leading-underscore version of BEXTR */\n"
13471"/// Extracts the specified bits from the first operand and returns them\n"
13472"/// in the least significant bits of the result.\n"
13473"///\n"
13474"/// \\headerfile <x86intrin.h>\n"
13475"///\n"
13476"/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n"
13477"///\n"
13478"/// \\param __X\n"
13479"/// An unsigned integer whose bits are to be extracted.\n"
13480"/// \\param __Y\n"
13481"/// An unsigned integer used to specify which bits are extracted. Bits [7:0]\n"
13482"/// specify the index of the least significant bit. Bits [15:8] specify the\n"
13483"/// number of bits to be extracted.\n"
13484"/// \\returns An unsigned integer whose least significant bits contain the\n"
13485"/// extracted bits.\n"
13486"/// \\see _bextr_u32\n"
13487"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13488"__bextr_u32(unsigned int __X, unsigned int __Y)\n"
13489"{\n"
13490" return __builtin_ia32_bextr_u32(__X, __Y);\n"
13491"}\n"
13492"\n"
13493"/* Intel-specified, single-leading-underscore version of BEXTR */\n"
13494"/// Extracts the specified bits from the first operand and returns them\n"
13495"/// in the least significant bits of the result.\n"
13496"///\n"
13497"/// \\headerfile <x86intrin.h>\n"
13498"///\n"
13499"/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n"
13500"///\n"
13501"/// \\param __X\n"
13502"/// An unsigned integer whose bits are to be extracted.\n"
13503"/// \\param __Y\n"
13504"/// An unsigned integer used to specify the index of the least significant\n"
13505"/// bit for the bits to be extracted. Bits [7:0] specify the index.\n"
13506"/// \\param __Z\n"
13507"/// An unsigned integer used to specify the number of bits to be extracted.\n"
13508"/// Bits [7:0] specify the number of bits.\n"
13509"/// \\returns An unsigned integer whose least significant bits contain the\n"
13510"/// extracted bits.\n"
13511"/// \\see __bextr_u32\n"
13512"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13513"_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)\n"
13514"{\n"
13515" return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));\n"
13516"}\n"
13517"\n"
13518"/// Clears all bits in the source except for the least significant bit\n"
13519"/// containing a value of 1 and returns the result.\n"
13520"///\n"
13521"/// \\headerfile <x86intrin.h>\n"
13522"///\n"
13523"/// This intrinsic corresponds to the <c> BLSI </c> instruction.\n"
13524"///\n"
13525"/// \\param __X\n"
13526"/// An unsigned integer whose bits are to be cleared.\n"
13527"/// \\returns An unsigned integer containing the result of clearing the bits from\n"
13528"/// the source operand.\n"
13529"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13530"__blsi_u32(unsigned int __X)\n"
13531"{\n"
13532" return __X & -__X;\n"
13533"}\n"
13534"\n"
13535"/// Creates a mask whose bits are set to 1, using bit 0 up to and\n"
13536"/// including the least significant bit that is set to 1 in the source\n"
13537"/// operand and returns the result.\n"
13538"///\n"
13539"/// \\headerfile <x86intrin.h>\n"
13540"///\n"
13541"/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.\n"
13542"///\n"
13543"/// \\param __X\n"
13544"/// An unsigned integer used to create the mask.\n"
13545"/// \\returns An unsigned integer containing the newly created mask.\n"
13546"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13547"__blsmsk_u32(unsigned int __X)\n"
13548"{\n"
13549" return __X ^ (__X - 1);\n"
13550"}\n"
13551"\n"
13552"/// Clears the least significant bit that is set to 1 in the source\n"
13553"/// operand and returns the result.\n"
13554"///\n"
13555"/// \\headerfile <x86intrin.h>\n"
13556"///\n"
13557"/// This intrinsic corresponds to the <c> BLSR </c> instruction.\n"
13558"///\n"
13559"/// \\param __X\n"
13560"/// An unsigned integer containing the operand to be cleared.\n"
13561"/// \\returns An unsigned integer containing the result of clearing the source\n"
13562"/// operand.\n"
13563"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13564"__blsr_u32(unsigned int __X)\n"
13565"{\n"
13566" return __X & (__X - 1);\n"
13567"}\n"
13568"\n"
13569"/// Counts the number of trailing zero bits in the operand.\n"
13570"///\n"
13571"/// \\headerfile <x86intrin.h>\n"
13572"///\n"
13573"/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n"
13574"///\n"
13575"/// \\param __X\n"
13576"/// An unsigned 32-bit integer whose trailing zeros are to be counted.\n"
13577"/// \\returns An unsigned 32-bit integer containing the number of trailing zero\n"
13578"/// bits in the operand.\n"
13579"static __inline__ unsigned int __RELAXED_FN_ATTRS\n"
13580"__tzcnt_u32(unsigned int __X)\n"
13581"{\n"
13582" return __builtin_ia32_tzcnt_u32(__X);\n"
13583"}\n"
13584"\n"
13585"/// Counts the number of trailing zero bits in the operand.\n"
13586"///\n"
13587"/// \\headerfile <x86intrin.h>\n"
13588"///\n"
13589"/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n"
13590"///\n"
13591"/// \\param __X\n"
13592"/// An unsigned 32-bit integer whose trailing zeros are to be counted.\n"
13593"/// \\returns An 32-bit integer containing the number of trailing zero bits in\n"
13594"/// the operand.\n"
13595"static __inline__ int __RELAXED_FN_ATTRS\n"
13596"_mm_tzcnt_32(unsigned int __X)\n"
13597"{\n"
13598" return __builtin_ia32_tzcnt_u32(__X);\n"
13599"}\n"
13600"\n"
13601"#ifdef __x86_64__\n"
13602"\n"
13603"#define _andn_u64(a, b) (__andn_u64((a), (b)))\n"
13604"\n"
13605"/* _bextr_u64 != __bextr_u64 */\n"
13606"#define _blsi_u64(a) (__blsi_u64((a)))\n"
13607"\n"
13608"#define _blsmsk_u64(a) (__blsmsk_u64((a)))\n"
13609"\n"
13610"#define _blsr_u64(a) (__blsr_u64((a)))\n"
13611"\n"
13612"#define _tzcnt_u64(a) (__tzcnt_u64((a)))\n"
13613"\n"
13614"/// Performs a bitwise AND of the second operand with the one's\n"
13615"/// complement of the first operand.\n"
13616"///\n"
13617"/// \\headerfile <x86intrin.h>\n"
13618"///\n"
13619"/// This intrinsic corresponds to the <c> ANDN </c> instruction.\n"
13620"///\n"
13621"/// \\param __X\n"
13622"/// An unsigned 64-bit integer containing one of the operands.\n"
13623"/// \\param __Y\n"
13624"/// An unsigned 64-bit integer containing one of the operands.\n"
13625"/// \\returns An unsigned 64-bit integer containing the bitwise AND of the second\n"
13626"/// operand with the one's complement of the first operand.\n"
13627"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13628"__andn_u64 (unsigned long long __X, unsigned long long __Y)\n"
13629"{\n"
13630" return ~__X & __Y;\n"
13631"}\n"
13632"\n"
13633"/* AMD-specified, double-leading-underscore version of BEXTR */\n"
13634"/// Extracts the specified bits from the first operand and returns them\n"
13635"/// in the least significant bits of the result.\n"
13636"///\n"
13637"/// \\headerfile <x86intrin.h>\n"
13638"///\n"
13639"/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n"
13640"///\n"
13641"/// \\param __X\n"
13642"/// An unsigned 64-bit integer whose bits are to be extracted.\n"
13643"/// \\param __Y\n"
13644"/// An unsigned 64-bit integer used to specify which bits are extracted. Bits\n"
13645"/// [7:0] specify the index of the least significant bit. Bits [15:8] specify\n"
13646"/// the number of bits to be extracted.\n"
13647"/// \\returns An unsigned 64-bit integer whose least significant bits contain the\n"
13648"/// extracted bits.\n"
13649"/// \\see _bextr_u64\n"
13650"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13651"__bextr_u64(unsigned long long __X, unsigned long long __Y)\n"
13652"{\n"
13653" return __builtin_ia32_bextr_u64(__X, __Y);\n"
13654"}\n"
13655"\n"
13656"/* Intel-specified, single-leading-underscore version of BEXTR */\n"
13657"/// Extracts the specified bits from the first operand and returns them\n"
13658"/// in the least significant bits of the result.\n"
13659"///\n"
13660"/// \\headerfile <x86intrin.h>\n"
13661"///\n"
13662"/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n"
13663"///\n"
13664"/// \\param __X\n"
13665"/// An unsigned 64-bit integer whose bits are to be extracted.\n"
13666"/// \\param __Y\n"
13667"/// An unsigned integer used to specify the index of the least significant\n"
13668"/// bit for the bits to be extracted. Bits [7:0] specify the index.\n"
13669"/// \\param __Z\n"
13670"/// An unsigned integer used to specify the number of bits to be extracted.\n"
13671"/// Bits [7:0] specify the number of bits.\n"
13672"/// \\returns An unsigned 64-bit integer whose least significant bits contain the\n"
13673"/// extracted bits.\n"
13674"/// \\see __bextr_u64\n"
13675"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13676"_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)\n"
13677"{\n"
13678" return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));\n"
13679"}\n"
13680"\n"
13681"/// Clears all bits in the source except for the least significant bit\n"
13682"/// containing a value of 1 and returns the result.\n"
13683"///\n"
13684"/// \\headerfile <x86intrin.h>\n"
13685"///\n"
13686"/// This intrinsic corresponds to the <c> BLSI </c> instruction.\n"
13687"///\n"
13688"/// \\param __X\n"
13689"/// An unsigned 64-bit integer whose bits are to be cleared.\n"
13690"/// \\returns An unsigned 64-bit integer containing the result of clearing the\n"
13691"/// bits from the source operand.\n"
13692"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13693"__blsi_u64(unsigned long long __X)\n"
13694"{\n"
13695" return __X & -__X;\n"
13696"}\n"
13697"\n"
13698"/// Creates a mask whose bits are set to 1, using bit 0 up to and\n"
13699"/// including the least significant bit that is set to 1 in the source\n"
13700"/// operand and returns the result.\n"
13701"///\n"
13702"/// \\headerfile <x86intrin.h>\n"
13703"///\n"
13704"/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.\n"
13705"///\n"
13706"/// \\param __X\n"
13707"/// An unsigned 64-bit integer used to create the mask.\n"
13708"/// \\returns An unsigned 64-bit integer containing the newly created mask.\n"
13709"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13710"__blsmsk_u64(unsigned long long __X)\n"
13711"{\n"
13712" return __X ^ (__X - 1);\n"
13713"}\n"
13714"\n"
13715"/// Clears the least significant bit that is set to 1 in the source\n"
13716"/// operand and returns the result.\n"
13717"///\n"
13718"/// \\headerfile <x86intrin.h>\n"
13719"///\n"
13720"/// This intrinsic corresponds to the <c> BLSR </c> instruction.\n"
13721"///\n"
13722"/// \\param __X\n"
13723"/// An unsigned 64-bit integer containing the operand to be cleared.\n"
13724"/// \\returns An unsigned 64-bit integer containing the result of clearing the\n"
13725"/// source operand.\n"
13726"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13727"__blsr_u64(unsigned long long __X)\n"
13728"{\n"
13729" return __X & (__X - 1);\n"
13730"}\n"
13731"\n"
13732"/// Counts the number of trailing zero bits in the operand.\n"
13733"///\n"
13734"/// \\headerfile <x86intrin.h>\n"
13735"///\n"
13736"/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n"
13737"///\n"
13738"/// \\param __X\n"
13739"/// An unsigned 64-bit integer whose trailing zeros are to be counted.\n"
13740"/// \\returns An unsigned 64-bit integer containing the number of trailing zero\n"
13741"/// bits in the operand.\n"
13742"static __inline__ unsigned long long __RELAXED_FN_ATTRS\n"
13743"__tzcnt_u64(unsigned long long __X)\n"
13744"{\n"
13745" return __builtin_ia32_tzcnt_u64(__X);\n"
13746"}\n"
13747"\n"
13748"/// Counts the number of trailing zero bits in the operand.\n"
13749"///\n"
13750"/// \\headerfile <x86intrin.h>\n"
13751"///\n"
13752"/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n"
13753"///\n"
13754"/// \\param __X\n"
13755"/// An unsigned 64-bit integer whose trailing zeros are to be counted.\n"
13756"/// \\returns An 64-bit integer containing the number of trailing zero bits in\n"
13757"/// the operand.\n"
13758"static __inline__ long long __RELAXED_FN_ATTRS\n"
13759"_mm_tzcnt_64(unsigned long long __X)\n"
13760"{\n"
13761" return __builtin_ia32_tzcnt_u64(__X);\n"
13762"}\n"
13763"\n"
13764"#endif /* __x86_64__ */\n"
13765"\n"
13766"#undef __DEFAULT_FN_ATTRS\n"
13767"#undef __RELAXED_FN_ATTRS\n"
13768"\n"
13769"#endif /* __BMIINTRIN_H */\n"
13770"" } ,
13771 { "/builtins/cetintrin.h" , "/*===---- cetintrin.h - CET intrinsic --------------------------------------===\n"
13772" *\n"
13773" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13774" * of this software and associated documentation files (the \"Software\"), to deal\n"
13775" * in the Software without restriction, including without limitation the rights\n"
13776" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13777" * copies of the Software, and to permit persons to whom the Software is\n"
13778" * furnished to do so, subject to the following conditions:\n"
13779" *\n"
13780" * The above copyright notice and this permission notice shall be included in\n"
13781" * all copies or substantial portions of the Software.\n"
13782" *\n"
13783" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13784" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13785" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13786" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13787" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13788" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13789" * THE SOFTWARE.\n"
13790" *\n"
13791" *===-----------------------------------------------------------------------===\n"
13792" */\n"
13793"\n"
13794"#ifndef __IMMINTRIN_H\n"
13795"#error \"Never use <cetintrin.h> directly; include <immintrin.h> instead.\"\n"
13796"#endif\n"
13797"\n"
13798"#ifndef __CETINTRIN_H\n"
13799"#define __CETINTRIN_H\n"
13800"\n"
13801"/* Define the default attributes for the functions in this file. */\n"
13802"#define __DEFAULT_FN_ATTRS \\\n"
13803" __attribute__((__always_inline__, __nodebug__, __target__(\"shstk\")))\n"
13804"\n"
13805"static __inline__ void __DEFAULT_FN_ATTRS _incsspd(int __a) {\n"
13806" __builtin_ia32_incsspd(__a);\n"
13807"}\n"
13808"\n"
13809"#ifdef __x86_64__\n"
13810"static __inline__ void __DEFAULT_FN_ATTRS _incsspq(unsigned long long __a) {\n"
13811" __builtin_ia32_incsspq(__a);\n"
13812"}\n"
13813"#endif /* __x86_64__ */\n"
13814"\n"
13815"#ifdef __x86_64__\n"
13816"static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) {\n"
13817" __builtin_ia32_incsspq(__a);\n"
13818"}\n"
13819"#else /* __x86_64__ */\n"
13820"static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) {\n"
13821" __builtin_ia32_incsspd((int)__a);\n"
13822"}\n"
13823"#endif /* __x86_64__ */\n"
13824"\n"
13825"static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdsspd(unsigned int __a) {\n"
13826" return __builtin_ia32_rdsspd(__a);\n"
13827"}\n"
13828"\n"
13829"#ifdef __x86_64__\n"
13830"static __inline__ unsigned long long __DEFAULT_FN_ATTRS _rdsspq(unsigned long long __a) {\n"
13831" return __builtin_ia32_rdsspq(__a);\n"
13832"}\n"
13833"#endif /* __x86_64__ */\n"
13834"\n"
13835"#ifdef __x86_64__\n"
13836"static __inline__ unsigned long long __DEFAULT_FN_ATTRS _get_ssp(void) {\n"
13837" return __builtin_ia32_rdsspq(0);\n"
13838"}\n"
13839"#else /* __x86_64__ */\n"
13840"static __inline__ unsigned int __DEFAULT_FN_ATTRS _get_ssp(void) {\n"
13841" return __builtin_ia32_rdsspd(0);\n"
13842"}\n"
13843"#endif /* __x86_64__ */\n"
13844"\n"
13845"static __inline__ void __DEFAULT_FN_ATTRS _saveprevssp() {\n"
13846" __builtin_ia32_saveprevssp();\n"
13847"}\n"
13848"\n"
13849"static __inline__ void __DEFAULT_FN_ATTRS _rstorssp(void * __p) {\n"
13850" __builtin_ia32_rstorssp(__p);\n"
13851"}\n"
13852"\n"
13853"static __inline__ void __DEFAULT_FN_ATTRS _wrssd(unsigned int __a, void * __p) {\n"
13854" __builtin_ia32_wrssd(__a, __p);\n"
13855"}\n"
13856"\n"
13857"#ifdef __x86_64__\n"
13858"static __inline__ void __DEFAULT_FN_ATTRS _wrssq(unsigned long long __a, void * __p) {\n"
13859" __builtin_ia32_wrssq(__a, __p);\n"
13860"}\n"
13861"#endif /* __x86_64__ */\n"
13862"\n"
13863"static __inline__ void __DEFAULT_FN_ATTRS _wrussd(unsigned int __a, void * __p) {\n"
13864" __builtin_ia32_wrussd(__a, __p);\n"
13865"}\n"
13866"\n"
13867"#ifdef __x86_64__\n"
13868"static __inline__ void __DEFAULT_FN_ATTRS _wrussq(unsigned long long __a, void * __p) {\n"
13869" __builtin_ia32_wrussq(__a, __p);\n"
13870"}\n"
13871"#endif /* __x86_64__ */\n"
13872"\n"
13873"static __inline__ void __DEFAULT_FN_ATTRS _setssbsy() {\n"
13874" __builtin_ia32_setssbsy();\n"
13875"}\n"
13876"\n"
13877"static __inline__ void __DEFAULT_FN_ATTRS _clrssbsy(void * __p) {\n"
13878" __builtin_ia32_clrssbsy(__p);\n"
13879"}\n"
13880"\n"
13881"#undef __DEFAULT_FN_ATTRS\n"
13882"\n"
13883"#endif /* __CETINTRIN_H */\n"
13884"" } ,
13885 { "/builtins/cldemoteintrin.h" , "/*===---- cldemoteintrin.h - CLDEMOTE intrinsic ----------------------------===\n"
13886" *\n"
13887" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13888" * of this software and associated documentation files (the \"Software\"), to deal\n"
13889" * in the Software without restriction, including without limitation the rights\n"
13890" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13891" * copies of the Software, and to permit persons to whom the Software is\n"
13892" * furnished to do so, subject to the following conditions:\n"
13893" *\n"
13894" * The above copyright notice and this permission notice shall be included in\n"
13895" * all copies or substantial portions of the Software.\n"
13896" *\n"
13897" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13898" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13899" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13900" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13901" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13902" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13903" * THE SOFTWARE.\n"
13904" *\n"
13905" *===-----------------------------------------------------------------------===\n"
13906" */\n"
13907"\n"
13908"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
13909"#error \"Never use <cldemoteintrin.h> directly; include <x86intrin.h> instead.\"\n"
13910"#endif\n"
13911"\n"
13912"#ifndef __CLDEMOTEINTRIN_H\n"
13913"#define __CLDEMOTEINTRIN_H\n"
13914"\n"
13915"/* Define the default attributes for the functions in this file. */\n"
13916"#define __DEFAULT_FN_ATTRS \\\n"
13917" __attribute__((__always_inline__, __nodebug__, __target__(\"cldemote\")))\n"
13918"\n"
13919"static __inline__ void __DEFAULT_FN_ATTRS\n"
13920"_cldemote(const void * __P) {\n"
13921" __builtin_ia32_cldemote(__P);\n"
13922"}\n"
13923"\n"
13924"#undef __DEFAULT_FN_ATTRS\n"
13925"\n"
13926"#endif\n"
13927"" } ,
13928 { "/builtins/clflushoptintrin.h" , "/*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------===\n"
13929" *\n"
13930" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13931" * of this software and associated documentation files (the \"Software\"), to deal\n"
13932" * in the Software without restriction, including without limitation the rights\n"
13933" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13934" * copies of the Software, and to permit persons to whom the Software is\n"
13935" * furnished to do so, subject to the following conditions:\n"
13936" *\n"
13937" * The above copyright notice and this permission notice shall be included in\n"
13938" * all copies or substantial portions of the Software.\n"
13939" *\n"
13940" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13941" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13942" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13943" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13944" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13945" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13946" * THE SOFTWARE.\n"
13947" *\n"
13948" *===-----------------------------------------------------------------------===\n"
13949" */\n"
13950"\n"
13951"#ifndef __IMMINTRIN_H\n"
13952"#error \"Never use <clflushoptintrin.h> directly; include <immintrin.h> instead.\"\n"
13953"#endif\n"
13954"\n"
13955"#ifndef __CLFLUSHOPTINTRIN_H\n"
13956"#define __CLFLUSHOPTINTRIN_H\n"
13957"\n"
13958"/* Define the default attributes for the functions in this file. */\n"
13959"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"clflushopt\")))\n"
13960"\n"
13961"static __inline__ void __DEFAULT_FN_ATTRS\n"
13962"_mm_clflushopt(void const * __m) {\n"
13963" __builtin_ia32_clflushopt(__m);\n"
13964"}\n"
13965"\n"
13966"#undef __DEFAULT_FN_ATTRS\n"
13967"\n"
13968"#endif\n"
13969"" } ,
13970 { "/builtins/clwbintrin.h" , "/*===---- clwbintrin.h - CLWB intrinsic ------------------------------------===\n"
13971" *\n"
13972" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13973" * of this software and associated documentation files (the \"Software\"), to deal\n"
13974" * in the Software without restriction, including without limitation the rights\n"
13975" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13976" * copies of the Software, and to permit persons to whom the Software is\n"
13977" * furnished to do so, subject to the following conditions:\n"
13978" *\n"
13979" * The above copyright notice and this permission notice shall be included in\n"
13980" * all copies or substantial portions of the Software.\n"
13981" *\n"
13982" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13983" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13984" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13985" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13986" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13987" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13988" * THE SOFTWARE.\n"
13989" *\n"
13990" *===-----------------------------------------------------------------------===\n"
13991" */\n"
13992"\n"
13993"#ifndef __IMMINTRIN_H\n"
13994"#error \"Never use <clwbintrin.h> directly; include <immintrin.h> instead.\"\n"
13995"#endif\n"
13996"\n"
13997"#ifndef __CLWBINTRIN_H\n"
13998"#define __CLWBINTRIN_H\n"
13999"\n"
14000"/* Define the default attributes for the functions in this file. */\n"
14001"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"clwb\")))\n"
14002"\n"
14003"/// Writes back to memory the cache line (if modified) that contains the\n"
14004"/// linear address specified in \\a __p from any level of the cache hierarchy in\n"
14005"/// the cache coherence domain\n"
14006"///\n"
14007"/// \\headerfile <immintrin.h>\n"
14008"///\n"
14009"/// This intrinsic corresponds to the <c> CLWB </c> instruction.\n"
14010"///\n"
14011"/// \\param __p\n"
14012"/// A pointer to the memory location used to identify the cache line to be\n"
14013"/// written back.\n"
14014"static __inline__ void __DEFAULT_FN_ATTRS\n"
14015"_mm_clwb(void const *__p) {\n"
14016" __builtin_ia32_clwb(__p);\n"
14017"}\n"
14018"\n"
14019"#undef __DEFAULT_FN_ATTRS\n"
14020"\n"
14021"#endif\n"
14022"" } ,
14023 { "/builtins/clzerointrin.h" , "/*===----------------------- clzerointrin.h - CLZERO ----------------------===\n"
14024" *\n"
14025" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
14026" * of this software and associated documentation files (the \"Software\"), to deal\n"
14027" * in the Software without restriction, including without limitation the rights\n"
14028" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
14029" * copies of the Software, and to permit persons to whom the Software is\n"
14030" * furnished to do so, subject to the following conditions:\n"
14031" *\n"
14032" * The above copyright notice and this permission notice shall be included in\n"
14033" * all copies or substantial portions of the Software.\n"
14034" *\n"
14035" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
14036" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
14037" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
14038" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
14039" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
14040" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
14041" * THE SOFTWARE.\n"
14042" *\n"
14043" *===-----------------------------------------------------------------------===\n"
14044" */\n"
14045"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
14046"#error \"Never use <clzerointrin.h> directly; include <x86intrin.h> instead.\"\n"
14047"#endif\n"
14048"\n"
14049"#ifndef __CLZEROINTRIN_H\n"
14050"#define __CLZEROINTRIN_H\n"
14051"\n"
14052"/* Define the default attributes for the functions in this file. */\n"
14053"#define __DEFAULT_FN_ATTRS \\\n"
14054" __attribute__((__always_inline__, __nodebug__, __target__(\"clzero\")))\n"
14055"\n"
14056"/// Loads the cache line address and zero's out the cacheline\n"
14057"///\n"
14058"/// \\headerfile <clzerointrin.h>\n"
14059"///\n"
14060"/// This intrinsic corresponds to the <c> CLZERO </c> instruction.\n"
14061"///\n"
14062"/// \\param __line\n"
14063"/// A pointer to a cacheline which needs to be zeroed out.\n"
14064"static __inline__ void __DEFAULT_FN_ATTRS\n"
14065"_mm_clzero (void * __line)\n"
14066"{\n"
14067" __builtin_ia32_clzero ((void *)__line);\n"
14068"}\n"
14069"\n"
14070"#undef __DEFAULT_FN_ATTRS\n"
14071"\n"
14072"#endif /* __CLZEROINTRIN_H */\n"
14073"" } ,
14074 { "/builtins/cpuid.h" , "/*===---- cpuid.h - X86 cpu model detection --------------------------------===\n"
14075" *\n"
14076" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
14077" * of this software and associated documentation files (the \"Software\"), to deal\n"
14078" * in the Software without restriction, including without limitation the rights\n"
14079" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
14080" * copies of the Software, and to permit persons to whom the Software is\n"
14081" * furnished to do so, subject to the following conditions:\n"
14082" *\n"
14083" * The above copyright notice and this permission notice shall be included in\n"
14084" * all copies or substantial portions of the Software.\n"
14085" *\n"
14086" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
14087" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
14088" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
14089" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
14090" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
14091" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
14092" * THE SOFTWARE.\n"
14093" *\n"
14094" *===-----------------------------------------------------------------------===\n"
14095" */\n"
14096"\n"
14097"#if !(__x86_64__ || __i386__)\n"
14098"#error this header is for x86 only\n"
14099"#endif\n"
14100"\n"
14101"/* Responses identification request with %eax 0 */\n"
14102"/* AMD: \"AuthenticAMD\" */\n"
14103"#define signature_AMD_ebx 0x68747541\n"
14104"#define signature_AMD_edx 0x69746e65\n"
14105"#define signature_AMD_ecx 0x444d4163\n"
14106"/* CENTAUR: \"CentaurHauls\" */\n"
14107"#define signature_CENTAUR_ebx 0x746e6543\n"
14108"#define signature_CENTAUR_edx 0x48727561\n"
14109"#define signature_CENTAUR_ecx 0x736c7561\n"
14110"/* CYRIX: \"CyrixInstead\" */\n"
14111"#define signature_CYRIX_ebx 0x69727943\n"
14112"#define signature_CYRIX_edx 0x736e4978\n"
14113"#define signature_CYRIX_ecx 0x64616574\n"
14114"/* INTEL: \"GenuineIntel\" */\n"
14115"#define signature_INTEL_ebx 0x756e6547\n"
14116"#define signature_INTEL_edx 0x49656e69\n"
14117"#define signature_INTEL_ecx 0x6c65746e\n"
14118"/* TM1: \"TransmetaCPU\" */\n"
14119"#define signature_TM1_ebx 0x6e617254\n"
14120"#define signature_TM1_edx 0x74656d73\n"
14121"#define signature_TM1_ecx 0x55504361\n"
14122"/* TM2: \"GenuineTMx86\" */\n"
14123"#define signature_TM2_ebx 0x756e6547\n"
14124"#define signature_TM2_edx 0x54656e69\n"
14125"#define signature_TM2_ecx 0x3638784d\n"
14126"/* NSC: \"Geode by NSC\" */\n"
14127"#define signature_NSC_ebx 0x646f6547\n"
14128"#define signature_NSC_edx 0x43534e20\n"
14129"#define signature_NSC_ecx 0x79622065\n"
14130"/* NEXGEN: \"NexGenDriven\" */\n"
14131"#define signature_NEXGEN_ebx 0x4778654e\n"
14132"#define signature_NEXGEN_edx 0x72446e65\n"
14133"#define signature_NEXGEN_ecx 0x6e657669\n"
14134"/* RISE: \"RiseRiseRise\" */\n"
14135"#define signature_RISE_ebx 0x65736952\n"
14136"#define signature_RISE_edx 0x65736952\n"
14137"#define signature_RISE_ecx 0x65736952\n"
14138"/* SIS: \"SiS SiS SiS \" */\n"
14139"#define signature_SIS_ebx 0x20536953\n"
14140"#define signature_SIS_edx 0x20536953\n"
14141"#define signature_SIS_ecx 0x20536953\n"
14142"/* UMC: \"UMC UMC UMC \" */\n"
14143"#define signature_UMC_ebx 0x20434d55\n"
14144"#define signature_UMC_edx 0x20434d55\n"
14145"#define signature_UMC_ecx 0x20434d55\n"
14146"/* VIA: \"VIA VIA VIA \" */\n"
14147"#define signature_VIA_ebx 0x20414956\n"
14148"#define signature_VIA_edx 0x20414956\n"
14149"#define signature_VIA_ecx 0x20414956\n"
14150"/* VORTEX: \"Vortex86 SoC\" */\n"
14151"#define signature_VORTEX_ebx 0x74726f56\n"
14152"#define signature_VORTEX_edx 0x36387865\n"
14153"#define signature_VORTEX_ecx 0x436f5320\n"
14154"\n"
14155"/* Features in %ecx for leaf 1 */\n"
14156"#define bit_SSE3 0x00000001\n"
14157"#define bit_PCLMULQDQ 0x00000002\n"
14158"#define bit_PCLMUL bit_PCLMULQDQ /* for gcc compat */\n"
14159"#define bit_DTES64 0x00000004\n"
14160"#define bit_MONITOR 0x00000008\n"
14161"#define bit_DSCPL 0x00000010\n"
14162"#define bit_VMX 0x00000020\n"
14163"#define bit_SMX 0x00000040\n"
14164"#define bit_EIST 0x00000080\n"
14165"#define bit_TM2 0x00000100\n"
14166"#define bit_SSSE3 0x00000200\n"
14167"#define bit_CNXTID 0x00000400\n"
14168"#define bit_FMA 0x00001000\n"
14169"#define bit_CMPXCHG16B 0x00002000\n"
14170"#define bit_xTPR 0x00004000\n"
14171"#define bit_PDCM 0x00008000\n"
14172"#define bit_PCID 0x00020000\n"
14173"#define bit_DCA 0x00040000\n"
14174"#define bit_SSE41 0x00080000\n"
14175"#define bit_SSE4_1 bit_SSE41 /* for gcc compat */\n"
14176"#define bit_SSE42 0x00100000\n"
14177"#define bit_SSE4_2 bit_SSE42 /* for gcc compat */\n"
14178"#define bit_x2APIC 0x00200000\n"
14179"#define bit_MOVBE 0x00400000\n"
14180"#define bit_POPCNT 0x00800000\n"
14181"#define bit_TSCDeadline 0x01000000\n"
14182"#define bit_AESNI 0x02000000\n"
14183"#define bit_AES bit_AESNI /* for gcc compat */\n"
14184"#define bit_XSAVE 0x04000000\n"
14185"#define bit_OSXSAVE 0x08000000\n"
14186"#define bit_AVX 0x10000000\n"
14187"#define bit_F16C 0x20000000\n"
14188"#define bit_RDRND 0x40000000\n"
14189"\n"
14190"/* Features in %edx for leaf 1 */\n"
14191"#define bit_FPU 0x00000001\n"
14192"#define bit_VME 0x00000002\n"
14193"#define bit_DE 0x00000004\n"
14194"#define bit_PSE 0x00000008\n"
14195"#define bit_TSC 0x00000010\n"
14196"#define bit_MSR 0x00000020\n"
14197"#define bit_PAE 0x00000040\n"
14198"#define bit_MCE 0x00000080\n"
14199"#define bit_CX8 0x00000100\n"
14200"#define bit_CMPXCHG8B bit_CX8 /* for gcc compat */\n"
14201"#define bit_APIC 0x00000200\n"
14202"#define bit_SEP 0x00000800\n"
14203"#define bit_MTRR 0x00001000\n"
14204"#define bit_PGE 0x00002000\n"
14205"#define bit_MCA 0x00004000\n"
14206"#define bit_CMOV 0x00008000\n"
14207"#define bit_PAT 0x00010000\n"
14208"#define bit_PSE36 0x00020000\n"
14209"#define bit_PSN 0x00040000\n"
14210"#define bit_CLFSH 0x00080000\n"
14211"#define bit_DS 0x00200000\n"
14212"#define bit_ACPI 0x00400000\n"
14213"#define bit_MMX 0x00800000\n"
14214"#define bit_FXSR 0x01000000\n"
14215"#define bit_FXSAVE bit_FXSR /* for gcc compat */\n"
14216"#define bit_SSE 0x02000000\n"
14217"#define bit_SSE2 0x04000000\n"
14218"#define bit_SS 0x08000000\n"
14219"#define bit_HTT 0x10000000\n"
14220"#define bit_TM 0x20000000\n"
14221"#define bit_PBE 0x80000000\n"
14222"\n"
14223"/* Features in %ebx for leaf 7 sub-leaf 0 */\n"
14224"#define bit_FSGSBASE 0x00000001\n"
14225"#define bit_SGX 0x00000004\n"
14226"#define bit_BMI 0x00000008\n"
14227"#define bit_HLE 0x00000010\n"
14228"#define bit_AVX2 0x00000020\n"
14229"#define bit_SMEP 0x00000080\n"
14230"#define bit_BMI2 0x00000100\n"
14231"#define bit_ENH_MOVSB 0x00000200\n"
14232"#define bit_INVPCID 0x00000400\n"
14233"#define bit_RTM 0x00000800\n"
14234"#define bit_MPX 0x00004000\n"
14235"#define bit_AVX512F 0x00010000\n"
14236"#define bit_AVX512DQ 0x00020000\n"
14237"#define bit_RDSEED 0x00040000\n"
14238"#define bit_ADX 0x00080000\n"
14239"#define bit_AVX512IFMA 0x00200000\n"
14240"#define bit_CLFLUSHOPT 0x00800000\n"
14241"#define bit_CLWB 0x01000000\n"
14242"#define bit_AVX512PF 0x04000000\n"
14243"#define bit_AVX512ER 0x08000000\n"
14244"#define bit_AVX512CD 0x10000000\n"
14245"#define bit_SHA 0x20000000\n"
14246"#define bit_AVX512BW 0x40000000\n"
14247"#define bit_AVX512VL 0x80000000\n"
14248"\n"
14249"/* Features in %ecx for leaf 7 sub-leaf 0 */\n"
14250"#define bit_PREFTCHWT1 0x00000001\n"
14251"#define bit_AVX512VBMI 0x00000002\n"
14252"#define bit_PKU 0x00000004\n"
14253"#define bit_OSPKE 0x00000010\n"
14254"#define bit_WAITPKG 0x00000020\n"
14255"#define bit_AVX512VBMI2 0x00000040\n"
14256"#define bit_SHSTK 0x00000080\n"
14257"#define bit_GFNI 0x00000100\n"
14258"#define bit_VAES 0x00000200\n"
14259"#define bit_VPCLMULQDQ 0x00000400\n"
14260"#define bit_AVX512VNNI 0x00000800\n"
14261"#define bit_AVX512BITALG 0x00001000\n"
14262"#define bit_AVX512VPOPCNTDQ 0x00004000\n"
14263"#define bit_RDPID 0x00400000\n"
14264"#define bit_CLDEMOTE 0x02000000\n"
14265"#define bit_MOVDIRI 0x08000000\n"
14266"#define bit_MOVDIR64B 0x10000000\n"
14267"\n"
14268"/* Features in %edx for leaf 7 sub-leaf 0 */\n"
14269"#define bit_AVX5124VNNIW 0x00000004\n"
14270"#define bit_AVX5124FMAPS 0x00000008\n"
14271"#define bit_PCONFIG 0x00040000\n"
14272"#define bit_IBT 0x00100000\n"
14273"\n"
14274"/* Features in %eax for leaf 13 sub-leaf 1 */\n"
14275"#define bit_XSAVEOPT 0x00000001\n"
14276"#define bit_XSAVEC 0x00000002\n"
14277"#define bit_XSAVES 0x00000008\n"
14278"\n"
14279"/* Features in %eax for leaf 0x14 sub-leaf 0 */\n"
14280"#define bit_PTWRITE 0x00000010\n"
14281"\n"
14282"/* Features in %ecx for leaf 0x80000001 */\n"
14283"#define bit_LAHF_LM 0x00000001\n"
14284"#define bit_ABM 0x00000020\n"
14285"#define bit_LZCNT bit_ABM /* for gcc compat */\n"
14286"#define bit_SSE4a 0x00000040\n"
14287"#define bit_PRFCHW 0x00000100\n"
14288"#define bit_XOP 0x00000800\n"
14289"#define bit_LWP 0x00008000\n"
14290"#define bit_FMA4 0x00010000\n"
14291"#define bit_TBM 0x00200000\n"
14292"#define bit_MWAITX 0x20000000\n"
14293"\n"
14294"/* Features in %edx for leaf 0x80000001 */\n"
14295"#define bit_MMXEXT 0x00400000\n"
14296"#define bit_LM 0x20000000\n"
14297"#define bit_3DNOWP 0x40000000\n"
14298"#define bit_3DNOW 0x80000000\n"
14299"\n"
14300"/* Features in %ebx for leaf 0x80000008 */\n"
14301"#define bit_CLZERO 0x00000001\n"
14302"#define bit_WBNOINVD 0x00000200\n"
14303"\n"
14304"\n"
14305"#if __i386__\n"
14306"#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \\\n"
14307" __asm(\"cpuid\" : \"=a\"(__eax), \"=b\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n"
14308" : \"0\"(__leaf))\n"
14309"\n"
14310"#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \\\n"
14311" __asm(\"cpuid\" : \"=a\"(__eax), \"=b\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n"
14312" : \"0\"(__leaf), \"2\"(__count))\n"
14313"#else\n"
14314"/* x86-64 uses %rbx as the base register, so preserve it. */\n"
14315"#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \\\n"
14316" __asm(\" xchgq %%rbx,%q1\\n\" \\\n"
14317" \" cpuid\\n\" \\\n"
14318" \" xchgq %%rbx,%q1\" \\\n"
14319" : \"=a\"(__eax), \"=r\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n"
14320" : \"0\"(__leaf))\n"
14321"\n"
14322"#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \\\n"
14323" __asm(\" xchgq %%rbx,%q1\\n\" \\\n"
14324" \" cpuid\\n\" \\\n"
14325" \" xchgq %%rbx,%q1\" \\\n"
14326" : \"=a\"(__eax), \"=r\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n"
14327" : \"0\"(__leaf), \"2\"(__count))\n"
14328"#endif\n"
14329"\n"
14330"static __inline int __get_cpuid_max (unsigned int __leaf, unsigned int *__sig)\n"
14331"{\n"
14332" unsigned int __eax, __ebx, __ecx, __edx;\n"
14333"#if __i386__\n"
14334" int __cpuid_supported;\n"
14335"\n"
14336" __asm(\" pushfl\\n\"\n"
14337" \" popl %%eax\\n\"\n"
14338" \" movl %%eax,%%ecx\\n\"\n"
14339" \" xorl $0x00200000,%%eax\\n\"\n"
14340" \" pushl %%eax\\n\"\n"
14341" \" popfl\\n\"\n"
14342" \" pushfl\\n\"\n"
14343" \" popl %%eax\\n\"\n"
14344" \" movl $0,%0\\n\"\n"
14345" \" cmpl %%eax,%%ecx\\n\"\n"
14346" \" je 1f\\n\"\n"
14347" \" movl $1,%0\\n\"\n"
14348" \"1:\"\n"
14349" : \"=r\" (__cpuid_supported) : : \"eax\", \"ecx\");\n"
14350" if (!__cpuid_supported)\n"
14351" return 0;\n"
14352"#endif\n"
14353"\n"
14354" __cpuid(__leaf, __eax, __ebx, __ecx, __edx);\n"
14355" if (__sig)\n"
14356" *__sig = __ebx;\n"
14357" return __eax;\n"
14358"}\n"
14359"\n"
14360"static __inline int __get_cpuid (unsigned int __leaf, unsigned int *__eax,\n"
14361" unsigned int *__ebx, unsigned int *__ecx,\n"
14362" unsigned int *__edx)\n"
14363"{\n"
14364" unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0);\n"
14365"\n"
14366" if (__max_leaf == 0 || __max_leaf < __leaf)\n"
14367" return 0;\n"
14368"\n"
14369" __cpuid(__leaf, *__eax, *__ebx, *__ecx, *__edx);\n"
14370" return 1;\n"
14371"}\n"
14372"\n"
14373"static __inline int __get_cpuid_count (unsigned int __leaf,\n"
14374" unsigned int __subleaf,\n"
14375" unsigned int *__eax, unsigned int *__ebx,\n"
14376" unsigned int *__ecx, unsigned int *__edx)\n"
14377"{\n"
14378" unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0);\n"
14379"\n"
14380" if (__max_leaf == 0 || __max_leaf < __leaf)\n"
14381" return 0;\n"
14382"\n"
14383" __cpuid_count(__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);\n"
14384" return 1;\n"
14385"}\n"
14386"" } ,
14387 { "/builtins/emmintrin.h" , "/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===\n"
14388" *\n"
14389" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
14390" * of this software and associated documentation files (the \"Software\"), to deal\n"
14391" * in the Software without restriction, including without limitation the rights\n"
14392" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
14393" * copies of the Software, and to permit persons to whom the Software is\n"
14394" * furnished to do so, subject to the following conditions:\n"
14395" *\n"
14396" * The above copyright notice and this permission notice shall be included in\n"
14397" * all copies or substantial portions of the Software.\n"
14398" *\n"
14399" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
14400" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
14401" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
14402" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
14403" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
14404" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
14405" * THE SOFTWARE.\n"
14406" *\n"
14407" *===-----------------------------------------------------------------------===\n"
14408" */\n"
14409"\n"
14410"#ifndef __EMMINTRIN_H\n"
14411"#define __EMMINTRIN_H\n"
14412"\n"
14413"#include <xmmintrin.h>\n"
14414"\n"
14415"typedef double __m128d __attribute__((__vector_size__(16)));\n"
14416"typedef long long __m128i __attribute__((__vector_size__(16)));\n"
14417"\n"
14418"/* Type defines. */\n"
14419"typedef double __v2df __attribute__ ((__vector_size__ (16)));\n"
14420"typedef long long __v2di __attribute__ ((__vector_size__ (16)));\n"
14421"typedef short __v8hi __attribute__((__vector_size__(16)));\n"
14422"typedef char __v16qi __attribute__((__vector_size__(16)));\n"
14423"\n"
14424"/* Unsigned types */\n"
14425"typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));\n"
14426"typedef unsigned short __v8hu __attribute__((__vector_size__(16)));\n"
14427"typedef unsigned char __v16qu __attribute__((__vector_size__(16)));\n"
14428"\n"
14429"/* We need an explicitly signed variant for char. Note that this shouldn't\n"
14430" * appear in the interface though. */\n"
14431"typedef signed char __v16qs __attribute__((__vector_size__(16)));\n"
14432"\n"
14433"/* Define the default attributes for the functions in this file. */\n"
14434"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse2\"), __min_vector_width__(128)))\n"
14435"#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__(\"mmx,sse2\"), __min_vector_width__(64)))\n"
14436"\n"
14437"/// Adds lower double-precision values in both operands and returns the\n"
14438"/// sum in the lower 64 bits of the result. The upper 64 bits of the result\n"
14439"/// are copied from the upper double-precision value of the first operand.\n"
14440"///\n"
14441"/// \\headerfile <x86intrin.h>\n"
14442"///\n"
14443"/// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction.\n"
14444"///\n"
14445"/// \\param __a\n"
14446"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14447"/// \\param __b\n"
14448"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14449"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14450"/// sum of the lower 64 bits of both operands. The upper 64 bits are copied\n"
14451"/// from the upper 64 bits of the first source operand.\n"
14452"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14453"_mm_add_sd(__m128d __a, __m128d __b)\n"
14454"{\n"
14455" __a[0] += __b[0];\n"
14456" return __a;\n"
14457"}\n"
14458"\n"
14459"/// Adds two 128-bit vectors of [2 x double].\n"
14460"///\n"
14461"/// \\headerfile <x86intrin.h>\n"
14462"///\n"
14463"/// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction.\n"
14464"///\n"
14465"/// \\param __a\n"
14466"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14467"/// \\param __b\n"
14468"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14469"/// \\returns A 128-bit vector of [2 x double] containing the sums of both\n"
14470"/// operands.\n"
14471"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14472"_mm_add_pd(__m128d __a, __m128d __b)\n"
14473"{\n"
14474" return (__m128d)((__v2df)__a + (__v2df)__b);\n"
14475"}\n"
14476"\n"
14477"/// Subtracts the lower double-precision value of the second operand\n"
14478"/// from the lower double-precision value of the first operand and returns\n"
14479"/// the difference in the lower 64 bits of the result. The upper 64 bits of\n"
14480"/// the result are copied from the upper double-precision value of the first\n"
14481"/// operand.\n"
14482"///\n"
14483"/// \\headerfile <x86intrin.h>\n"
14484"///\n"
14485"/// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction.\n"
14486"///\n"
14487"/// \\param __a\n"
14488"/// A 128-bit vector of [2 x double] containing the minuend.\n"
14489"/// \\param __b\n"
14490"/// A 128-bit vector of [2 x double] containing the subtrahend.\n"
14491"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14492"/// difference of the lower 64 bits of both operands. The upper 64 bits are\n"
14493"/// copied from the upper 64 bits of the first source operand.\n"
14494"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14495"_mm_sub_sd(__m128d __a, __m128d __b)\n"
14496"{\n"
14497" __a[0] -= __b[0];\n"
14498" return __a;\n"
14499"}\n"
14500"\n"
14501"/// Subtracts two 128-bit vectors of [2 x double].\n"
14502"///\n"
14503"/// \\headerfile <x86intrin.h>\n"
14504"///\n"
14505"/// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction.\n"
14506"///\n"
14507"/// \\param __a\n"
14508"/// A 128-bit vector of [2 x double] containing the minuend.\n"
14509"/// \\param __b\n"
14510"/// A 128-bit vector of [2 x double] containing the subtrahend.\n"
14511"/// \\returns A 128-bit vector of [2 x double] containing the differences between\n"
14512"/// both operands.\n"
14513"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14514"_mm_sub_pd(__m128d __a, __m128d __b)\n"
14515"{\n"
14516" return (__m128d)((__v2df)__a - (__v2df)__b);\n"
14517"}\n"
14518"\n"
14519"/// Multiplies lower double-precision values in both operands and returns\n"
14520"/// the product in the lower 64 bits of the result. The upper 64 bits of the\n"
14521"/// result are copied from the upper double-precision value of the first\n"
14522"/// operand.\n"
14523"///\n"
14524"/// \\headerfile <x86intrin.h>\n"
14525"///\n"
14526"/// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction.\n"
14527"///\n"
14528"/// \\param __a\n"
14529"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14530"/// \\param __b\n"
14531"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14532"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14533"/// product of the lower 64 bits of both operands. The upper 64 bits are\n"
14534"/// copied from the upper 64 bits of the first source operand.\n"
14535"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14536"_mm_mul_sd(__m128d __a, __m128d __b)\n"
14537"{\n"
14538" __a[0] *= __b[0];\n"
14539" return __a;\n"
14540"}\n"
14541"\n"
14542"/// Multiplies two 128-bit vectors of [2 x double].\n"
14543"///\n"
14544"/// \\headerfile <x86intrin.h>\n"
14545"///\n"
14546"/// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction.\n"
14547"///\n"
14548"/// \\param __a\n"
14549"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14550"/// \\param __b\n"
14551"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14552"/// \\returns A 128-bit vector of [2 x double] containing the products of both\n"
14553"/// operands.\n"
14554"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14555"_mm_mul_pd(__m128d __a, __m128d __b)\n"
14556"{\n"
14557" return (__m128d)((__v2df)__a * (__v2df)__b);\n"
14558"}\n"
14559"\n"
14560"/// Divides the lower double-precision value of the first operand by the\n"
14561"/// lower double-precision value of the second operand and returns the\n"
14562"/// quotient in the lower 64 bits of the result. The upper 64 bits of the\n"
14563"/// result are copied from the upper double-precision value of the first\n"
14564"/// operand.\n"
14565"///\n"
14566"/// \\headerfile <x86intrin.h>\n"
14567"///\n"
14568"/// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction.\n"
14569"///\n"
14570"/// \\param __a\n"
14571"/// A 128-bit vector of [2 x double] containing the dividend.\n"
14572"/// \\param __b\n"
14573"/// A 128-bit vector of [2 x double] containing divisor.\n"
14574"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14575"/// quotient of the lower 64 bits of both operands. The upper 64 bits are\n"
14576"/// copied from the upper 64 bits of the first source operand.\n"
14577"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14578"_mm_div_sd(__m128d __a, __m128d __b)\n"
14579"{\n"
14580" __a[0] /= __b[0];\n"
14581" return __a;\n"
14582"}\n"
14583"\n"
14584"/// Performs an element-by-element division of two 128-bit vectors of\n"
14585"/// [2 x double].\n"
14586"///\n"
14587"/// \\headerfile <x86intrin.h>\n"
14588"///\n"
14589"/// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction.\n"
14590"///\n"
14591"/// \\param __a\n"
14592"/// A 128-bit vector of [2 x double] containing the dividend.\n"
14593"/// \\param __b\n"
14594"/// A 128-bit vector of [2 x double] containing the divisor.\n"
14595"/// \\returns A 128-bit vector of [2 x double] containing the quotients of both\n"
14596"/// operands.\n"
14597"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14598"_mm_div_pd(__m128d __a, __m128d __b)\n"
14599"{\n"
14600" return (__m128d)((__v2df)__a / (__v2df)__b);\n"
14601"}\n"
14602"\n"
14603"/// Calculates the square root of the lower double-precision value of\n"
14604"/// the second operand and returns it in the lower 64 bits of the result.\n"
14605"/// The upper 64 bits of the result are copied from the upper\n"
14606"/// double-precision value of the first operand.\n"
14607"///\n"
14608"/// \\headerfile <x86intrin.h>\n"
14609"///\n"
14610"/// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction.\n"
14611"///\n"
14612"/// \\param __a\n"
14613"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14614"/// upper 64 bits of this operand are copied to the upper 64 bits of the\n"
14615"/// result.\n"
14616"/// \\param __b\n"
14617"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14618"/// square root is calculated using the lower 64 bits of this operand.\n"
14619"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14620"/// square root of the lower 64 bits of operand \\a __b, and whose upper 64\n"
14621"/// bits are copied from the upper 64 bits of operand \\a __a.\n"
14622"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14623"_mm_sqrt_sd(__m128d __a, __m128d __b)\n"
14624"{\n"
14625" __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b);\n"
14626" return __extension__ (__m128d) { __c[0], __a[1] };\n"
14627"}\n"
14628"\n"
14629"/// Calculates the square root of the each of two values stored in a\n"
14630"/// 128-bit vector of [2 x double].\n"
14631"///\n"
14632"/// \\headerfile <x86intrin.h>\n"
14633"///\n"
14634"/// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction.\n"
14635"///\n"
14636"/// \\param __a\n"
14637"/// A 128-bit vector of [2 x double].\n"
14638"/// \\returns A 128-bit vector of [2 x double] containing the square roots of the\n"
14639"/// values in the operand.\n"
14640"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14641"_mm_sqrt_pd(__m128d __a)\n"
14642"{\n"
14643" return __builtin_ia32_sqrtpd((__v2df)__a);\n"
14644"}\n"
14645"\n"
14646"/// Compares lower 64-bit double-precision values of both operands, and\n"
14647"/// returns the lesser of the pair of values in the lower 64-bits of the\n"
14648"/// result. The upper 64 bits of the result are copied from the upper\n"
14649"/// double-precision value of the first operand.\n"
14650"///\n"
14651"/// \\headerfile <x86intrin.h>\n"
14652"///\n"
14653"/// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction.\n"
14654"///\n"
14655"/// \\param __a\n"
14656"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14657"/// lower 64 bits of this operand are used in the comparison.\n"
14658"/// \\param __b\n"
14659"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14660"/// lower 64 bits of this operand are used in the comparison.\n"
14661"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14662"/// minimum value between both operands. The upper 64 bits are copied from\n"
14663"/// the upper 64 bits of the first source operand.\n"
14664"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14665"_mm_min_sd(__m128d __a, __m128d __b)\n"
14666"{\n"
14667" return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);\n"
14668"}\n"
14669"\n"
14670"/// Performs element-by-element comparison of the two 128-bit vectors of\n"
14671"/// [2 x double] and returns the vector containing the lesser of each pair of\n"
14672"/// values.\n"
14673"///\n"
14674"/// \\headerfile <x86intrin.h>\n"
14675"///\n"
14676"/// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction.\n"
14677"///\n"
14678"/// \\param __a\n"
14679"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14680"/// \\param __b\n"
14681"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14682"/// \\returns A 128-bit vector of [2 x double] containing the minimum values\n"
14683"/// between both operands.\n"
14684"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14685"_mm_min_pd(__m128d __a, __m128d __b)\n"
14686"{\n"
14687" return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);\n"
14688"}\n"
14689"\n"
14690"/// Compares lower 64-bit double-precision values of both operands, and\n"
14691"/// returns the greater of the pair of values in the lower 64-bits of the\n"
14692"/// result. The upper 64 bits of the result are copied from the upper\n"
14693"/// double-precision value of the first operand.\n"
14694"///\n"
14695"/// \\headerfile <x86intrin.h>\n"
14696"///\n"
14697"/// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction.\n"
14698"///\n"
14699"/// \\param __a\n"
14700"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14701"/// lower 64 bits of this operand are used in the comparison.\n"
14702"/// \\param __b\n"
14703"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14704"/// lower 64 bits of this operand are used in the comparison.\n"
14705"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14706"/// maximum value between both operands. The upper 64 bits are copied from\n"
14707"/// the upper 64 bits of the first source operand.\n"
14708"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14709"_mm_max_sd(__m128d __a, __m128d __b)\n"
14710"{\n"
14711" return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);\n"
14712"}\n"
14713"\n"
14714"/// Performs element-by-element comparison of the two 128-bit vectors of\n"
14715"/// [2 x double] and returns the vector containing the greater of each pair\n"
14716"/// of values.\n"
14717"///\n"
14718"/// \\headerfile <x86intrin.h>\n"
14719"///\n"
14720"/// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction.\n"
14721"///\n"
14722"/// \\param __a\n"
14723"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14724"/// \\param __b\n"
14725"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14726"/// \\returns A 128-bit vector of [2 x double] containing the maximum values\n"
14727"/// between both operands.\n"
14728"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14729"_mm_max_pd(__m128d __a, __m128d __b)\n"
14730"{\n"
14731" return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);\n"
14732"}\n"
14733"\n"
14734"/// Performs a bitwise AND of two 128-bit vectors of [2 x double].\n"
14735"///\n"
14736"/// \\headerfile <x86intrin.h>\n"
14737"///\n"
14738"/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.\n"
14739"///\n"
14740"/// \\param __a\n"
14741"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14742"/// \\param __b\n"
14743"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14744"/// \\returns A 128-bit vector of [2 x double] containing the bitwise AND of the\n"
14745"/// values between both operands.\n"
14746"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14747"_mm_and_pd(__m128d __a, __m128d __b)\n"
14748"{\n"
14749" return (__m128d)((__v2du)__a & (__v2du)__b);\n"
14750"}\n"
14751"\n"
14752"/// Performs a bitwise AND of two 128-bit vectors of [2 x double], using\n"
14753"/// the one's complement of the values contained in the first source operand.\n"
14754"///\n"
14755"/// \\headerfile <x86intrin.h>\n"
14756"///\n"
14757"/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.\n"
14758"///\n"
14759"/// \\param __a\n"
14760"/// A 128-bit vector of [2 x double] containing the left source operand. The\n"
14761"/// one's complement of this value is used in the bitwise AND.\n"
14762"/// \\param __b\n"
14763"/// A 128-bit vector of [2 x double] containing the right source operand.\n"
14764"/// \\returns A 128-bit vector of [2 x double] containing the bitwise AND of the\n"
14765"/// values in the second operand and the one's complement of the first\n"
14766"/// operand.\n"
14767"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14768"_mm_andnot_pd(__m128d __a, __m128d __b)\n"
14769"{\n"
14770" return (__m128d)(~(__v2du)__a & (__v2du)__b);\n"
14771"}\n"
14772"\n"
14773"/// Performs a bitwise OR of two 128-bit vectors of [2 x double].\n"
14774"///\n"
14775"/// \\headerfile <x86intrin.h>\n"
14776"///\n"
14777"/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.\n"
14778"///\n"
14779"/// \\param __a\n"
14780"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14781"/// \\param __b\n"
14782"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14783"/// \\returns A 128-bit vector of [2 x double] containing the bitwise OR of the\n"
14784"/// values between both operands.\n"
14785"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14786"_mm_or_pd(__m128d __a, __m128d __b)\n"
14787"{\n"
14788" return (__m128d)((__v2du)__a | (__v2du)__b);\n"
14789"}\n"
14790"\n"
14791"/// Performs a bitwise XOR of two 128-bit vectors of [2 x double].\n"
14792"///\n"
14793"/// \\headerfile <x86intrin.h>\n"
14794"///\n"
14795"/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.\n"
14796"///\n"
14797"/// \\param __a\n"
14798"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14799"/// \\param __b\n"
14800"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14801"/// \\returns A 128-bit vector of [2 x double] containing the bitwise XOR of the\n"
14802"/// values between both operands.\n"
14803"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14804"_mm_xor_pd(__m128d __a, __m128d __b)\n"
14805"{\n"
14806" return (__m128d)((__v2du)__a ^ (__v2du)__b);\n"
14807"}\n"
14808"\n"
14809"/// Compares each of the corresponding double-precision values of the\n"
14810"/// 128-bit vectors of [2 x double] for equality. Each comparison yields 0x0\n"
14811"/// for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14812"///\n"
14813"/// \\headerfile <x86intrin.h>\n"
14814"///\n"
14815"/// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction.\n"
14816"///\n"
14817"/// \\param __a\n"
14818"/// A 128-bit vector of [2 x double].\n"
14819"/// \\param __b\n"
14820"/// A 128-bit vector of [2 x double].\n"
14821"/// \\returns A 128-bit vector containing the comparison results.\n"
14822"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14823"_mm_cmpeq_pd(__m128d __a, __m128d __b)\n"
14824"{\n"
14825" return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);\n"
14826"}\n"
14827"\n"
14828"/// Compares each of the corresponding double-precision values of the\n"
14829"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14830"/// operand are less than those in the second operand. Each comparison\n"
14831"/// yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14832"///\n"
14833"/// \\headerfile <x86intrin.h>\n"
14834"///\n"
14835"/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.\n"
14836"///\n"
14837"/// \\param __a\n"
14838"/// A 128-bit vector of [2 x double].\n"
14839"/// \\param __b\n"
14840"/// A 128-bit vector of [2 x double].\n"
14841"/// \\returns A 128-bit vector containing the comparison results.\n"
14842"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14843"_mm_cmplt_pd(__m128d __a, __m128d __b)\n"
14844"{\n"
14845" return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);\n"
14846"}\n"
14847"\n"
14848"/// Compares each of the corresponding double-precision values of the\n"
14849"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14850"/// operand are less than or equal to those in the second operand.\n"
14851"///\n"
14852"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14853"///\n"
14854"/// \\headerfile <x86intrin.h>\n"
14855"///\n"
14856"/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.\n"
14857"///\n"
14858"/// \\param __a\n"
14859"/// A 128-bit vector of [2 x double].\n"
14860"/// \\param __b\n"
14861"/// A 128-bit vector of [2 x double].\n"
14862"/// \\returns A 128-bit vector containing the comparison results.\n"
14863"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14864"_mm_cmple_pd(__m128d __a, __m128d __b)\n"
14865"{\n"
14866" return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);\n"
14867"}\n"
14868"\n"
14869"/// Compares each of the corresponding double-precision values of the\n"
14870"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14871"/// operand are greater than those in the second operand.\n"
14872"///\n"
14873"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14874"///\n"
14875"/// \\headerfile <x86intrin.h>\n"
14876"///\n"
14877"/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.\n"
14878"///\n"
14879"/// \\param __a\n"
14880"/// A 128-bit vector of [2 x double].\n"
14881"/// \\param __b\n"
14882"/// A 128-bit vector of [2 x double].\n"
14883"/// \\returns A 128-bit vector containing the comparison results.\n"
14884"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14885"_mm_cmpgt_pd(__m128d __a, __m128d __b)\n"
14886"{\n"
14887" return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);\n"
14888"}\n"
14889"\n"
14890"/// Compares each of the corresponding double-precision values of the\n"
14891"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14892"/// operand are greater than or equal to those in the second operand.\n"
14893"///\n"
14894"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14895"///\n"
14896"/// \\headerfile <x86intrin.h>\n"
14897"///\n"
14898"/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.\n"
14899"///\n"
14900"/// \\param __a\n"
14901"/// A 128-bit vector of [2 x double].\n"
14902"/// \\param __b\n"
14903"/// A 128-bit vector of [2 x double].\n"
14904"/// \\returns A 128-bit vector containing the comparison results.\n"
14905"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14906"_mm_cmpge_pd(__m128d __a, __m128d __b)\n"
14907"{\n"
14908" return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);\n"
14909"}\n"
14910"\n"
14911"/// Compares each of the corresponding double-precision values of the\n"
14912"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14913"/// operand are ordered with respect to those in the second operand.\n"
14914"///\n"
14915"/// A pair of double-precision values are \"ordered\" with respect to each\n"
14916"/// other if neither value is a NaN. Each comparison yields 0x0 for false,\n"
14917"/// 0xFFFFFFFFFFFFFFFF for true.\n"
14918"///\n"
14919"/// \\headerfile <x86intrin.h>\n"
14920"///\n"
14921"/// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction.\n"
14922"///\n"
14923"/// \\param __a\n"
14924"/// A 128-bit vector of [2 x double].\n"
14925"/// \\param __b\n"
14926"/// A 128-bit vector of [2 x double].\n"
14927"/// \\returns A 128-bit vector containing the comparison results.\n"
14928"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14929"_mm_cmpord_pd(__m128d __a, __m128d __b)\n"
14930"{\n"
14931" return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);\n"
14932"}\n"
14933"\n"
14934"/// Compares each of the corresponding double-precision values of the\n"
14935"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14936"/// operand are unordered with respect to those in the second operand.\n"
14937"///\n"
14938"/// A pair of double-precision values are \"unordered\" with respect to each\n"
14939"/// other if one or both values are NaN. Each comparison yields 0x0 for\n"
14940"/// false, 0xFFFFFFFFFFFFFFFF for true.\n"
14941"///\n"
14942"/// \\headerfile <x86intrin.h>\n"
14943"///\n"
14944"/// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c>\n"
14945"/// instruction.\n"
14946"///\n"
14947"/// \\param __a\n"
14948"/// A 128-bit vector of [2 x double].\n"
14949"/// \\param __b\n"
14950"/// A 128-bit vector of [2 x double].\n"
14951"/// \\returns A 128-bit vector containing the comparison results.\n"
14952"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14953"_mm_cmpunord_pd(__m128d __a, __m128d __b)\n"
14954"{\n"
14955" return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);\n"
14956"}\n"
14957"\n"
14958"/// Compares each of the corresponding double-precision values of the\n"
14959"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14960"/// operand are unequal to those in the second operand.\n"
14961"///\n"
14962"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14963"///\n"
14964"/// \\headerfile <x86intrin.h>\n"
14965"///\n"
14966"/// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction.\n"
14967"///\n"
14968"/// \\param __a\n"
14969"/// A 128-bit vector of [2 x double].\n"
14970"/// \\param __b\n"
14971"/// A 128-bit vector of [2 x double].\n"
14972"/// \\returns A 128-bit vector containing the comparison results.\n"
14973"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14974"_mm_cmpneq_pd(__m128d __a, __m128d __b)\n"
14975"{\n"
14976" return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);\n"
14977"}\n"
14978"\n"
14979"/// Compares each of the corresponding double-precision values of the\n"
14980"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14981"/// operand are not less than those in the second operand.\n"
14982"///\n"
14983"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14984"///\n"
14985"/// \\headerfile <x86intrin.h>\n"
14986"///\n"
14987"/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.\n"
14988"///\n"
14989"/// \\param __a\n"
14990"/// A 128-bit vector of [2 x double].\n"
14991"/// \\param __b\n"
14992"/// A 128-bit vector of [2 x double].\n"
14993"/// \\returns A 128-bit vector containing the comparison results.\n"
14994"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14995"_mm_cmpnlt_pd(__m128d __a, __m128d __b)\n"
14996"{\n"
14997" return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);\n"
14998"}\n"
14999"\n"
15000"/// Compares each of the corresponding double-precision values of the\n"
15001"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
15002"/// operand are not less than or equal to those in the second operand.\n"
15003"///\n"
15004"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15005"///\n"
15006"/// \\headerfile <x86intrin.h>\n"
15007"///\n"
15008"/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.\n"
15009"///\n"
15010"/// \\param __a\n"
15011"/// A 128-bit vector of [2 x double].\n"
15012"/// \\param __b\n"
15013"/// A 128-bit vector of [2 x double].\n"
15014"/// \\returns A 128-bit vector containing the comparison results.\n"
15015"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15016"_mm_cmpnle_pd(__m128d __a, __m128d __b)\n"
15017"{\n"
15018" return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);\n"
15019"}\n"
15020"\n"
15021"/// Compares each of the corresponding double-precision values of the\n"
15022"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
15023"/// operand are not greater than those in the second operand.\n"
15024"///\n"
15025"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15026"///\n"
15027"/// \\headerfile <x86intrin.h>\n"
15028"///\n"
15029"/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.\n"
15030"///\n"
15031"/// \\param __a\n"
15032"/// A 128-bit vector of [2 x double].\n"
15033"/// \\param __b\n"
15034"/// A 128-bit vector of [2 x double].\n"
15035"/// \\returns A 128-bit vector containing the comparison results.\n"
15036"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15037"_mm_cmpngt_pd(__m128d __a, __m128d __b)\n"
15038"{\n"
15039" return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);\n"
15040"}\n"
15041"\n"
15042"/// Compares each of the corresponding double-precision values of the\n"
15043"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
15044"/// operand are not greater than or equal to those in the second operand.\n"
15045"///\n"
15046"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15047"///\n"
15048"/// \\headerfile <x86intrin.h>\n"
15049"///\n"
15050"/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.\n"
15051"///\n"
15052"/// \\param __a\n"
15053"/// A 128-bit vector of [2 x double].\n"
15054"/// \\param __b\n"
15055"/// A 128-bit vector of [2 x double].\n"
15056"/// \\returns A 128-bit vector containing the comparison results.\n"
15057"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15058"_mm_cmpnge_pd(__m128d __a, __m128d __b)\n"
15059"{\n"
15060" return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);\n"
15061"}\n"
15062"\n"
15063"/// Compares the lower double-precision floating-point values in each of\n"
15064"/// the two 128-bit floating-point vectors of [2 x double] for equality.\n"
15065"///\n"
15066"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15067"///\n"
15068"/// \\headerfile <x86intrin.h>\n"
15069"///\n"
15070"/// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction.\n"
15071"///\n"
15072"/// \\param __a\n"
15073"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15074"/// compared to the lower double-precision value of \\a __b.\n"
15075"/// \\param __b\n"
15076"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15077"/// compared to the lower double-precision value of \\a __a.\n"
15078"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15079"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15080"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15081"_mm_cmpeq_sd(__m128d __a, __m128d __b)\n"
15082"{\n"
15083" return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);\n"
15084"}\n"
15085"\n"
15086"/// Compares the lower double-precision floating-point values in each of\n"
15087"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15088"/// the value in the first parameter is less than the corresponding value in\n"
15089"/// the second parameter.\n"
15090"///\n"
15091"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15092"///\n"
15093"/// \\headerfile <x86intrin.h>\n"
15094"///\n"
15095"/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.\n"
15096"///\n"
15097"/// \\param __a\n"
15098"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15099"/// compared to the lower double-precision value of \\a __b.\n"
15100"/// \\param __b\n"
15101"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15102"/// compared to the lower double-precision value of \\a __a.\n"
15103"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15104"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15105"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15106"_mm_cmplt_sd(__m128d __a, __m128d __b)\n"
15107"{\n"
15108" return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);\n"
15109"}\n"
15110"\n"
15111"/// Compares the lower double-precision floating-point values in each of\n"
15112"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15113"/// the value in the first parameter is less than or equal to the\n"
15114"/// corresponding value in the second parameter.\n"
15115"///\n"
15116"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15117"///\n"
15118"/// \\headerfile <x86intrin.h>\n"
15119"///\n"
15120"/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.\n"
15121"///\n"
15122"/// \\param __a\n"
15123"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15124"/// compared to the lower double-precision value of \\a __b.\n"
15125"/// \\param __b\n"
15126"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15127"/// compared to the lower double-precision value of \\a __a.\n"
15128"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15129"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15130"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15131"_mm_cmple_sd(__m128d __a, __m128d __b)\n"
15132"{\n"
15133" return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);\n"
15134"}\n"
15135"\n"
15136"/// Compares the lower double-precision floating-point values in each of\n"
15137"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15138"/// the value in the first parameter is greater than the corresponding value\n"
15139"/// in the second parameter.\n"
15140"///\n"
15141"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15142"///\n"
15143"/// \\headerfile <x86intrin.h>\n"
15144"///\n"
15145"/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.\n"
15146"///\n"
15147"/// \\param __a\n"
15148"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15149"/// compared to the lower double-precision value of \\a __b.\n"
15150"/// \\param __b\n"
15151"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15152"/// compared to the lower double-precision value of \\a __a.\n"
15153"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15154"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15155"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15156"_mm_cmpgt_sd(__m128d __a, __m128d __b)\n"
15157"{\n"
15158" __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);\n"
15159" return __extension__ (__m128d) { __c[0], __a[1] };\n"
15160"}\n"
15161"\n"
15162"/// Compares the lower double-precision floating-point values in each of\n"
15163"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15164"/// the value in the first parameter is greater than or equal to the\n"
15165"/// corresponding value in the second parameter.\n"
15166"///\n"
15167"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15168"///\n"
15169"/// \\headerfile <x86intrin.h>\n"
15170"///\n"
15171"/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.\n"
15172"///\n"
15173"/// \\param __a\n"
15174"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15175"/// compared to the lower double-precision value of \\a __b.\n"
15176"/// \\param __b\n"
15177"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15178"/// compared to the lower double-precision value of \\a __a.\n"
15179"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15180"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15181"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15182"_mm_cmpge_sd(__m128d __a, __m128d __b)\n"
15183"{\n"
15184" __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);\n"
15185" return __extension__ (__m128d) { __c[0], __a[1] };\n"
15186"}\n"
15187"\n"
15188"/// Compares the lower double-precision floating-point values in each of\n"
15189"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15190"/// the value in the first parameter is \"ordered\" with respect to the\n"
15191"/// corresponding value in the second parameter.\n"
15192"///\n"
15193"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair\n"
15194"/// of double-precision values are \"ordered\" with respect to each other if\n"
15195"/// neither value is a NaN.\n"
15196"///\n"
15197"/// \\headerfile <x86intrin.h>\n"
15198"///\n"
15199"/// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction.\n"
15200"///\n"
15201"/// \\param __a\n"
15202"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15203"/// compared to the lower double-precision value of \\a __b.\n"
15204"/// \\param __b\n"
15205"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15206"/// compared to the lower double-precision value of \\a __a.\n"
15207"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15208"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15209"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15210"_mm_cmpord_sd(__m128d __a, __m128d __b)\n"
15211"{\n"
15212" return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);\n"
15213"}\n"
15214"\n"
15215"/// Compares the lower double-precision floating-point values in each of\n"
15216"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15217"/// the value in the first parameter is \"unordered\" with respect to the\n"
15218"/// corresponding value in the second parameter.\n"
15219"///\n"
15220"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair\n"
15221"/// of double-precision values are \"unordered\" with respect to each other if\n"
15222"/// one or both values are NaN.\n"
15223"///\n"
15224"/// \\headerfile <x86intrin.h>\n"
15225"///\n"
15226"/// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c>\n"
15227"/// instruction.\n"
15228"///\n"
15229"/// \\param __a\n"
15230"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15231"/// compared to the lower double-precision value of \\a __b.\n"
15232"/// \\param __b\n"
15233"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15234"/// compared to the lower double-precision value of \\a __a.\n"
15235"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15236"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15237"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15238"_mm_cmpunord_sd(__m128d __a, __m128d __b)\n"
15239"{\n"
15240" return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);\n"
15241"}\n"
15242"\n"
15243"/// Compares the lower double-precision floating-point values in each of\n"
15244"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15245"/// the value in the first parameter is unequal to the corresponding value in\n"
15246"/// the second parameter.\n"
15247"///\n"
15248"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15249"///\n"
15250"/// \\headerfile <x86intrin.h>\n"
15251"///\n"
15252"/// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction.\n"
15253"///\n"
15254"/// \\param __a\n"
15255"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15256"/// compared to the lower double-precision value of \\a __b.\n"
15257"/// \\param __b\n"
15258"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15259"/// compared to the lower double-precision value of \\a __a.\n"
15260"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15261"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15262"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15263"_mm_cmpneq_sd(__m128d __a, __m128d __b)\n"
15264"{\n"
15265" return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);\n"
15266"}\n"
15267"\n"
15268"/// Compares the lower double-precision floating-point values in each of\n"
15269"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15270"/// the value in the first parameter is not less than the corresponding\n"
15271"/// value in the second parameter.\n"
15272"///\n"
15273"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15274"///\n"
15275"/// \\headerfile <x86intrin.h>\n"
15276"///\n"
15277"/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.\n"
15278"///\n"
15279"/// \\param __a\n"
15280"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15281"/// compared to the lower double-precision value of \\a __b.\n"
15282"/// \\param __b\n"
15283"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15284"/// compared to the lower double-precision value of \\a __a.\n"
15285"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15286"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15287"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15288"_mm_cmpnlt_sd(__m128d __a, __m128d __b)\n"
15289"{\n"
15290" return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);\n"
15291"}\n"
15292"\n"
15293"/// Compares the lower double-precision floating-point values in each of\n"
15294"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15295"/// the value in the first parameter is not less than or equal to the\n"
15296"/// corresponding value in the second parameter.\n"
15297"///\n"
15298"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15299"///\n"
15300"/// \\headerfile <x86intrin.h>\n"
15301"///\n"
15302"/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.\n"
15303"///\n"
15304"/// \\param __a\n"
15305"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15306"/// compared to the lower double-precision value of \\a __b.\n"
15307"/// \\param __b\n"
15308"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15309"/// compared to the lower double-precision value of \\a __a.\n"
15310"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15311"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15312"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15313"_mm_cmpnle_sd(__m128d __a, __m128d __b)\n"
15314"{\n"
15315" return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);\n"
15316"}\n"
15317"\n"
15318"/// Compares the lower double-precision floating-point values in each of\n"
15319"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15320"/// the value in the first parameter is not greater than the corresponding\n"
15321"/// value in the second parameter.\n"
15322"///\n"
15323"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15324"///\n"
15325"/// \\headerfile <x86intrin.h>\n"
15326"///\n"
15327"/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.\n"
15328"///\n"
15329"/// \\param __a\n"
15330"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15331"/// compared to the lower double-precision value of \\a __b.\n"
15332"/// \\param __b\n"
15333"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15334"/// compared to the lower double-precision value of \\a __a.\n"
15335"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15336"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15337"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15338"_mm_cmpngt_sd(__m128d __a, __m128d __b)\n"
15339"{\n"
15340" __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);\n"
15341" return __extension__ (__m128d) { __c[0], __a[1] };\n"
15342"}\n"
15343"\n"
15344"/// Compares the lower double-precision floating-point values in each of\n"
15345"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15346"/// the value in the first parameter is not greater than or equal to the\n"
15347"/// corresponding value in the second parameter.\n"
15348"///\n"
15349"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15350"///\n"
15351"/// \\headerfile <x86intrin.h>\n"
15352"///\n"
15353"/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.\n"
15354"///\n"
15355"/// \\param __a\n"
15356"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15357"/// compared to the lower double-precision value of \\a __b.\n"
15358"/// \\param __b\n"
15359"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15360"/// compared to the lower double-precision value of \\a __a.\n"
15361"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15362"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15363"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15364"_mm_cmpnge_sd(__m128d __a, __m128d __b)\n"
15365"{\n"
15366" __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);\n"
15367" return __extension__ (__m128d) { __c[0], __a[1] };\n"
15368"}\n"
15369"\n"
15370"/// Compares the lower double-precision floating-point values in each of\n"
15371"/// the two 128-bit floating-point vectors of [2 x double] for equality.\n"
15372"///\n"
15373"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15374"/// lower double-precision values is NaN, 0 is returned.\n"
15375"///\n"
15376"/// \\headerfile <x86intrin.h>\n"
15377"///\n"
15378"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15379"///\n"
15380"/// \\param __a\n"
15381"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15382"/// compared to the lower double-precision value of \\a __b.\n"
15383"/// \\param __b\n"
15384"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15385"/// compared to the lower double-precision value of \\a __a.\n"
15386"/// \\returns An integer containing the comparison results. If either of the two\n"
15387"/// lower double-precision values is NaN, 0 is returned.\n"
15388"static __inline__ int __DEFAULT_FN_ATTRS\n"
15389"_mm_comieq_sd(__m128d __a, __m128d __b)\n"
15390"{\n"
15391" return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);\n"
15392"}\n"
15393"\n"
15394"/// Compares the lower double-precision floating-point values in each of\n"
15395"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15396"/// the value in the first parameter is less than the corresponding value in\n"
15397"/// the second parameter.\n"
15398"///\n"
15399"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15400"/// lower double-precision values is NaN, 0 is returned.\n"
15401"///\n"
15402"/// \\headerfile <x86intrin.h>\n"
15403"///\n"
15404"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15405"///\n"
15406"/// \\param __a\n"
15407"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15408"/// compared to the lower double-precision value of \\a __b.\n"
15409"/// \\param __b\n"
15410"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15411"/// compared to the lower double-precision value of \\a __a.\n"
15412"/// \\returns An integer containing the comparison results. If either of the two\n"
15413"/// lower double-precision values is NaN, 0 is returned.\n"
15414"static __inline__ int __DEFAULT_FN_ATTRS\n"
15415"_mm_comilt_sd(__m128d __a, __m128d __b)\n"
15416"{\n"
15417" return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);\n"
15418"}\n"
15419"\n"
15420"/// Compares the lower double-precision floating-point values in each of\n"
15421"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15422"/// the value in the first parameter is less than or equal to the\n"
15423"/// corresponding value in the second parameter.\n"
15424"///\n"
15425"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15426"/// lower double-precision values is NaN, 0 is returned.\n"
15427"///\n"
15428"/// \\headerfile <x86intrin.h>\n"
15429"///\n"
15430"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15431"///\n"
15432"/// \\param __a\n"
15433"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15434"/// compared to the lower double-precision value of \\a __b.\n"
15435"/// \\param __b\n"
15436"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15437"/// compared to the lower double-precision value of \\a __a.\n"
15438"/// \\returns An integer containing the comparison results. If either of the two\n"
15439"/// lower double-precision values is NaN, 0 is returned.\n"
15440"static __inline__ int __DEFAULT_FN_ATTRS\n"
15441"_mm_comile_sd(__m128d __a, __m128d __b)\n"
15442"{\n"
15443" return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);\n"
15444"}\n"
15445"\n"
15446"/// Compares the lower double-precision floating-point values in each of\n"
15447"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15448"/// the value in the first parameter is greater than the corresponding value\n"
15449"/// in the second parameter.\n"
15450"///\n"
15451"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15452"/// lower double-precision values is NaN, 0 is returned.\n"
15453"///\n"
15454"/// \\headerfile <x86intrin.h>\n"
15455"///\n"
15456"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15457"///\n"
15458"/// \\param __a\n"
15459"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15460"/// compared to the lower double-precision value of \\a __b.\n"
15461"/// \\param __b\n"
15462"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15463"/// compared to the lower double-precision value of \\a __a.\n"
15464"/// \\returns An integer containing the comparison results. If either of the two\n"
15465"/// lower double-precision values is NaN, 0 is returned.\n"
15466"static __inline__ int __DEFAULT_FN_ATTRS\n"
15467"_mm_comigt_sd(__m128d __a, __m128d __b)\n"
15468"{\n"
15469" return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);\n"
15470"}\n"
15471"\n"
15472"/// Compares the lower double-precision floating-point values in each of\n"
15473"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15474"/// the value in the first parameter is greater than or equal to the\n"
15475"/// corresponding value in the second parameter.\n"
15476"///\n"
15477"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15478"/// lower double-precision values is NaN, 0 is returned.\n"
15479"///\n"
15480"/// \\headerfile <x86intrin.h>\n"
15481"///\n"
15482"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15483"///\n"
15484"/// \\param __a\n"
15485"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15486"/// compared to the lower double-precision value of \\a __b.\n"
15487"/// \\param __b\n"
15488"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15489"/// compared to the lower double-precision value of \\a __a.\n"
15490"/// \\returns An integer containing the comparison results. If either of the two\n"
15491"/// lower double-precision values is NaN, 0 is returned.\n"
15492"static __inline__ int __DEFAULT_FN_ATTRS\n"
15493"_mm_comige_sd(__m128d __a, __m128d __b)\n"
15494"{\n"
15495" return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);\n"
15496"}\n"
15497"\n"
15498"/// Compares the lower double-precision floating-point values in each of\n"
15499"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15500"/// the value in the first parameter is unequal to the corresponding value in\n"
15501"/// the second parameter.\n"
15502"///\n"
15503"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15504"/// lower double-precision values is NaN, 1 is returned.\n"
15505"///\n"
15506"/// \\headerfile <x86intrin.h>\n"
15507"///\n"
15508"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15509"///\n"
15510"/// \\param __a\n"
15511"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15512"/// compared to the lower double-precision value of \\a __b.\n"
15513"/// \\param __b\n"
15514"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15515"/// compared to the lower double-precision value of \\a __a.\n"
15516"/// \\returns An integer containing the comparison results. If either of the two\n"
15517"/// lower double-precision values is NaN, 1 is returned.\n"
15518"static __inline__ int __DEFAULT_FN_ATTRS\n"
15519"_mm_comineq_sd(__m128d __a, __m128d __b)\n"
15520"{\n"
15521" return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);\n"
15522"}\n"
15523"\n"
15524"/// Compares the lower double-precision floating-point values in each of\n"
15525"/// the two 128-bit floating-point vectors of [2 x double] for equality. The\n"
15526"/// comparison yields 0 for false, 1 for true.\n"
15527"///\n"
15528"/// If either of the two lower double-precision values is NaN, 0 is returned.\n"
15529"///\n"
15530"/// \\headerfile <x86intrin.h>\n"
15531"///\n"
15532"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15533"///\n"
15534"/// \\param __a\n"
15535"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15536"/// compared to the lower double-precision value of \\a __b.\n"
15537"/// \\param __b\n"
15538"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15539"/// compared to the lower double-precision value of \\a __a.\n"
15540"/// \\returns An integer containing the comparison results. If either of the two\n"
15541"/// lower double-precision values is NaN, 0 is returned.\n"
15542"static __inline__ int __DEFAULT_FN_ATTRS\n"
15543"_mm_ucomieq_sd(__m128d __a, __m128d __b)\n"
15544"{\n"
15545" return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);\n"
15546"}\n"
15547"\n"
15548"/// Compares the lower double-precision floating-point values in each of\n"
15549"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15550"/// the value in the first parameter is less than the corresponding value in\n"
15551"/// the second parameter.\n"
15552"///\n"
15553"/// The comparison yields 0 for false, 1 for true. If either of the two lower\n"
15554"/// double-precision values is NaN, 0 is returned.\n"
15555"///\n"
15556"/// \\headerfile <x86intrin.h>\n"
15557"///\n"
15558"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15559"///\n"
15560"/// \\param __a\n"
15561"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15562"/// compared to the lower double-precision value of \\a __b.\n"
15563"/// \\param __b\n"
15564"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15565"/// compared to the lower double-precision value of \\a __a.\n"
15566"/// \\returns An integer containing the comparison results. If either of the two\n"
15567"/// lower double-precision values is NaN, 0 is returned.\n"
15568"static __inline__ int __DEFAULT_FN_ATTRS\n"
15569"_mm_ucomilt_sd(__m128d __a, __m128d __b)\n"
15570"{\n"
15571" return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);\n"
15572"}\n"
15573"\n"
15574"/// Compares the lower double-precision floating-point values in each of\n"
15575"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15576"/// the value in the first parameter is less than or equal to the\n"
15577"/// corresponding value in the second parameter.\n"
15578"///\n"
15579"/// The comparison yields 0 for false, 1 for true. If either of the two lower\n"
15580"/// double-precision values is NaN, 0 is returned.\n"
15581"///\n"
15582"/// \\headerfile <x86intrin.h>\n"
15583"///\n"
15584"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15585"///\n"
15586"/// \\param __a\n"
15587"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15588"/// compared to the lower double-precision value of \\a __b.\n"
15589"/// \\param __b\n"
15590"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15591"/// compared to the lower double-precision value of \\a __a.\n"
15592"/// \\returns An integer containing the comparison results. If either of the two\n"
15593"/// lower double-precision values is NaN, 0 is returned.\n"
15594"static __inline__ int __DEFAULT_FN_ATTRS\n"
15595"_mm_ucomile_sd(__m128d __a, __m128d __b)\n"
15596"{\n"
15597" return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);\n"
15598"}\n"
15599"\n"
15600"/// Compares the lower double-precision floating-point values in each of\n"
15601"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15602"/// the value in the first parameter is greater than the corresponding value\n"
15603"/// in the second parameter.\n"
15604"///\n"
15605"/// The comparison yields 0 for false, 1 for true. If either of the two lower\n"
15606"/// double-precision values is NaN, 0 is returned.\n"
15607"///\n"
15608"/// \\headerfile <x86intrin.h>\n"
15609"///\n"
15610"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15611"///\n"
15612"/// \\param __a\n"
15613"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15614"/// compared to the lower double-precision value of \\a __b.\n"
15615"/// \\param __b\n"
15616"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15617"/// compared to the lower double-precision value of \\a __a.\n"
15618"/// \\returns An integer containing the comparison results. If either of the two\n"
15619"/// lower double-precision values is NaN, 0 is returned.\n"
15620"static __inline__ int __DEFAULT_FN_ATTRS\n"
15621"_mm_ucomigt_sd(__m128d __a, __m128d __b)\n"
15622"{\n"
15623" return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);\n"
15624"}\n"
15625"\n"
15626"/// Compares the lower double-precision floating-point values in each of\n"
15627"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15628"/// the value in the first parameter is greater than or equal to the\n"
15629"/// corresponding value in the second parameter.\n"
15630"///\n"
15631"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15632"/// lower double-precision values is NaN, 0 is returned.\n"
15633"///\n"
15634"/// \\headerfile <x86intrin.h>\n"
15635"///\n"
15636"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15637"///\n"
15638"/// \\param __a\n"
15639"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15640"/// compared to the lower double-precision value of \\a __b.\n"
15641"/// \\param __b\n"
15642"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15643"/// compared to the lower double-precision value of \\a __a.\n"
15644"/// \\returns An integer containing the comparison results. If either of the two\n"
15645"/// lower double-precision values is NaN, 0 is returned.\n"
15646"static __inline__ int __DEFAULT_FN_ATTRS\n"
15647"_mm_ucomige_sd(__m128d __a, __m128d __b)\n"
15648"{\n"
15649" return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);\n"
15650"}\n"
15651"\n"
15652"/// Compares the lower double-precision floating-point values in each of\n"
15653"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15654"/// the value in the first parameter is unequal to the corresponding value in\n"
15655"/// the second parameter.\n"
15656"///\n"
15657"/// The comparison yields 0 for false, 1 for true. If either of the two lower\n"
15658"/// double-precision values is NaN, 1 is returned.\n"
15659"///\n"
15660"/// \\headerfile <x86intrin.h>\n"
15661"///\n"
15662"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15663"///\n"
15664"/// \\param __a\n"
15665"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15666"/// compared to the lower double-precision value of \\a __b.\n"
15667"/// \\param __b\n"
15668"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15669"/// compared to the lower double-precision value of \\a __a.\n"
15670"/// \\returns An integer containing the comparison result. If either of the two\n"
15671"/// lower double-precision values is NaN, 1 is returned.\n"
15672"static __inline__ int __DEFAULT_FN_ATTRS\n"
15673"_mm_ucomineq_sd(__m128d __a, __m128d __b)\n"
15674"{\n"
15675" return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);\n"
15676"}\n"
15677"\n"
15678"/// Converts the two double-precision floating-point elements of a\n"
15679"/// 128-bit vector of [2 x double] into two single-precision floating-point\n"
15680"/// values, returned in the lower 64 bits of a 128-bit vector of [4 x float].\n"
15681"/// The upper 64 bits of the result vector are set to zero.\n"
15682"///\n"
15683"/// \\headerfile <x86intrin.h>\n"
15684"///\n"
15685"/// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction.\n"
15686"///\n"
15687"/// \\param __a\n"
15688"/// A 128-bit vector of [2 x double].\n"
15689"/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n"
15690"/// converted values. The upper 64 bits are set to zero.\n"
15691"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
15692"_mm_cvtpd_ps(__m128d __a)\n"
15693"{\n"
15694" return __builtin_ia32_cvtpd2ps((__v2df)__a);\n"
15695"}\n"
15696"\n"
15697"/// Converts the lower two single-precision floating-point elements of a\n"
15698"/// 128-bit vector of [4 x float] into two double-precision floating-point\n"
15699"/// values, returned in a 128-bit vector of [2 x double]. The upper two\n"
15700"/// elements of the input vector are unused.\n"
15701"///\n"
15702"/// \\headerfile <x86intrin.h>\n"
15703"///\n"
15704"/// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction.\n"
15705"///\n"
15706"/// \\param __a\n"
15707"/// A 128-bit vector of [4 x float]. The lower two single-precision\n"
15708"/// floating-point elements are converted to double-precision values. The\n"
15709"/// upper two elements are unused.\n"
15710"/// \\returns A 128-bit vector of [2 x double] containing the converted values.\n"
15711"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15712"_mm_cvtps_pd(__m128 __a)\n"
15713"{\n"
15714" return (__m128d) __builtin_convertvector(\n"
15715" __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);\n"
15716"}\n"
15717"\n"
15718"/// Converts the lower two integer elements of a 128-bit vector of\n"
15719"/// [4 x i32] into two double-precision floating-point values, returned in a\n"
15720"/// 128-bit vector of [2 x double].\n"
15721"///\n"
15722"/// The upper two elements of the input vector are unused.\n"
15723"///\n"
15724"/// \\headerfile <x86intrin.h>\n"
15725"///\n"
15726"/// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction.\n"
15727"///\n"
15728"/// \\param __a\n"
15729"/// A 128-bit integer vector of [4 x i32]. The lower two integer elements are\n"
15730"/// converted to double-precision values.\n"
15731"///\n"
15732"/// The upper two elements are unused.\n"
15733"/// \\returns A 128-bit vector of [2 x double] containing the converted values.\n"
15734"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15735"_mm_cvtepi32_pd(__m128i __a)\n"
15736"{\n"
15737" return (__m128d) __builtin_convertvector(\n"
15738" __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);\n"
15739"}\n"
15740"\n"
15741"/// Converts the two double-precision floating-point elements of a\n"
15742"/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n"
15743"/// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper\n"
15744"/// 64 bits of the result vector are set to zero.\n"
15745"///\n"
15746"/// \\headerfile <x86intrin.h>\n"
15747"///\n"
15748"/// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction.\n"
15749"///\n"
15750"/// \\param __a\n"
15751"/// A 128-bit vector of [2 x double].\n"
15752"/// \\returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the\n"
15753"/// converted values. The upper 64 bits are set to zero.\n"
15754"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
15755"_mm_cvtpd_epi32(__m128d __a)\n"
15756"{\n"
15757" return __builtin_ia32_cvtpd2dq((__v2df)__a);\n"
15758"}\n"
15759"\n"
15760"/// Converts the low-order element of a 128-bit vector of [2 x double]\n"
15761"/// into a 32-bit signed integer value.\n"
15762"///\n"
15763"/// \\headerfile <x86intrin.h>\n"
15764"///\n"
15765"/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.\n"
15766"///\n"
15767"/// \\param __a\n"
15768"/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n"
15769"/// conversion.\n"
15770"/// \\returns A 32-bit signed integer containing the converted value.\n"
15771"static __inline__ int __DEFAULT_FN_ATTRS\n"
15772"_mm_cvtsd_si32(__m128d __a)\n"
15773"{\n"
15774" return __builtin_ia32_cvtsd2si((__v2df)__a);\n"
15775"}\n"
15776"\n"
15777"/// Converts the lower double-precision floating-point element of a\n"
15778"/// 128-bit vector of [2 x double], in the second parameter, into a\n"
15779"/// single-precision floating-point value, returned in the lower 32 bits of a\n"
15780"/// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are\n"
15781"/// copied from the upper 96 bits of the first parameter.\n"
15782"///\n"
15783"/// \\headerfile <x86intrin.h>\n"
15784"///\n"
15785"/// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction.\n"
15786"///\n"
15787"/// \\param __a\n"
15788"/// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are\n"
15789"/// copied to the upper 96 bits of the result.\n"
15790"/// \\param __b\n"
15791"/// A 128-bit vector of [2 x double]. The lower double-precision\n"
15792"/// floating-point element is used in the conversion.\n"
15793"/// \\returns A 128-bit vector of [4 x float]. The lower 32 bits contain the\n"
15794"/// converted value from the second parameter. The upper 96 bits are copied\n"
15795"/// from the upper 96 bits of the first parameter.\n"
15796"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
15797"_mm_cvtsd_ss(__m128 __a, __m128d __b)\n"
15798"{\n"
15799" return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);\n"
15800"}\n"
15801"\n"
15802"/// Converts a 32-bit signed integer value, in the second parameter, into\n"
15803"/// a double-precision floating-point value, returned in the lower 64 bits of\n"
15804"/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector\n"
15805"/// are copied from the upper 64 bits of the first parameter.\n"
15806"///\n"
15807"/// \\headerfile <x86intrin.h>\n"
15808"///\n"
15809"/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.\n"
15810"///\n"
15811"/// \\param __a\n"
15812"/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are\n"
15813"/// copied to the upper 64 bits of the result.\n"
15814"/// \\param __b\n"
15815"/// A 32-bit signed integer containing the value to be converted.\n"
15816"/// \\returns A 128-bit vector of [2 x double]. The lower 64 bits contain the\n"
15817"/// converted value from the second parameter. The upper 64 bits are copied\n"
15818"/// from the upper 64 bits of the first parameter.\n"
15819"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15820"_mm_cvtsi32_sd(__m128d __a, int __b)\n"
15821"{\n"
15822" __a[0] = __b;\n"
15823" return __a;\n"
15824"}\n"
15825"\n"
15826"/// Converts the lower single-precision floating-point element of a\n"
15827"/// 128-bit vector of [4 x float], in the second parameter, into a\n"
15828"/// double-precision floating-point value, returned in the lower 64 bits of\n"
15829"/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector\n"
15830"/// are copied from the upper 64 bits of the first parameter.\n"
15831"///\n"
15832"/// \\headerfile <x86intrin.h>\n"
15833"///\n"
15834"/// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction.\n"
15835"///\n"
15836"/// \\param __a\n"
15837"/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are\n"
15838"/// copied to the upper 64 bits of the result.\n"
15839"/// \\param __b\n"
15840"/// A 128-bit vector of [4 x float]. The lower single-precision\n"
15841"/// floating-point element is used in the conversion.\n"
15842"/// \\returns A 128-bit vector of [2 x double]. The lower 64 bits contain the\n"
15843"/// converted value from the second parameter. The upper 64 bits are copied\n"
15844"/// from the upper 64 bits of the first parameter.\n"
15845"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15846"_mm_cvtss_sd(__m128d __a, __m128 __b)\n"
15847"{\n"
15848" __a[0] = __b[0];\n"
15849" return __a;\n"
15850"}\n"
15851"\n"
15852"/// Converts the two double-precision floating-point elements of a\n"
15853"/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n"
15854"/// returned in the lower 64 bits of a 128-bit vector of [4 x i32].\n"
15855"///\n"
15856"/// If the result of either conversion is inexact, the result is truncated\n"
15857"/// (rounded towards zero) regardless of the current MXCSR setting. The upper\n"
15858"/// 64 bits of the result vector are set to zero.\n"
15859"///\n"
15860"/// \\headerfile <x86intrin.h>\n"
15861"///\n"
15862"/// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c>\n"
15863"/// instruction.\n"
15864"///\n"
15865"/// \\param __a\n"
15866"/// A 128-bit vector of [2 x double].\n"
15867"/// \\returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the\n"
15868"/// converted values. The upper 64 bits are set to zero.\n"
15869"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
15870"_mm_cvttpd_epi32(__m128d __a)\n"
15871"{\n"
15872" return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a);\n"
15873"}\n"
15874"\n"
15875"/// Converts the low-order element of a [2 x double] vector into a 32-bit\n"
15876"/// signed integer value, truncating the result when it is inexact.\n"
15877"///\n"
15878"/// \\headerfile <x86intrin.h>\n"
15879"///\n"
15880"/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>\n"
15881"/// instruction.\n"
15882"///\n"
15883"/// \\param __a\n"
15884"/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n"
15885"/// conversion.\n"
15886"/// \\returns A 32-bit signed integer containing the converted value.\n"
15887"static __inline__ int __DEFAULT_FN_ATTRS\n"
15888"_mm_cvttsd_si32(__m128d __a)\n"
15889"{\n"
15890" return __builtin_ia32_cvttsd2si((__v2df)__a);\n"
15891"}\n"
15892"\n"
15893"/// Converts the two double-precision floating-point elements of a\n"
15894"/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n"
15895"/// returned in a 64-bit vector of [2 x i32].\n"
15896"///\n"
15897"/// \\headerfile <x86intrin.h>\n"
15898"///\n"
15899"/// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction.\n"
15900"///\n"
15901"/// \\param __a\n"
15902"/// A 128-bit vector of [2 x double].\n"
15903"/// \\returns A 64-bit vector of [2 x i32] containing the converted values.\n"
15904"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
15905"_mm_cvtpd_pi32(__m128d __a)\n"
15906"{\n"
15907" return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);\n"
15908"}\n"
15909"\n"
15910"/// Converts the two double-precision floating-point elements of a\n"
15911"/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n"
15912"/// returned in a 64-bit vector of [2 x i32].\n"
15913"///\n"
15914"/// If the result of either conversion is inexact, the result is truncated\n"
15915"/// (rounded towards zero) regardless of the current MXCSR setting.\n"
15916"///\n"
15917"/// \\headerfile <x86intrin.h>\n"
15918"///\n"
15919"/// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction.\n"
15920"///\n"
15921"/// \\param __a\n"
15922"/// A 128-bit vector of [2 x double].\n"
15923"/// \\returns A 64-bit vector of [2 x i32] containing the converted values.\n"
15924"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
15925"_mm_cvttpd_pi32(__m128d __a)\n"
15926"{\n"
15927" return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);\n"
15928"}\n"
15929"\n"
15930"/// Converts the two signed 32-bit integer elements of a 64-bit vector of\n"
15931"/// [2 x i32] into two double-precision floating-point values, returned in a\n"
15932"/// 128-bit vector of [2 x double].\n"
15933"///\n"
15934"/// \\headerfile <x86intrin.h>\n"
15935"///\n"
15936"/// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction.\n"
15937"///\n"
15938"/// \\param __a\n"
15939"/// A 64-bit vector of [2 x i32].\n"
15940"/// \\returns A 128-bit vector of [2 x double] containing the converted values.\n"
15941"static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX\n"
15942"_mm_cvtpi32_pd(__m64 __a)\n"
15943"{\n"
15944" return __builtin_ia32_cvtpi2pd((__v2si)__a);\n"
15945"}\n"
15946"\n"
15947"/// Returns the low-order element of a 128-bit vector of [2 x double] as\n"
15948"/// a double-precision floating-point value.\n"
15949"///\n"
15950"/// \\headerfile <x86intrin.h>\n"
15951"///\n"
15952"/// This intrinsic has no corresponding instruction.\n"
15953"///\n"
15954"/// \\param __a\n"
15955"/// A 128-bit vector of [2 x double]. The lower 64 bits are returned.\n"
15956"/// \\returns A double-precision floating-point value copied from the lower 64\n"
15957"/// bits of \\a __a.\n"
15958"static __inline__ double __DEFAULT_FN_ATTRS\n"
15959"_mm_cvtsd_f64(__m128d __a)\n"
15960"{\n"
15961" return __a[0];\n"
15962"}\n"
15963"\n"
15964"/// Loads a 128-bit floating-point vector of [2 x double] from an aligned\n"
15965"/// memory location.\n"
15966"///\n"
15967"/// \\headerfile <x86intrin.h>\n"
15968"///\n"
15969"/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.\n"
15970"///\n"
15971"/// \\param __dp\n"
15972"/// A pointer to a 128-bit memory location. The address of the memory\n"
15973"/// location has to be 16-byte aligned.\n"
15974"/// \\returns A 128-bit vector of [2 x double] containing the loaded values.\n"
15975"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15976"_mm_load_pd(double const *__dp)\n"
15977"{\n"
15978" return *(__m128d*)__dp;\n"
15979"}\n"
15980"\n"
15981"/// Loads a double-precision floating-point value from a specified memory\n"
15982"/// location and duplicates it to both vector elements of a 128-bit vector of\n"
15983"/// [2 x double].\n"
15984"///\n"
15985"/// \\headerfile <x86intrin.h>\n"
15986"///\n"
15987"/// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction.\n"
15988"///\n"
15989"/// \\param __dp\n"
15990"/// A pointer to a memory location containing a double-precision value.\n"
15991"/// \\returns A 128-bit vector of [2 x double] containing the loaded and\n"
15992"/// duplicated values.\n"
15993"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15994"_mm_load1_pd(double const *__dp)\n"
15995"{\n"
15996" struct __mm_load1_pd_struct {\n"
15997" double __u;\n"
15998" } __attribute__((__packed__, __may_alias__));\n"
15999" double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;\n"
16000" return __extension__ (__m128d){ __u, __u };\n"
16001"}\n"
16002"\n"
16003"#define _mm_load_pd1(dp) _mm_load1_pd(dp)\n"
16004"\n"
16005"/// Loads two double-precision values, in reverse order, from an aligned\n"
16006"/// memory location into a 128-bit vector of [2 x double].\n"
16007"///\n"
16008"/// \\headerfile <x86intrin.h>\n"
16009"///\n"
16010"/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction +\n"
16011"/// needed shuffling instructions. In AVX mode, the shuffling may be combined\n"
16012"/// with the \\c VMOVAPD, resulting in only a \\c VPERMILPD instruction.\n"
16013"///\n"
16014"/// \\param __dp\n"
16015"/// A 16-byte aligned pointer to an array of double-precision values to be\n"
16016"/// loaded in reverse order.\n"
16017"/// \\returns A 128-bit vector of [2 x double] containing the reversed loaded\n"
16018"/// values.\n"
16019"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16020"_mm_loadr_pd(double const *__dp)\n"
16021"{\n"
16022" __m128d __u = *(__m128d*)__dp;\n"
16023" return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0);\n"
16024"}\n"
16025"\n"
16026"/// Loads a 128-bit floating-point vector of [2 x double] from an\n"
16027"/// unaligned memory location.\n"
16028"///\n"
16029"/// \\headerfile <x86intrin.h>\n"
16030"///\n"
16031"/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.\n"
16032"///\n"
16033"/// \\param __dp\n"
16034"/// A pointer to a 128-bit memory location. The address of the memory\n"
16035"/// location does not have to be aligned.\n"
16036"/// \\returns A 128-bit vector of [2 x double] containing the loaded values.\n"
16037"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16038"_mm_loadu_pd(double const *__dp)\n"
16039"{\n"
16040" struct __loadu_pd {\n"
16041" __m128d __v;\n"
16042" } __attribute__((__packed__, __may_alias__));\n"
16043" return ((struct __loadu_pd*)__dp)->__v;\n"
16044"}\n"
16045"\n"
16046"/// Loads a 64-bit integer value to the low element of a 128-bit integer\n"
16047"/// vector and clears the upper element.\n"
16048"///\n"
16049"/// \\headerfile <x86intrin.h>\n"
16050"///\n"
16051"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
16052"///\n"
16053"/// \\param __a\n"
16054"/// A pointer to a 64-bit memory location. The address of the memory\n"
16055"/// location does not have to be aligned.\n"
16056"/// \\returns A 128-bit vector of [2 x i64] containing the loaded value.\n"
16057"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16058"_mm_loadu_si64(void const *__a)\n"
16059"{\n"
16060" struct __loadu_si64 {\n"
16061" long long __v;\n"
16062" } __attribute__((__packed__, __may_alias__));\n"
16063" long long __u = ((struct __loadu_si64*)__a)->__v;\n"
16064" return __extension__ (__m128i)(__v2di){__u, 0LL};\n"
16065"}\n"
16066"\n"
16067"/// Loads a 32-bit integer value to the low element of a 128-bit integer\n"
16068"/// vector and clears the upper element.\n"
16069"///\n"
16070"/// \\headerfile <x86intrin.h>\n"
16071"///\n"
16072"/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n"
16073"///\n"
16074"/// \\param __a\n"
16075"/// A pointer to a 32-bit memory location. The address of the memory\n"
16076"/// location does not have to be aligned.\n"
16077"/// \\returns A 128-bit vector of [4 x i32] containing the loaded value.\n"
16078"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16079"_mm_loadu_si32(void const *__a)\n"
16080"{\n"
16081" struct __loadu_si32 {\n"
16082" int __v;\n"
16083" } __attribute__((__packed__, __may_alias__));\n"
16084" int __u = ((struct __loadu_si32*)__a)->__v;\n"
16085" return __extension__ (__m128i)(__v4si){__u, 0, 0, 0};\n"
16086"}\n"
16087"\n"
16088"/// Loads a 16-bit integer value to the low element of a 128-bit integer\n"
16089"/// vector and clears the upper element.\n"
16090"///\n"
16091"/// \\headerfile <x86intrin.h>\n"
16092"///\n"
16093"/// This intrinsic does not correspond to a specific instruction.\n"
16094"///\n"
16095"/// \\param __a\n"
16096"/// A pointer to a 16-bit memory location. The address of the memory\n"
16097"/// location does not have to be aligned.\n"
16098"/// \\returns A 128-bit vector of [8 x i16] containing the loaded value.\n"
16099"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16100"_mm_loadu_si16(void const *__a)\n"
16101"{\n"
16102" struct __loadu_si16 {\n"
16103" short __v;\n"
16104" } __attribute__((__packed__, __may_alias__));\n"
16105" short __u = ((struct __loadu_si16*)__a)->__v;\n"
16106" return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};\n"
16107"}\n"
16108"\n"
16109"/// Loads a 64-bit double-precision value to the low element of a\n"
16110"/// 128-bit integer vector and clears the upper element.\n"
16111"///\n"
16112"/// \\headerfile <x86intrin.h>\n"
16113"///\n"
16114"/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.\n"
16115"///\n"
16116"/// \\param __dp\n"
16117"/// A pointer to a memory location containing a double-precision value.\n"
16118"/// The address of the memory location does not have to be aligned.\n"
16119"/// \\returns A 128-bit vector of [2 x double] containing the loaded value.\n"
16120"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16121"_mm_load_sd(double const *__dp)\n"
16122"{\n"
16123" struct __mm_load_sd_struct {\n"
16124" double __u;\n"
16125" } __attribute__((__packed__, __may_alias__));\n"
16126" double __u = ((struct __mm_load_sd_struct*)__dp)->__u;\n"
16127" return __extension__ (__m128d){ __u, 0 };\n"
16128"}\n"
16129"\n"
16130"/// Loads a double-precision value into the high-order bits of a 128-bit\n"
16131"/// vector of [2 x double]. The low-order bits are copied from the low-order\n"
16132"/// bits of the first operand.\n"
16133"///\n"
16134"/// \\headerfile <x86intrin.h>\n"
16135"///\n"
16136"/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.\n"
16137"///\n"
16138"/// \\param __a\n"
16139"/// A 128-bit vector of [2 x double]. \\n\n"
16140"/// Bits [63:0] are written to bits [63:0] of the result.\n"
16141"/// \\param __dp\n"
16142"/// A pointer to a 64-bit memory location containing a double-precision\n"
16143"/// floating-point value that is loaded. The loaded value is written to bits\n"
16144"/// [127:64] of the result. The address of the memory location does not have\n"
16145"/// to be aligned.\n"
16146"/// \\returns A 128-bit vector of [2 x double] containing the moved values.\n"
16147"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16148"_mm_loadh_pd(__m128d __a, double const *__dp)\n"
16149"{\n"
16150" struct __mm_loadh_pd_struct {\n"
16151" double __u;\n"
16152" } __attribute__((__packed__, __may_alias__));\n"
16153" double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;\n"
16154" return __extension__ (__m128d){ __a[0], __u };\n"
16155"}\n"
16156"\n"
16157"/// Loads a double-precision value into the low-order bits of a 128-bit\n"
16158"/// vector of [2 x double]. The high-order bits are copied from the\n"
16159"/// high-order bits of the first operand.\n"
16160"///\n"
16161"/// \\headerfile <x86intrin.h>\n"
16162"///\n"
16163"/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.\n"
16164"///\n"
16165"/// \\param __a\n"
16166"/// A 128-bit vector of [2 x double]. \\n\n"
16167"/// Bits [127:64] are written to bits [127:64] of the result.\n"
16168"/// \\param __dp\n"
16169"/// A pointer to a 64-bit memory location containing a double-precision\n"
16170"/// floating-point value that is loaded. The loaded value is written to bits\n"
16171"/// [63:0] of the result. The address of the memory location does not have to\n"
16172"/// be aligned.\n"
16173"/// \\returns A 128-bit vector of [2 x double] containing the moved values.\n"
16174"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16175"_mm_loadl_pd(__m128d __a, double const *__dp)\n"
16176"{\n"
16177" struct __mm_loadl_pd_struct {\n"
16178" double __u;\n"
16179" } __attribute__((__packed__, __may_alias__));\n"
16180" double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;\n"
16181" return __extension__ (__m128d){ __u, __a[1] };\n"
16182"}\n"
16183"\n"
16184"/// Constructs a 128-bit floating-point vector of [2 x double] with\n"
16185"/// unspecified content. This could be used as an argument to another\n"
16186"/// intrinsic function where the argument is required but the value is not\n"
16187"/// actually used.\n"
16188"///\n"
16189"/// \\headerfile <x86intrin.h>\n"
16190"///\n"
16191"/// This intrinsic has no corresponding instruction.\n"
16192"///\n"
16193"/// \\returns A 128-bit floating-point vector of [2 x double] with unspecified\n"
16194"/// content.\n"
16195"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16196"_mm_undefined_pd(void)\n"
16197"{\n"
16198" return (__m128d)__builtin_ia32_undef128();\n"
16199"}\n"
16200"\n"
16201"/// Constructs a 128-bit floating-point vector of [2 x double]. The lower\n"
16202"/// 64 bits of the vector are initialized with the specified double-precision\n"
16203"/// floating-point value. The upper 64 bits are set to zero.\n"
16204"///\n"
16205"/// \\headerfile <x86intrin.h>\n"
16206"///\n"
16207"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
16208"///\n"
16209"/// \\param __w\n"
16210"/// A double-precision floating-point value used to initialize the lower 64\n"
16211"/// bits of the result.\n"
16212"/// \\returns An initialized 128-bit floating-point vector of [2 x double]. The\n"
16213"/// lower 64 bits contain the value of the parameter. The upper 64 bits are\n"
16214"/// set to zero.\n"
16215"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16216"_mm_set_sd(double __w)\n"
16217"{\n"
16218" return __extension__ (__m128d){ __w, 0 };\n"
16219"}\n"
16220"\n"
16221"/// Constructs a 128-bit floating-point vector of [2 x double], with each\n"
16222"/// of the two double-precision floating-point vector elements set to the\n"
16223"/// specified double-precision floating-point value.\n"
16224"///\n"
16225"/// \\headerfile <x86intrin.h>\n"
16226"///\n"
16227"/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.\n"
16228"///\n"
16229"/// \\param __w\n"
16230"/// A double-precision floating-point value used to initialize each vector\n"
16231"/// element of the result.\n"
16232"/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n"
16233"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16234"_mm_set1_pd(double __w)\n"
16235"{\n"
16236" return __extension__ (__m128d){ __w, __w };\n"
16237"}\n"
16238"\n"
16239"/// Constructs a 128-bit floating-point vector of [2 x double], with each\n"
16240"/// of the two double-precision floating-point vector elements set to the\n"
16241"/// specified double-precision floating-point value.\n"
16242"///\n"
16243"/// \\headerfile <x86intrin.h>\n"
16244"///\n"
16245"/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.\n"
16246"///\n"
16247"/// \\param __w\n"
16248"/// A double-precision floating-point value used to initialize each vector\n"
16249"/// element of the result.\n"
16250"/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n"
16251"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16252"_mm_set_pd1(double __w)\n"
16253"{\n"
16254" return _mm_set1_pd(__w);\n"
16255"}\n"
16256"\n"
16257"/// Constructs a 128-bit floating-point vector of [2 x double]\n"
16258"/// initialized with the specified double-precision floating-point values.\n"
16259"///\n"
16260"/// \\headerfile <x86intrin.h>\n"
16261"///\n"
16262"/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n"
16263"///\n"
16264"/// \\param __w\n"
16265"/// A double-precision floating-point value used to initialize the upper 64\n"
16266"/// bits of the result.\n"
16267"/// \\param __x\n"
16268"/// A double-precision floating-point value used to initialize the lower 64\n"
16269"/// bits of the result.\n"
16270"/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n"
16271"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16272"_mm_set_pd(double __w, double __x)\n"
16273"{\n"
16274" return __extension__ (__m128d){ __x, __w };\n"
16275"}\n"
16276"\n"
16277"/// Constructs a 128-bit floating-point vector of [2 x double],\n"
16278"/// initialized in reverse order with the specified double-precision\n"
16279"/// floating-point values.\n"
16280"///\n"
16281"/// \\headerfile <x86intrin.h>\n"
16282"///\n"
16283"/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n"
16284"///\n"
16285"/// \\param __w\n"
16286"/// A double-precision floating-point value used to initialize the lower 64\n"
16287"/// bits of the result.\n"
16288"/// \\param __x\n"
16289"/// A double-precision floating-point value used to initialize the upper 64\n"
16290"/// bits of the result.\n"
16291"/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n"
16292"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16293"_mm_setr_pd(double __w, double __x)\n"
16294"{\n"
16295" return __extension__ (__m128d){ __w, __x };\n"
16296"}\n"
16297"\n"
16298"/// Constructs a 128-bit floating-point vector of [2 x double]\n"
16299"/// initialized to zero.\n"
16300"///\n"
16301"/// \\headerfile <x86intrin.h>\n"
16302"///\n"
16303"/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.\n"
16304"///\n"
16305"/// \\returns An initialized 128-bit floating-point vector of [2 x double] with\n"
16306"/// all elements set to zero.\n"
16307"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16308"_mm_setzero_pd(void)\n"
16309"{\n"
16310" return __extension__ (__m128d){ 0, 0 };\n"
16311"}\n"
16312"\n"
16313"/// Constructs a 128-bit floating-point vector of [2 x double]. The lower\n"
16314"/// 64 bits are set to the lower 64 bits of the second parameter. The upper\n"
16315"/// 64 bits are set to the upper 64 bits of the first parameter.\n"
16316"///\n"
16317"/// \\headerfile <x86intrin.h>\n"
16318"///\n"
16319"/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.\n"
16320"///\n"
16321"/// \\param __a\n"
16322"/// A 128-bit vector of [2 x double]. The upper 64 bits are written to the\n"
16323"/// upper 64 bits of the result.\n"
16324"/// \\param __b\n"
16325"/// A 128-bit vector of [2 x double]. The lower 64 bits are written to the\n"
16326"/// lower 64 bits of the result.\n"
16327"/// \\returns A 128-bit vector of [2 x double] containing the moved values.\n"
16328"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16329"_mm_move_sd(__m128d __a, __m128d __b)\n"
16330"{\n"
16331" __a[0] = __b[0];\n"
16332" return __a;\n"
16333"}\n"
16334"\n"
16335"/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a\n"
16336"/// memory location.\n"
16337"///\n"
16338"/// \\headerfile <x86intrin.h>\n"
16339"///\n"
16340"/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.\n"
16341"///\n"
16342"/// \\param __dp\n"
16343"/// A pointer to a 64-bit memory location.\n"
16344"/// \\param __a\n"
16345"/// A 128-bit vector of [2 x double] containing the value to be stored.\n"
16346"static __inline__ void __DEFAULT_FN_ATTRS\n"
16347"_mm_store_sd(double *__dp, __m128d __a)\n"
16348"{\n"
16349" struct __mm_store_sd_struct {\n"
16350" double __u;\n"
16351" } __attribute__((__packed__, __may_alias__));\n"
16352" ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];\n"
16353"}\n"
16354"\n"
16355"/// Moves packed double-precision values from a 128-bit vector of\n"
16356"/// [2 x double] to a memory location.\n"
16357"///\n"
16358"/// \\headerfile <x86intrin.h>\n"
16359"///\n"
16360"/// This intrinsic corresponds to the <c>VMOVAPD / MOVAPS</c> instruction.\n"
16361"///\n"
16362"/// \\param __dp\n"
16363"/// A pointer to an aligned memory location that can store two\n"
16364"/// double-precision values.\n"
16365"/// \\param __a\n"
16366"/// A packed 128-bit vector of [2 x double] containing the values to be\n"
16367"/// moved.\n"
16368"static __inline__ void __DEFAULT_FN_ATTRS\n"
16369"_mm_store_pd(double *__dp, __m128d __a)\n"
16370"{\n"
16371" *(__m128d*)__dp = __a;\n"
16372"}\n"
16373"\n"
16374"/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to\n"
16375"/// the upper and lower 64 bits of a memory location.\n"
16376"///\n"
16377"/// \\headerfile <x86intrin.h>\n"
16378"///\n"
16379"/// This intrinsic corresponds to the\n"
16380"/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.\n"
16381"///\n"
16382"/// \\param __dp\n"
16383"/// A pointer to a memory location that can store two double-precision\n"
16384"/// values.\n"
16385"/// \\param __a\n"
16386"/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each\n"
16387"/// of the values in \\a __dp.\n"
16388"static __inline__ void __DEFAULT_FN_ATTRS\n"
16389"_mm_store1_pd(double *__dp, __m128d __a)\n"
16390"{\n"
16391" __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);\n"
16392" _mm_store_pd(__dp, __a);\n"
16393"}\n"
16394"\n"
16395"/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to\n"
16396"/// the upper and lower 64 bits of a memory location.\n"
16397"///\n"
16398"/// \\headerfile <x86intrin.h>\n"
16399"///\n"
16400"/// This intrinsic corresponds to the\n"
16401"/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.\n"
16402"///\n"
16403"/// \\param __dp\n"
16404"/// A pointer to a memory location that can store two double-precision\n"
16405"/// values.\n"
16406"/// \\param __a\n"
16407"/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each\n"
16408"/// of the values in \\a __dp.\n"
16409"static __inline__ void __DEFAULT_FN_ATTRS\n"
16410"_mm_store_pd1(double *__dp, __m128d __a)\n"
16411"{\n"
16412" _mm_store1_pd(__dp, __a);\n"
16413"}\n"
16414"\n"
16415"/// Stores a 128-bit vector of [2 x double] into an unaligned memory\n"
16416"/// location.\n"
16417"///\n"
16418"/// \\headerfile <x86intrin.h>\n"
16419"///\n"
16420"/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.\n"
16421"///\n"
16422"/// \\param __dp\n"
16423"/// A pointer to a 128-bit memory location. The address of the memory\n"
16424"/// location does not have to be aligned.\n"
16425"/// \\param __a\n"
16426"/// A 128-bit vector of [2 x double] containing the values to be stored.\n"
16427"static __inline__ void __DEFAULT_FN_ATTRS\n"
16428"_mm_storeu_pd(double *__dp, __m128d __a)\n"
16429"{\n"
16430" struct __storeu_pd {\n"
16431" __m128d __v;\n"
16432" } __attribute__((__packed__, __may_alias__));\n"
16433" ((struct __storeu_pd*)__dp)->__v = __a;\n"
16434"}\n"
16435"\n"
16436"/// Stores two double-precision values, in reverse order, from a 128-bit\n"
16437"/// vector of [2 x double] to a 16-byte aligned memory location.\n"
16438"///\n"
16439"/// \\headerfile <x86intrin.h>\n"
16440"///\n"
16441"/// This intrinsic corresponds to a shuffling instruction followed by a\n"
16442"/// <c> VMOVAPD / MOVAPD </c> instruction.\n"
16443"///\n"
16444"/// \\param __dp\n"
16445"/// A pointer to a 16-byte aligned memory location that can store two\n"
16446"/// double-precision values.\n"
16447"/// \\param __a\n"
16448"/// A 128-bit vector of [2 x double] containing the values to be reversed and\n"
16449"/// stored.\n"
16450"static __inline__ void __DEFAULT_FN_ATTRS\n"
16451"_mm_storer_pd(double *__dp, __m128d __a)\n"
16452"{\n"
16453" __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0);\n"
16454" *(__m128d *)__dp = __a;\n"
16455"}\n"
16456"\n"
16457"/// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a\n"
16458"/// memory location.\n"
16459"///\n"
16460"/// \\headerfile <x86intrin.h>\n"
16461"///\n"
16462"/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.\n"
16463"///\n"
16464"/// \\param __dp\n"
16465"/// A pointer to a 64-bit memory location.\n"
16466"/// \\param __a\n"
16467"/// A 128-bit vector of [2 x double] containing the value to be stored.\n"
16468"static __inline__ void __DEFAULT_FN_ATTRS\n"
16469"_mm_storeh_pd(double *__dp, __m128d __a)\n"
16470"{\n"
16471" struct __mm_storeh_pd_struct {\n"
16472" double __u;\n"
16473" } __attribute__((__packed__, __may_alias__));\n"
16474" ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];\n"
16475"}\n"
16476"\n"
16477"/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a\n"
16478"/// memory location.\n"
16479"///\n"
16480"/// \\headerfile <x86intrin.h>\n"
16481"///\n"
16482"/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.\n"
16483"///\n"
16484"/// \\param __dp\n"
16485"/// A pointer to a 64-bit memory location.\n"
16486"/// \\param __a\n"
16487"/// A 128-bit vector of [2 x double] containing the value to be stored.\n"
16488"static __inline__ void __DEFAULT_FN_ATTRS\n"
16489"_mm_storel_pd(double *__dp, __m128d __a)\n"
16490"{\n"
16491" struct __mm_storeh_pd_struct {\n"
16492" double __u;\n"
16493" } __attribute__((__packed__, __may_alias__));\n"
16494" ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];\n"
16495"}\n"
16496"\n"
16497"/// Adds the corresponding elements of two 128-bit vectors of [16 x i8],\n"
16498"/// saving the lower 8 bits of each sum in the corresponding element of a\n"
16499"/// 128-bit result vector of [16 x i8].\n"
16500"///\n"
16501"/// The integer elements of both parameters can be either signed or unsigned.\n"
16502"///\n"
16503"/// \\headerfile <x86intrin.h>\n"
16504"///\n"
16505"/// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction.\n"
16506"///\n"
16507"/// \\param __a\n"
16508"/// A 128-bit vector of [16 x i8].\n"
16509"/// \\param __b\n"
16510"/// A 128-bit vector of [16 x i8].\n"
16511"/// \\returns A 128-bit vector of [16 x i8] containing the sums of both\n"
16512"/// parameters.\n"
16513"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16514"_mm_add_epi8(__m128i __a, __m128i __b)\n"
16515"{\n"
16516" return (__m128i)((__v16qu)__a + (__v16qu)__b);\n"
16517"}\n"
16518"\n"
16519"/// Adds the corresponding elements of two 128-bit vectors of [8 x i16],\n"
16520"/// saving the lower 16 bits of each sum in the corresponding element of a\n"
16521"/// 128-bit result vector of [8 x i16].\n"
16522"///\n"
16523"/// The integer elements of both parameters can be either signed or unsigned.\n"
16524"///\n"
16525"/// \\headerfile <x86intrin.h>\n"
16526"///\n"
16527"/// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction.\n"
16528"///\n"
16529"/// \\param __a\n"
16530"/// A 128-bit vector of [8 x i16].\n"
16531"/// \\param __b\n"
16532"/// A 128-bit vector of [8 x i16].\n"
16533"/// \\returns A 128-bit vector of [8 x i16] containing the sums of both\n"
16534"/// parameters.\n"
16535"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16536"_mm_add_epi16(__m128i __a, __m128i __b)\n"
16537"{\n"
16538" return (__m128i)((__v8hu)__a + (__v8hu)__b);\n"
16539"}\n"
16540"\n"
16541"/// Adds the corresponding elements of two 128-bit vectors of [4 x i32],\n"
16542"/// saving the lower 32 bits of each sum in the corresponding element of a\n"
16543"/// 128-bit result vector of [4 x i32].\n"
16544"///\n"
16545"/// The integer elements of both parameters can be either signed or unsigned.\n"
16546"///\n"
16547"/// \\headerfile <x86intrin.h>\n"
16548"///\n"
16549"/// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction.\n"
16550"///\n"
16551"/// \\param __a\n"
16552"/// A 128-bit vector of [4 x i32].\n"
16553"/// \\param __b\n"
16554"/// A 128-bit vector of [4 x i32].\n"
16555"/// \\returns A 128-bit vector of [4 x i32] containing the sums of both\n"
16556"/// parameters.\n"
16557"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16558"_mm_add_epi32(__m128i __a, __m128i __b)\n"
16559"{\n"
16560" return (__m128i)((__v4su)__a + (__v4su)__b);\n"
16561"}\n"
16562"\n"
16563"/// Adds two signed or unsigned 64-bit integer values, returning the\n"
16564"/// lower 64 bits of the sum.\n"
16565"///\n"
16566"/// \\headerfile <x86intrin.h>\n"
16567"///\n"
16568"/// This intrinsic corresponds to the <c> PADDQ </c> instruction.\n"
16569"///\n"
16570"/// \\param __a\n"
16571"/// A 64-bit integer.\n"
16572"/// \\param __b\n"
16573"/// A 64-bit integer.\n"
16574"/// \\returns A 64-bit integer containing the sum of both parameters.\n"
16575"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
16576"_mm_add_si64(__m64 __a, __m64 __b)\n"
16577"{\n"
16578" return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);\n"
16579"}\n"
16580"\n"
16581"/// Adds the corresponding elements of two 128-bit vectors of [2 x i64],\n"
16582"/// saving the lower 64 bits of each sum in the corresponding element of a\n"
16583"/// 128-bit result vector of [2 x i64].\n"
16584"///\n"
16585"/// The integer elements of both parameters can be either signed or unsigned.\n"
16586"///\n"
16587"/// \\headerfile <x86intrin.h>\n"
16588"///\n"
16589"/// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction.\n"
16590"///\n"
16591"/// \\param __a\n"
16592"/// A 128-bit vector of [2 x i64].\n"
16593"/// \\param __b\n"
16594"/// A 128-bit vector of [2 x i64].\n"
16595"/// \\returns A 128-bit vector of [2 x i64] containing the sums of both\n"
16596"/// parameters.\n"
16597"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16598"_mm_add_epi64(__m128i __a, __m128i __b)\n"
16599"{\n"
16600" return (__m128i)((__v2du)__a + (__v2du)__b);\n"
16601"}\n"
16602"\n"
16603"/// Adds, with saturation, the corresponding elements of two 128-bit\n"
16604"/// signed [16 x i8] vectors, saving each sum in the corresponding element of\n"
16605"/// a 128-bit result vector of [16 x i8]. Positive sums greater than 0x7F are\n"
16606"/// saturated to 0x7F. Negative sums less than 0x80 are saturated to 0x80.\n"
16607"///\n"
16608"/// \\headerfile <x86intrin.h>\n"
16609"///\n"
16610"/// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction.\n"
16611"///\n"
16612"/// \\param __a\n"
16613"/// A 128-bit signed [16 x i8] vector.\n"
16614"/// \\param __b\n"
16615"/// A 128-bit signed [16 x i8] vector.\n"
16616"/// \\returns A 128-bit signed [16 x i8] vector containing the saturated sums of\n"
16617"/// both parameters.\n"
16618"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16619"_mm_adds_epi8(__m128i __a, __m128i __b)\n"
16620"{\n"
16621" return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);\n"
16622"}\n"
16623"\n"
16624"/// Adds, with saturation, the corresponding elements of two 128-bit\n"
16625"/// signed [8 x i16] vectors, saving each sum in the corresponding element of\n"
16626"/// a 128-bit result vector of [8 x i16]. Positive sums greater than 0x7FFF\n"
16627"/// are saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to\n"
16628"/// 0x8000.\n"
16629"///\n"
16630"/// \\headerfile <x86intrin.h>\n"
16631"///\n"
16632"/// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction.\n"
16633"///\n"
16634"/// \\param __a\n"
16635"/// A 128-bit signed [8 x i16] vector.\n"
16636"/// \\param __b\n"
16637"/// A 128-bit signed [8 x i16] vector.\n"
16638"/// \\returns A 128-bit signed [8 x i16] vector containing the saturated sums of\n"
16639"/// both parameters.\n"
16640"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16641"_mm_adds_epi16(__m128i __a, __m128i __b)\n"
16642"{\n"
16643" return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);\n"
16644"}\n"
16645"\n"
16646"/// Adds, with saturation, the corresponding elements of two 128-bit\n"
16647"/// unsigned [16 x i8] vectors, saving each sum in the corresponding element\n"
16648"/// of a 128-bit result vector of [16 x i8]. Positive sums greater than 0xFF\n"
16649"/// are saturated to 0xFF. Negative sums are saturated to 0x00.\n"
16650"///\n"
16651"/// \\headerfile <x86intrin.h>\n"
16652"///\n"
16653"/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.\n"
16654"///\n"
16655"/// \\param __a\n"
16656"/// A 128-bit unsigned [16 x i8] vector.\n"
16657"/// \\param __b\n"
16658"/// A 128-bit unsigned [16 x i8] vector.\n"
16659"/// \\returns A 128-bit unsigned [16 x i8] vector containing the saturated sums\n"
16660"/// of both parameters.\n"
16661"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16662"_mm_adds_epu8(__m128i __a, __m128i __b)\n"
16663"{\n"
16664" return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);\n"
16665"}\n"
16666"\n"
16667"/// Adds, with saturation, the corresponding elements of two 128-bit\n"
16668"/// unsigned [8 x i16] vectors, saving each sum in the corresponding element\n"
16669"/// of a 128-bit result vector of [8 x i16]. Positive sums greater than\n"
16670"/// 0xFFFF are saturated to 0xFFFF. Negative sums are saturated to 0x0000.\n"
16671"///\n"
16672"/// \\headerfile <x86intrin.h>\n"
16673"///\n"
16674"/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.\n"
16675"///\n"
16676"/// \\param __a\n"
16677"/// A 128-bit unsigned [8 x i16] vector.\n"
16678"/// \\param __b\n"
16679"/// A 128-bit unsigned [8 x i16] vector.\n"
16680"/// \\returns A 128-bit unsigned [8 x i16] vector containing the saturated sums\n"
16681"/// of both parameters.\n"
16682"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16683"_mm_adds_epu16(__m128i __a, __m128i __b)\n"
16684"{\n"
16685" return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);\n"
16686"}\n"
16687"\n"
16688"/// Computes the rounded avarages of corresponding elements of two\n"
16689"/// 128-bit unsigned [16 x i8] vectors, saving each result in the\n"
16690"/// corresponding element of a 128-bit result vector of [16 x i8].\n"
16691"///\n"
16692"/// \\headerfile <x86intrin.h>\n"
16693"///\n"
16694"/// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction.\n"
16695"///\n"
16696"/// \\param __a\n"
16697"/// A 128-bit unsigned [16 x i8] vector.\n"
16698"/// \\param __b\n"
16699"/// A 128-bit unsigned [16 x i8] vector.\n"
16700"/// \\returns A 128-bit unsigned [16 x i8] vector containing the rounded\n"
16701"/// averages of both parameters.\n"
16702"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16703"_mm_avg_epu8(__m128i __a, __m128i __b)\n"
16704"{\n"
16705" typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));\n"
16706" return (__m128i)__builtin_convertvector(\n"
16707" ((__builtin_convertvector((__v16qu)__a, __v16hu) +\n"
16708" __builtin_convertvector((__v16qu)__b, __v16hu)) + 1)\n"
16709" >> 1, __v16qu);\n"
16710"}\n"
16711"\n"
16712"/// Computes the rounded avarages of corresponding elements of two\n"
16713"/// 128-bit unsigned [8 x i16] vectors, saving each result in the\n"
16714"/// corresponding element of a 128-bit result vector of [8 x i16].\n"
16715"///\n"
16716"/// \\headerfile <x86intrin.h>\n"
16717"///\n"
16718"/// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction.\n"
16719"///\n"
16720"/// \\param __a\n"
16721"/// A 128-bit unsigned [8 x i16] vector.\n"
16722"/// \\param __b\n"
16723"/// A 128-bit unsigned [8 x i16] vector.\n"
16724"/// \\returns A 128-bit unsigned [8 x i16] vector containing the rounded\n"
16725"/// averages of both parameters.\n"
16726"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16727"_mm_avg_epu16(__m128i __a, __m128i __b)\n"
16728"{\n"
16729" typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));\n"
16730" return (__m128i)__builtin_convertvector(\n"
16731" ((__builtin_convertvector((__v8hu)__a, __v8su) +\n"
16732" __builtin_convertvector((__v8hu)__b, __v8su)) + 1)\n"
16733" >> 1, __v8hu);\n"
16734"}\n"
16735"\n"
16736"/// Multiplies the corresponding elements of two 128-bit signed [8 x i16]\n"
16737"/// vectors, producing eight intermediate 32-bit signed integer products, and\n"
16738"/// adds the consecutive pairs of 32-bit products to form a 128-bit signed\n"
16739"/// [4 x i32] vector.\n"
16740"///\n"
16741"/// For example, bits [15:0] of both parameters are multiplied producing a\n"
16742"/// 32-bit product, bits [31:16] of both parameters are multiplied producing\n"
16743"/// a 32-bit product, and the sum of those two products becomes bits [31:0]\n"
16744"/// of the result.\n"
16745"///\n"
16746"/// \\headerfile <x86intrin.h>\n"
16747"///\n"
16748"/// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction.\n"
16749"///\n"
16750"/// \\param __a\n"
16751"/// A 128-bit signed [8 x i16] vector.\n"
16752"/// \\param __b\n"
16753"/// A 128-bit signed [8 x i16] vector.\n"
16754"/// \\returns A 128-bit signed [4 x i32] vector containing the sums of products\n"
16755"/// of both parameters.\n"
16756"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16757"_mm_madd_epi16(__m128i __a, __m128i __b)\n"
16758"{\n"
16759" return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);\n"
16760"}\n"
16761"\n"
16762"/// Compares corresponding elements of two 128-bit signed [8 x i16]\n"
16763"/// vectors, saving the greater value from each comparison in the\n"
16764"/// corresponding element of a 128-bit result vector of [8 x i16].\n"
16765"///\n"
16766"/// \\headerfile <x86intrin.h>\n"
16767"///\n"
16768"/// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction.\n"
16769"///\n"
16770"/// \\param __a\n"
16771"/// A 128-bit signed [8 x i16] vector.\n"
16772"/// \\param __b\n"
16773"/// A 128-bit signed [8 x i16] vector.\n"
16774"/// \\returns A 128-bit signed [8 x i16] vector containing the greater value of\n"
16775"/// each comparison.\n"
16776"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16777"_mm_max_epi16(__m128i __a, __m128i __b)\n"
16778"{\n"
16779" return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);\n"
16780"}\n"
16781"\n"
16782"/// Compares corresponding elements of two 128-bit unsigned [16 x i8]\n"
16783"/// vectors, saving the greater value from each comparison in the\n"
16784"/// corresponding element of a 128-bit result vector of [16 x i8].\n"
16785"///\n"
16786"/// \\headerfile <x86intrin.h>\n"
16787"///\n"
16788"/// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction.\n"
16789"///\n"
16790"/// \\param __a\n"
16791"/// A 128-bit unsigned [16 x i8] vector.\n"
16792"/// \\param __b\n"
16793"/// A 128-bit unsigned [16 x i8] vector.\n"
16794"/// \\returns A 128-bit unsigned [16 x i8] vector containing the greater value of\n"
16795"/// each comparison.\n"
16796"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16797"_mm_max_epu8(__m128i __a, __m128i __b)\n"
16798"{\n"
16799" return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);\n"
16800"}\n"
16801"\n"
16802"/// Compares corresponding elements of two 128-bit signed [8 x i16]\n"
16803"/// vectors, saving the smaller value from each comparison in the\n"
16804"/// corresponding element of a 128-bit result vector of [8 x i16].\n"
16805"///\n"
16806"/// \\headerfile <x86intrin.h>\n"
16807"///\n"
16808"/// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction.\n"
16809"///\n"
16810"/// \\param __a\n"
16811"/// A 128-bit signed [8 x i16] vector.\n"
16812"/// \\param __b\n"
16813"/// A 128-bit signed [8 x i16] vector.\n"
16814"/// \\returns A 128-bit signed [8 x i16] vector containing the smaller value of\n"
16815"/// each comparison.\n"
16816"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16817"_mm_min_epi16(__m128i __a, __m128i __b)\n"
16818"{\n"
16819" return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);\n"
16820"}\n"
16821"\n"
16822"/// Compares corresponding elements of two 128-bit unsigned [16 x i8]\n"
16823"/// vectors, saving the smaller value from each comparison in the\n"
16824"/// corresponding element of a 128-bit result vector of [16 x i8].\n"
16825"///\n"
16826"/// \\headerfile <x86intrin.h>\n"
16827"///\n"
16828"/// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction.\n"
16829"///\n"
16830"/// \\param __a\n"
16831"/// A 128-bit unsigned [16 x i8] vector.\n"
16832"/// \\param __b\n"
16833"/// A 128-bit unsigned [16 x i8] vector.\n"
16834"/// \\returns A 128-bit unsigned [16 x i8] vector containing the smaller value of\n"
16835"/// each comparison.\n"
16836"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16837"_mm_min_epu8(__m128i __a, __m128i __b)\n"
16838"{\n"
16839" return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);\n"
16840"}\n"
16841"\n"
16842"/// Multiplies the corresponding elements of two signed [8 x i16]\n"
16843"/// vectors, saving the upper 16 bits of each 32-bit product in the\n"
16844"/// corresponding element of a 128-bit signed [8 x i16] result vector.\n"
16845"///\n"
16846"/// \\headerfile <x86intrin.h>\n"
16847"///\n"
16848"/// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction.\n"
16849"///\n"
16850"/// \\param __a\n"
16851"/// A 128-bit signed [8 x i16] vector.\n"
16852"/// \\param __b\n"
16853"/// A 128-bit signed [8 x i16] vector.\n"
16854"/// \\returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of\n"
16855"/// each of the eight 32-bit products.\n"
16856"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16857"_mm_mulhi_epi16(__m128i __a, __m128i __b)\n"
16858"{\n"
16859" return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);\n"
16860"}\n"
16861"\n"
16862"/// Multiplies the corresponding elements of two unsigned [8 x i16]\n"
16863"/// vectors, saving the upper 16 bits of each 32-bit product in the\n"
16864"/// corresponding element of a 128-bit unsigned [8 x i16] result vector.\n"
16865"///\n"
16866"/// \\headerfile <x86intrin.h>\n"
16867"///\n"
16868"/// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction.\n"
16869"///\n"
16870"/// \\param __a\n"
16871"/// A 128-bit unsigned [8 x i16] vector.\n"
16872"/// \\param __b\n"
16873"/// A 128-bit unsigned [8 x i16] vector.\n"
16874"/// \\returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits\n"
16875"/// of each of the eight 32-bit products.\n"
16876"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16877"_mm_mulhi_epu16(__m128i __a, __m128i __b)\n"
16878"{\n"
16879" return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);\n"
16880"}\n"
16881"\n"
16882"/// Multiplies the corresponding elements of two signed [8 x i16]\n"
16883"/// vectors, saving the lower 16 bits of each 32-bit product in the\n"
16884"/// corresponding element of a 128-bit signed [8 x i16] result vector.\n"
16885"///\n"
16886"/// \\headerfile <x86intrin.h>\n"
16887"///\n"
16888"/// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction.\n"
16889"///\n"
16890"/// \\param __a\n"
16891"/// A 128-bit signed [8 x i16] vector.\n"
16892"/// \\param __b\n"
16893"/// A 128-bit signed [8 x i16] vector.\n"
16894"/// \\returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of\n"
16895"/// each of the eight 32-bit products.\n"
16896"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16897"_mm_mullo_epi16(__m128i __a, __m128i __b)\n"
16898"{\n"
16899" return (__m128i)((__v8hu)__a * (__v8hu)__b);\n"
16900"}\n"
16901"\n"
16902"/// Multiplies 32-bit unsigned integer values contained in the lower bits\n"
16903"/// of the two 64-bit integer vectors and returns the 64-bit unsigned\n"
16904"/// product.\n"
16905"///\n"
16906"/// \\headerfile <x86intrin.h>\n"
16907"///\n"
16908"/// This intrinsic corresponds to the <c> PMULUDQ </c> instruction.\n"
16909"///\n"
16910"/// \\param __a\n"
16911"/// A 64-bit integer containing one of the source operands.\n"
16912"/// \\param __b\n"
16913"/// A 64-bit integer containing one of the source operands.\n"
16914"/// \\returns A 64-bit integer vector containing the product of both operands.\n"
16915"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
16916"_mm_mul_su32(__m64 __a, __m64 __b)\n"
16917"{\n"
16918" return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);\n"
16919"}\n"
16920"\n"
16921"/// Multiplies 32-bit unsigned integer values contained in the lower\n"
16922"/// bits of the corresponding elements of two [2 x i64] vectors, and returns\n"
16923"/// the 64-bit products in the corresponding elements of a [2 x i64] vector.\n"
16924"///\n"
16925"/// \\headerfile <x86intrin.h>\n"
16926"///\n"
16927"/// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction.\n"
16928"///\n"
16929"/// \\param __a\n"
16930"/// A [2 x i64] vector containing one of the source operands.\n"
16931"/// \\param __b\n"
16932"/// A [2 x i64] vector containing one of the source operands.\n"
16933"/// \\returns A [2 x i64] vector containing the product of both operands.\n"
16934"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16935"_mm_mul_epu32(__m128i __a, __m128i __b)\n"
16936"{\n"
16937" return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);\n"
16938"}\n"
16939"\n"
16940"/// Computes the absolute differences of corresponding 8-bit integer\n"
16941"/// values in two 128-bit vectors. Sums the first 8 absolute differences, and\n"
16942"/// separately sums the second 8 absolute differences. Packs these two\n"
16943"/// unsigned 16-bit integer sums into the upper and lower elements of a\n"
16944"/// [2 x i64] vector.\n"
16945"///\n"
16946"/// \\headerfile <x86intrin.h>\n"
16947"///\n"
16948"/// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction.\n"
16949"///\n"
16950"/// \\param __a\n"
16951"/// A 128-bit integer vector containing one of the source operands.\n"
16952"/// \\param __b\n"
16953"/// A 128-bit integer vector containing one of the source operands.\n"
16954"/// \\returns A [2 x i64] vector containing the sums of the sets of absolute\n"
16955"/// differences between both operands.\n"
16956"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16957"_mm_sad_epu8(__m128i __a, __m128i __b)\n"
16958"{\n"
16959" return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);\n"
16960"}\n"
16961"\n"
16962"/// Subtracts the corresponding 8-bit integer values in the operands.\n"
16963"///\n"
16964"/// \\headerfile <x86intrin.h>\n"
16965"///\n"
16966"/// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction.\n"
16967"///\n"
16968"/// \\param __a\n"
16969"/// A 128-bit integer vector containing the minuends.\n"
16970"/// \\param __b\n"
16971"/// A 128-bit integer vector containing the subtrahends.\n"
16972"/// \\returns A 128-bit integer vector containing the differences of the values\n"
16973"/// in the operands.\n"
16974"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16975"_mm_sub_epi8(__m128i __a, __m128i __b)\n"
16976"{\n"
16977" return (__m128i)((__v16qu)__a - (__v16qu)__b);\n"
16978"}\n"
16979"\n"
16980"/// Subtracts the corresponding 16-bit integer values in the operands.\n"
16981"///\n"
16982"/// \\headerfile <x86intrin.h>\n"
16983"///\n"
16984"/// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction.\n"
16985"///\n"
16986"/// \\param __a\n"
16987"/// A 128-bit integer vector containing the minuends.\n"
16988"/// \\param __b\n"
16989"/// A 128-bit integer vector containing the subtrahends.\n"
16990"/// \\returns A 128-bit integer vector containing the differences of the values\n"
16991"/// in the operands.\n"
16992"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16993"_mm_sub_epi16(__m128i __a, __m128i __b)\n"
16994"{\n"
16995" return (__m128i)((__v8hu)__a - (__v8hu)__b);\n"
16996"}\n"
16997"\n"
16998"/// Subtracts the corresponding 32-bit integer values in the operands.\n"
16999"///\n"
17000"/// \\headerfile <x86intrin.h>\n"
17001"///\n"
17002"/// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction.\n"
17003"///\n"
17004"/// \\param __a\n"
17005"/// A 128-bit integer vector containing the minuends.\n"
17006"/// \\param __b\n"
17007"/// A 128-bit integer vector containing the subtrahends.\n"
17008"/// \\returns A 128-bit integer vector containing the differences of the values\n"
17009"/// in the operands.\n"
17010"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17011"_mm_sub_epi32(__m128i __a, __m128i __b)\n"
17012"{\n"
17013" return (__m128i)((__v4su)__a - (__v4su)__b);\n"
17014"}\n"
17015"\n"
17016"/// Subtracts signed or unsigned 64-bit integer values and writes the\n"
17017"/// difference to the corresponding bits in the destination.\n"
17018"///\n"
17019"/// \\headerfile <x86intrin.h>\n"
17020"///\n"
17021"/// This intrinsic corresponds to the <c> PSUBQ </c> instruction.\n"
17022"///\n"
17023"/// \\param __a\n"
17024"/// A 64-bit integer vector containing the minuend.\n"
17025"/// \\param __b\n"
17026"/// A 64-bit integer vector containing the subtrahend.\n"
17027"/// \\returns A 64-bit integer vector containing the difference of the values in\n"
17028"/// the operands.\n"
17029"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
17030"_mm_sub_si64(__m64 __a, __m64 __b)\n"
17031"{\n"
17032" return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b);\n"
17033"}\n"
17034"\n"
17035"/// Subtracts the corresponding elements of two [2 x i64] vectors.\n"
17036"///\n"
17037"/// \\headerfile <x86intrin.h>\n"
17038"///\n"
17039"/// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction.\n"
17040"///\n"
17041"/// \\param __a\n"
17042"/// A 128-bit integer vector containing the minuends.\n"
17043"/// \\param __b\n"
17044"/// A 128-bit integer vector containing the subtrahends.\n"
17045"/// \\returns A 128-bit integer vector containing the differences of the values\n"
17046"/// in the operands.\n"
17047"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17048"_mm_sub_epi64(__m128i __a, __m128i __b)\n"
17049"{\n"
17050" return (__m128i)((__v2du)__a - (__v2du)__b);\n"
17051"}\n"
17052"\n"
17053"/// Subtracts corresponding 8-bit signed integer values in the input and\n"
17054"/// returns the differences in the corresponding bytes in the destination.\n"
17055"/// Differences greater than 0x7F are saturated to 0x7F, and differences less\n"
17056"/// than 0x80 are saturated to 0x80.\n"
17057"///\n"
17058"/// \\headerfile <x86intrin.h>\n"
17059"///\n"
17060"/// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction.\n"
17061"///\n"
17062"/// \\param __a\n"
17063"/// A 128-bit integer vector containing the minuends.\n"
17064"/// \\param __b\n"
17065"/// A 128-bit integer vector containing the subtrahends.\n"
17066"/// \\returns A 128-bit integer vector containing the differences of the values\n"
17067"/// in the operands.\n"
17068"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17069"_mm_subs_epi8(__m128i __a, __m128i __b)\n"
17070"{\n"
17071" return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);\n"
17072"}\n"
17073"\n"
17074"/// Subtracts corresponding 16-bit signed integer values in the input and\n"
17075"/// returns the differences in the corresponding bytes in the destination.\n"
17076"/// Differences greater than 0x7FFF are saturated to 0x7FFF, and values less\n"
17077"/// than 0x8000 are saturated to 0x8000.\n"
17078"///\n"
17079"/// \\headerfile <x86intrin.h>\n"
17080"///\n"
17081"/// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction.\n"
17082"///\n"
17083"/// \\param __a\n"
17084"/// A 128-bit integer vector containing the minuends.\n"
17085"/// \\param __b\n"
17086"/// A 128-bit integer vector containing the subtrahends.\n"
17087"/// \\returns A 128-bit integer vector containing the differences of the values\n"
17088"/// in the operands.\n"
17089"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17090"_mm_subs_epi16(__m128i __a, __m128i __b)\n"
17091"{\n"
17092" return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);\n"
17093"}\n"
17094"\n"
17095"/// Subtracts corresponding 8-bit unsigned integer values in the input\n"
17096"/// and returns the differences in the corresponding bytes in the\n"
17097"/// destination. Differences less than 0x00 are saturated to 0x00.\n"
17098"///\n"
17099"/// \\headerfile <x86intrin.h>\n"
17100"///\n"
17101"/// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction.\n"
17102"///\n"
17103"/// \\param __a\n"
17104"/// A 128-bit integer vector containing the minuends.\n"
17105"/// \\param __b\n"
17106"/// A 128-bit integer vector containing the subtrahends.\n"
17107"/// \\returns A 128-bit integer vector containing the unsigned integer\n"
17108"/// differences of the values in the operands.\n"
17109"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17110"_mm_subs_epu8(__m128i __a, __m128i __b)\n"
17111"{\n"
17112" return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);\n"
17113"}\n"
17114"\n"
17115"/// Subtracts corresponding 16-bit unsigned integer values in the input\n"
17116"/// and returns the differences in the corresponding bytes in the\n"
17117"/// destination. Differences less than 0x0000 are saturated to 0x0000.\n"
17118"///\n"
17119"/// \\headerfile <x86intrin.h>\n"
17120"///\n"
17121"/// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction.\n"
17122"///\n"
17123"/// \\param __a\n"
17124"/// A 128-bit integer vector containing the minuends.\n"
17125"/// \\param __b\n"
17126"/// A 128-bit integer vector containing the subtrahends.\n"
17127"/// \\returns A 128-bit integer vector containing the unsigned integer\n"
17128"/// differences of the values in the operands.\n"
17129"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17130"_mm_subs_epu16(__m128i __a, __m128i __b)\n"
17131"{\n"
17132" return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);\n"
17133"}\n"
17134"\n"
17135"/// Performs a bitwise AND of two 128-bit integer vectors.\n"
17136"///\n"
17137"/// \\headerfile <x86intrin.h>\n"
17138"///\n"
17139"/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.\n"
17140"///\n"
17141"/// \\param __a\n"
17142"/// A 128-bit integer vector containing one of the source operands.\n"
17143"/// \\param __b\n"
17144"/// A 128-bit integer vector containing one of the source operands.\n"
17145"/// \\returns A 128-bit integer vector containing the bitwise AND of the values\n"
17146"/// in both operands.\n"
17147"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17148"_mm_and_si128(__m128i __a, __m128i __b)\n"
17149"{\n"
17150" return (__m128i)((__v2du)__a & (__v2du)__b);\n"
17151"}\n"
17152"\n"
17153"/// Performs a bitwise AND of two 128-bit integer vectors, using the\n"
17154"/// one's complement of the values contained in the first source operand.\n"
17155"///\n"
17156"/// \\headerfile <x86intrin.h>\n"
17157"///\n"
17158"/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.\n"
17159"///\n"
17160"/// \\param __a\n"
17161"/// A 128-bit vector containing the left source operand. The one's complement\n"
17162"/// of this value is used in the bitwise AND.\n"
17163"/// \\param __b\n"
17164"/// A 128-bit vector containing the right source operand.\n"
17165"/// \\returns A 128-bit integer vector containing the bitwise AND of the one's\n"
17166"/// complement of the first operand and the values in the second operand.\n"
17167"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17168"_mm_andnot_si128(__m128i __a, __m128i __b)\n"
17169"{\n"
17170" return (__m128i)(~(__v2du)__a & (__v2du)__b);\n"
17171"}\n"
17172"/// Performs a bitwise OR of two 128-bit integer vectors.\n"
17173"///\n"
17174"/// \\headerfile <x86intrin.h>\n"
17175"///\n"
17176"/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.\n"
17177"///\n"
17178"/// \\param __a\n"
17179"/// A 128-bit integer vector containing one of the source operands.\n"
17180"/// \\param __b\n"
17181"/// A 128-bit integer vector containing one of the source operands.\n"
17182"/// \\returns A 128-bit integer vector containing the bitwise OR of the values\n"
17183"/// in both operands.\n"
17184"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17185"_mm_or_si128(__m128i __a, __m128i __b)\n"
17186"{\n"
17187" return (__m128i)((__v2du)__a | (__v2du)__b);\n"
17188"}\n"
17189"\n"
17190"/// Performs a bitwise exclusive OR of two 128-bit integer vectors.\n"
17191"///\n"
17192"/// \\headerfile <x86intrin.h>\n"
17193"///\n"
17194"/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.\n"
17195"///\n"
17196"/// \\param __a\n"
17197"/// A 128-bit integer vector containing one of the source operands.\n"
17198"/// \\param __b\n"
17199"/// A 128-bit integer vector containing one of the source operands.\n"
17200"/// \\returns A 128-bit integer vector containing the bitwise exclusive OR of the\n"
17201"/// values in both operands.\n"
17202"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17203"_mm_xor_si128(__m128i __a, __m128i __b)\n"
17204"{\n"
17205" return (__m128i)((__v2du)__a ^ (__v2du)__b);\n"
17206"}\n"
17207"\n"
17208"/// Left-shifts the 128-bit integer vector operand by the specified\n"
17209"/// number of bytes. Low-order bits are cleared.\n"
17210"///\n"
17211"/// \\headerfile <x86intrin.h>\n"
17212"///\n"
17213"/// \\code\n"
17214"/// __m128i _mm_slli_si128(__m128i a, const int imm);\n"
17215"/// \\endcode\n"
17216"///\n"
17217"/// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction.\n"
17218"///\n"
17219"/// \\param a\n"
17220"/// A 128-bit integer vector containing the source operand.\n"
17221"/// \\param imm\n"
17222"/// An immediate value specifying the number of bytes to left-shift operand\n"
17223"/// \\a a.\n"
17224"/// \\returns A 128-bit integer vector containing the left-shifted value.\n"
17225"#define _mm_slli_si128(a, imm) \\\n"
17226" (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n"
17227"\n"
17228"#define _mm_bslli_si128(a, imm) \\\n"
17229" (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n"
17230"\n"
17231"/// Left-shifts each 16-bit value in the 128-bit integer vector operand\n"
17232"/// by the specified number of bits. Low-order bits are cleared.\n"
17233"///\n"
17234"/// \\headerfile <x86intrin.h>\n"
17235"///\n"
17236"/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.\n"
17237"///\n"
17238"/// \\param __a\n"
17239"/// A 128-bit integer vector containing the source operand.\n"
17240"/// \\param __count\n"
17241"/// An integer value specifying the number of bits to left-shift each value\n"
17242"/// in operand \\a __a.\n"
17243"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17244"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17245"_mm_slli_epi16(__m128i __a, int __count)\n"
17246"{\n"
17247" return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);\n"
17248"}\n"
17249"\n"
17250"/// Left-shifts each 16-bit value in the 128-bit integer vector operand\n"
17251"/// by the specified number of bits. Low-order bits are cleared.\n"
17252"///\n"
17253"/// \\headerfile <x86intrin.h>\n"
17254"///\n"
17255"/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.\n"
17256"///\n"
17257"/// \\param __a\n"
17258"/// A 128-bit integer vector containing the source operand.\n"
17259"/// \\param __count\n"
17260"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17261"/// to left-shift each value in operand \\a __a.\n"
17262"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17263"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17264"_mm_sll_epi16(__m128i __a, __m128i __count)\n"
17265"{\n"
17266" return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);\n"
17267"}\n"
17268"\n"
17269"/// Left-shifts each 32-bit value in the 128-bit integer vector operand\n"
17270"/// by the specified number of bits. Low-order bits are cleared.\n"
17271"///\n"
17272"/// \\headerfile <x86intrin.h>\n"
17273"///\n"
17274"/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.\n"
17275"///\n"
17276"/// \\param __a\n"
17277"/// A 128-bit integer vector containing the source operand.\n"
17278"/// \\param __count\n"
17279"/// An integer value specifying the number of bits to left-shift each value\n"
17280"/// in operand \\a __a.\n"
17281"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17282"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17283"_mm_slli_epi32(__m128i __a, int __count)\n"
17284"{\n"
17285" return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);\n"
17286"}\n"
17287"\n"
17288"/// Left-shifts each 32-bit value in the 128-bit integer vector operand\n"
17289"/// by the specified number of bits. Low-order bits are cleared.\n"
17290"///\n"
17291"/// \\headerfile <x86intrin.h>\n"
17292"///\n"
17293"/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.\n"
17294"///\n"
17295"/// \\param __a\n"
17296"/// A 128-bit integer vector containing the source operand.\n"
17297"/// \\param __count\n"
17298"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17299"/// to left-shift each value in operand \\a __a.\n"
17300"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17301"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17302"_mm_sll_epi32(__m128i __a, __m128i __count)\n"
17303"{\n"
17304" return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);\n"
17305"}\n"
17306"\n"
17307"/// Left-shifts each 64-bit value in the 128-bit integer vector operand\n"
17308"/// by the specified number of bits. Low-order bits are cleared.\n"
17309"///\n"
17310"/// \\headerfile <x86intrin.h>\n"
17311"///\n"
17312"/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.\n"
17313"///\n"
17314"/// \\param __a\n"
17315"/// A 128-bit integer vector containing the source operand.\n"
17316"/// \\param __count\n"
17317"/// An integer value specifying the number of bits to left-shift each value\n"
17318"/// in operand \\a __a.\n"
17319"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17320"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17321"_mm_slli_epi64(__m128i __a, int __count)\n"
17322"{\n"
17323" return __builtin_ia32_psllqi128((__v2di)__a, __count);\n"
17324"}\n"
17325"\n"
17326"/// Left-shifts each 64-bit value in the 128-bit integer vector operand\n"
17327"/// by the specified number of bits. Low-order bits are cleared.\n"
17328"///\n"
17329"/// \\headerfile <x86intrin.h>\n"
17330"///\n"
17331"/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.\n"
17332"///\n"
17333"/// \\param __a\n"
17334"/// A 128-bit integer vector containing the source operand.\n"
17335"/// \\param __count\n"
17336"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17337"/// to left-shift each value in operand \\a __a.\n"
17338"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17339"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17340"_mm_sll_epi64(__m128i __a, __m128i __count)\n"
17341"{\n"
17342" return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count);\n"
17343"}\n"
17344"\n"
17345"/// Right-shifts each 16-bit value in the 128-bit integer vector operand\n"
17346"/// by the specified number of bits. High-order bits are filled with the sign\n"
17347"/// bit of the initial value.\n"
17348"///\n"
17349"/// \\headerfile <x86intrin.h>\n"
17350"///\n"
17351"/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.\n"
17352"///\n"
17353"/// \\param __a\n"
17354"/// A 128-bit integer vector containing the source operand.\n"
17355"/// \\param __count\n"
17356"/// An integer value specifying the number of bits to right-shift each value\n"
17357"/// in operand \\a __a.\n"
17358"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17359"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17360"_mm_srai_epi16(__m128i __a, int __count)\n"
17361"{\n"
17362" return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);\n"
17363"}\n"
17364"\n"
17365"/// Right-shifts each 16-bit value in the 128-bit integer vector operand\n"
17366"/// by the specified number of bits. High-order bits are filled with the sign\n"
17367"/// bit of the initial value.\n"
17368"///\n"
17369"/// \\headerfile <x86intrin.h>\n"
17370"///\n"
17371"/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.\n"
17372"///\n"
17373"/// \\param __a\n"
17374"/// A 128-bit integer vector containing the source operand.\n"
17375"/// \\param __count\n"
17376"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17377"/// to right-shift each value in operand \\a __a.\n"
17378"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17379"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17380"_mm_sra_epi16(__m128i __a, __m128i __count)\n"
17381"{\n"
17382" return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);\n"
17383"}\n"
17384"\n"
17385"/// Right-shifts each 32-bit value in the 128-bit integer vector operand\n"
17386"/// by the specified number of bits. High-order bits are filled with the sign\n"
17387"/// bit of the initial value.\n"
17388"///\n"
17389"/// \\headerfile <x86intrin.h>\n"
17390"///\n"
17391"/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.\n"
17392"///\n"
17393"/// \\param __a\n"
17394"/// A 128-bit integer vector containing the source operand.\n"
17395"/// \\param __count\n"
17396"/// An integer value specifying the number of bits to right-shift each value\n"
17397"/// in operand \\a __a.\n"
17398"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17399"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17400"_mm_srai_epi32(__m128i __a, int __count)\n"
17401"{\n"
17402" return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);\n"
17403"}\n"
17404"\n"
17405"/// Right-shifts each 32-bit value in the 128-bit integer vector operand\n"
17406"/// by the specified number of bits. High-order bits are filled with the sign\n"
17407"/// bit of the initial value.\n"
17408"///\n"
17409"/// \\headerfile <x86intrin.h>\n"
17410"///\n"
17411"/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.\n"
17412"///\n"
17413"/// \\param __a\n"
17414"/// A 128-bit integer vector containing the source operand.\n"
17415"/// \\param __count\n"
17416"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17417"/// to right-shift each value in operand \\a __a.\n"
17418"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17419"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17420"_mm_sra_epi32(__m128i __a, __m128i __count)\n"
17421"{\n"
17422" return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);\n"
17423"}\n"
17424"\n"
17425"/// Right-shifts the 128-bit integer vector operand by the specified\n"
17426"/// number of bytes. High-order bits are cleared.\n"
17427"///\n"
17428"/// \\headerfile <x86intrin.h>\n"
17429"///\n"
17430"/// \\code\n"
17431"/// __m128i _mm_srli_si128(__m128i a, const int imm);\n"
17432"/// \\endcode\n"
17433"///\n"
17434"/// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction.\n"
17435"///\n"
17436"/// \\param a\n"
17437"/// A 128-bit integer vector containing the source operand.\n"
17438"/// \\param imm\n"
17439"/// An immediate value specifying the number of bytes to right-shift operand\n"
17440"/// \\a a.\n"
17441"/// \\returns A 128-bit integer vector containing the right-shifted value.\n"
17442"#define _mm_srli_si128(a, imm) \\\n"
17443" (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n"
17444"\n"
17445"#define _mm_bsrli_si128(a, imm) \\\n"
17446" (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n"
17447"\n"
17448"/// Right-shifts each of 16-bit values in the 128-bit integer vector\n"
17449"/// operand by the specified number of bits. High-order bits are cleared.\n"
17450"///\n"
17451"/// \\headerfile <x86intrin.h>\n"
17452"///\n"
17453"/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.\n"
17454"///\n"
17455"/// \\param __a\n"
17456"/// A 128-bit integer vector containing the source operand.\n"
17457"/// \\param __count\n"
17458"/// An integer value specifying the number of bits to right-shift each value\n"
17459"/// in operand \\a __a.\n"
17460"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17461"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17462"_mm_srli_epi16(__m128i __a, int __count)\n"
17463"{\n"
17464" return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);\n"
17465"}\n"
17466"\n"
17467"/// Right-shifts each of 16-bit values in the 128-bit integer vector\n"
17468"/// operand by the specified number of bits. High-order bits are cleared.\n"
17469"///\n"
17470"/// \\headerfile <x86intrin.h>\n"
17471"///\n"
17472"/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.\n"
17473"///\n"
17474"/// \\param __a\n"
17475"/// A 128-bit integer vector containing the source operand.\n"
17476"/// \\param __count\n"
17477"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17478"/// to right-shift each value in operand \\a __a.\n"
17479"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17480"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17481"_mm_srl_epi16(__m128i __a, __m128i __count)\n"
17482"{\n"
17483" return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);\n"
17484"}\n"
17485"\n"
17486"/// Right-shifts each of 32-bit values in the 128-bit integer vector\n"
17487"/// operand by the specified number of bits. High-order bits are cleared.\n"
17488"///\n"
17489"/// \\headerfile <x86intrin.h>\n"
17490"///\n"
17491"/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.\n"
17492"///\n"
17493"/// \\param __a\n"
17494"/// A 128-bit integer vector containing the source operand.\n"
17495"/// \\param __count\n"
17496"/// An integer value specifying the number of bits to right-shift each value\n"
17497"/// in operand \\a __a.\n"
17498"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17499"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17500"_mm_srli_epi32(__m128i __a, int __count)\n"
17501"{\n"
17502" return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);\n"
17503"}\n"
17504"\n"
17505"/// Right-shifts each of 32-bit values in the 128-bit integer vector\n"
17506"/// operand by the specified number of bits. High-order bits are cleared.\n"
17507"///\n"
17508"/// \\headerfile <x86intrin.h>\n"
17509"///\n"
17510"/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.\n"
17511"///\n"
17512"/// \\param __a\n"
17513"/// A 128-bit integer vector containing the source operand.\n"
17514"/// \\param __count\n"
17515"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17516"/// to right-shift each value in operand \\a __a.\n"
17517"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17518"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17519"_mm_srl_epi32(__m128i __a, __m128i __count)\n"
17520"{\n"
17521" return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);\n"
17522"}\n"
17523"\n"
17524"/// Right-shifts each of 64-bit values in the 128-bit integer vector\n"
17525"/// operand by the specified number of bits. High-order bits are cleared.\n"
17526"///\n"
17527"/// \\headerfile <x86intrin.h>\n"
17528"///\n"
17529"/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.\n"
17530"///\n"
17531"/// \\param __a\n"
17532"/// A 128-bit integer vector containing the source operand.\n"
17533"/// \\param __count\n"
17534"/// An integer value specifying the number of bits to right-shift each value\n"
17535"/// in operand \\a __a.\n"
17536"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17537"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17538"_mm_srli_epi64(__m128i __a, int __count)\n"
17539"{\n"
17540" return __builtin_ia32_psrlqi128((__v2di)__a, __count);\n"
17541"}\n"
17542"\n"
17543"/// Right-shifts each of 64-bit values in the 128-bit integer vector\n"
17544"/// operand by the specified number of bits. High-order bits are cleared.\n"
17545"///\n"
17546"/// \\headerfile <x86intrin.h>\n"
17547"///\n"
17548"/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.\n"
17549"///\n"
17550"/// \\param __a\n"
17551"/// A 128-bit integer vector containing the source operand.\n"
17552"/// \\param __count\n"
17553"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17554"/// to right-shift each value in operand \\a __a.\n"
17555"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17556"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17557"_mm_srl_epi64(__m128i __a, __m128i __count)\n"
17558"{\n"
17559" return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);\n"
17560"}\n"
17561"\n"
17562"/// Compares each of the corresponding 8-bit values of the 128-bit\n"
17563"/// integer vectors for equality. Each comparison yields 0x0 for false, 0xFF\n"
17564"/// for true.\n"
17565"///\n"
17566"/// \\headerfile <x86intrin.h>\n"
17567"///\n"
17568"/// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction.\n"
17569"///\n"
17570"/// \\param __a\n"
17571"/// A 128-bit integer vector.\n"
17572"/// \\param __b\n"
17573"/// A 128-bit integer vector.\n"
17574"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17575"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17576"_mm_cmpeq_epi8(__m128i __a, __m128i __b)\n"
17577"{\n"
17578" return (__m128i)((__v16qi)__a == (__v16qi)__b);\n"
17579"}\n"
17580"\n"
17581"/// Compares each of the corresponding 16-bit values of the 128-bit\n"
17582"/// integer vectors for equality. Each comparison yields 0x0 for false,\n"
17583"/// 0xFFFF for true.\n"
17584"///\n"
17585"/// \\headerfile <x86intrin.h>\n"
17586"///\n"
17587"/// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction.\n"
17588"///\n"
17589"/// \\param __a\n"
17590"/// A 128-bit integer vector.\n"
17591"/// \\param __b\n"
17592"/// A 128-bit integer vector.\n"
17593"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17594"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17595"_mm_cmpeq_epi16(__m128i __a, __m128i __b)\n"
17596"{\n"
17597" return (__m128i)((__v8hi)__a == (__v8hi)__b);\n"
17598"}\n"
17599"\n"
17600"/// Compares each of the corresponding 32-bit values of the 128-bit\n"
17601"/// integer vectors for equality. Each comparison yields 0x0 for false,\n"
17602"/// 0xFFFFFFFF for true.\n"
17603"///\n"
17604"/// \\headerfile <x86intrin.h>\n"
17605"///\n"
17606"/// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction.\n"
17607"///\n"
17608"/// \\param __a\n"
17609"/// A 128-bit integer vector.\n"
17610"/// \\param __b\n"
17611"/// A 128-bit integer vector.\n"
17612"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17613"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17614"_mm_cmpeq_epi32(__m128i __a, __m128i __b)\n"
17615"{\n"
17616" return (__m128i)((__v4si)__a == (__v4si)__b);\n"
17617"}\n"
17618"\n"
17619"/// Compares each of the corresponding signed 8-bit values of the 128-bit\n"
17620"/// integer vectors to determine if the values in the first operand are\n"
17621"/// greater than those in the second operand. Each comparison yields 0x0 for\n"
17622"/// false, 0xFF for true.\n"
17623"///\n"
17624"/// \\headerfile <x86intrin.h>\n"
17625"///\n"
17626"/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.\n"
17627"///\n"
17628"/// \\param __a\n"
17629"/// A 128-bit integer vector.\n"
17630"/// \\param __b\n"
17631"/// A 128-bit integer vector.\n"
17632"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17633"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17634"_mm_cmpgt_epi8(__m128i __a, __m128i __b)\n"
17635"{\n"
17636" /* This function always performs a signed comparison, but __v16qi is a char\n"
17637" which may be signed or unsigned, so use __v16qs. */\n"
17638" return (__m128i)((__v16qs)__a > (__v16qs)__b);\n"
17639"}\n"
17640"\n"
17641"/// Compares each of the corresponding signed 16-bit values of the\n"
17642"/// 128-bit integer vectors to determine if the values in the first operand\n"
17643"/// are greater than those in the second operand.\n"
17644"///\n"
17645"/// Each comparison yields 0x0 for false, 0xFFFF for true.\n"
17646"///\n"
17647"/// \\headerfile <x86intrin.h>\n"
17648"///\n"
17649"/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.\n"
17650"///\n"
17651"/// \\param __a\n"
17652"/// A 128-bit integer vector.\n"
17653"/// \\param __b\n"
17654"/// A 128-bit integer vector.\n"
17655"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17656"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17657"_mm_cmpgt_epi16(__m128i __a, __m128i __b)\n"
17658"{\n"
17659" return (__m128i)((__v8hi)__a > (__v8hi)__b);\n"
17660"}\n"
17661"\n"
17662"/// Compares each of the corresponding signed 32-bit values of the\n"
17663"/// 128-bit integer vectors to determine if the values in the first operand\n"
17664"/// are greater than those in the second operand.\n"
17665"///\n"
17666"/// Each comparison yields 0x0 for false, 0xFFFFFFFF for true.\n"
17667"///\n"
17668"/// \\headerfile <x86intrin.h>\n"
17669"///\n"
17670"/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.\n"
17671"///\n"
17672"/// \\param __a\n"
17673"/// A 128-bit integer vector.\n"
17674"/// \\param __b\n"
17675"/// A 128-bit integer vector.\n"
17676"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17677"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17678"_mm_cmpgt_epi32(__m128i __a, __m128i __b)\n"
17679"{\n"
17680" return (__m128i)((__v4si)__a > (__v4si)__b);\n"
17681"}\n"
17682"\n"
17683"/// Compares each of the corresponding signed 8-bit values of the 128-bit\n"
17684"/// integer vectors to determine if the values in the first operand are less\n"
17685"/// than those in the second operand.\n"
17686"///\n"
17687"/// Each comparison yields 0x0 for false, 0xFF for true.\n"
17688"///\n"
17689"/// \\headerfile <x86intrin.h>\n"
17690"///\n"
17691"/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.\n"
17692"///\n"
17693"/// \\param __a\n"
17694"/// A 128-bit integer vector.\n"
17695"/// \\param __b\n"
17696"/// A 128-bit integer vector.\n"
17697"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17698"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17699"_mm_cmplt_epi8(__m128i __a, __m128i __b)\n"
17700"{\n"
17701" return _mm_cmpgt_epi8(__b, __a);\n"
17702"}\n"
17703"\n"
17704"/// Compares each of the corresponding signed 16-bit values of the\n"
17705"/// 128-bit integer vectors to determine if the values in the first operand\n"
17706"/// are less than those in the second operand.\n"
17707"///\n"
17708"/// Each comparison yields 0x0 for false, 0xFFFF for true.\n"
17709"///\n"
17710"/// \\headerfile <x86intrin.h>\n"
17711"///\n"
17712"/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.\n"
17713"///\n"
17714"/// \\param __a\n"
17715"/// A 128-bit integer vector.\n"
17716"/// \\param __b\n"
17717"/// A 128-bit integer vector.\n"
17718"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17719"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17720"_mm_cmplt_epi16(__m128i __a, __m128i __b)\n"
17721"{\n"
17722" return _mm_cmpgt_epi16(__b, __a);\n"
17723"}\n"
17724"\n"
17725"/// Compares each of the corresponding signed 32-bit values of the\n"
17726"/// 128-bit integer vectors to determine if the values in the first operand\n"
17727"/// are less than those in the second operand.\n"
17728"///\n"
17729"/// Each comparison yields 0x0 for false, 0xFFFFFFFF for true.\n"
17730"///\n"
17731"/// \\headerfile <x86intrin.h>\n"
17732"///\n"
17733"/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.\n"
17734"///\n"
17735"/// \\param __a\n"
17736"/// A 128-bit integer vector.\n"
17737"/// \\param __b\n"
17738"/// A 128-bit integer vector.\n"
17739"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17740"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17741"_mm_cmplt_epi32(__m128i __a, __m128i __b)\n"
17742"{\n"
17743" return _mm_cmpgt_epi32(__b, __a);\n"
17744"}\n"
17745"\n"
17746"#ifdef __x86_64__\n"
17747"/// Converts a 64-bit signed integer value from the second operand into a\n"
17748"/// double-precision value and returns it in the lower element of a [2 x\n"
17749"/// double] vector; the upper element of the returned vector is copied from\n"
17750"/// the upper element of the first operand.\n"
17751"///\n"
17752"/// \\headerfile <x86intrin.h>\n"
17753"///\n"
17754"/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.\n"
17755"///\n"
17756"/// \\param __a\n"
17757"/// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are\n"
17758"/// copied to the upper 64 bits of the destination.\n"
17759"/// \\param __b\n"
17760"/// A 64-bit signed integer operand containing the value to be converted.\n"
17761"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
17762"/// converted value of the second operand. The upper 64 bits are copied from\n"
17763"/// the upper 64 bits of the first operand.\n"
17764"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
17765"_mm_cvtsi64_sd(__m128d __a, long long __b)\n"
17766"{\n"
17767" __a[0] = __b;\n"
17768" return __a;\n"
17769"}\n"
17770"\n"
17771"/// Converts the first (lower) element of a vector of [2 x double] into a\n"
17772"/// 64-bit signed integer value, according to the current rounding mode.\n"
17773"///\n"
17774"/// \\headerfile <x86intrin.h>\n"
17775"///\n"
17776"/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.\n"
17777"///\n"
17778"/// \\param __a\n"
17779"/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n"
17780"/// conversion.\n"
17781"/// \\returns A 64-bit signed integer containing the converted value.\n"
17782"static __inline__ long long __DEFAULT_FN_ATTRS\n"
17783"_mm_cvtsd_si64(__m128d __a)\n"
17784"{\n"
17785" return __builtin_ia32_cvtsd2si64((__v2df)__a);\n"
17786"}\n"
17787"\n"
17788"/// Converts the first (lower) element of a vector of [2 x double] into a\n"
17789"/// 64-bit signed integer value, truncating the result when it is inexact.\n"
17790"///\n"
17791"/// \\headerfile <x86intrin.h>\n"
17792"///\n"
17793"/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>\n"
17794"/// instruction.\n"
17795"///\n"
17796"/// \\param __a\n"
17797"/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n"
17798"/// conversion.\n"
17799"/// \\returns A 64-bit signed integer containing the converted value.\n"
17800"static __inline__ long long __DEFAULT_FN_ATTRS\n"
17801"_mm_cvttsd_si64(__m128d __a)\n"
17802"{\n"
17803" return __builtin_ia32_cvttsd2si64((__v2df)__a);\n"
17804"}\n"
17805"#endif\n"
17806"\n"
17807"/// Converts a vector of [4 x i32] into a vector of [4 x float].\n"
17808"///\n"
17809"/// \\headerfile <x86intrin.h>\n"
17810"///\n"
17811"/// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction.\n"
17812"///\n"
17813"/// \\param __a\n"
17814"/// A 128-bit integer vector.\n"
17815"/// \\returns A 128-bit vector of [4 x float] containing the converted values.\n"
17816"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
17817"_mm_cvtepi32_ps(__m128i __a)\n"
17818"{\n"
17819" return (__m128)__builtin_convertvector((__v4si)__a, __v4sf);\n"
17820"}\n"
17821"\n"
17822"/// Converts a vector of [4 x float] into a vector of [4 x i32].\n"
17823"///\n"
17824"/// \\headerfile <x86intrin.h>\n"
17825"///\n"
17826"/// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction.\n"
17827"///\n"
17828"/// \\param __a\n"
17829"/// A 128-bit vector of [4 x float].\n"
17830"/// \\returns A 128-bit integer vector of [4 x i32] containing the converted\n"
17831"/// values.\n"
17832"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17833"_mm_cvtps_epi32(__m128 __a)\n"
17834"{\n"
17835" return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a);\n"
17836"}\n"
17837"\n"
17838"/// Converts a vector of [4 x float] into a vector of [4 x i32],\n"
17839"/// truncating the result when it is inexact.\n"
17840"///\n"
17841"/// \\headerfile <x86intrin.h>\n"
17842"///\n"
17843"/// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c>\n"
17844"/// instruction.\n"
17845"///\n"
17846"/// \\param __a\n"
17847"/// A 128-bit vector of [4 x float].\n"
17848"/// \\returns A 128-bit vector of [4 x i32] containing the converted values.\n"
17849"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17850"_mm_cvttps_epi32(__m128 __a)\n"
17851"{\n"
17852" return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);\n"
17853"}\n"
17854"\n"
17855"/// Returns a vector of [4 x i32] where the lowest element is the input\n"
17856"/// operand and the remaining elements are zero.\n"
17857"///\n"
17858"/// \\headerfile <x86intrin.h>\n"
17859"///\n"
17860"/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n"
17861"///\n"
17862"/// \\param __a\n"
17863"/// A 32-bit signed integer operand.\n"
17864"/// \\returns A 128-bit vector of [4 x i32].\n"
17865"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17866"_mm_cvtsi32_si128(int __a)\n"
17867"{\n"
17868" return __extension__ (__m128i)(__v4si){ __a, 0, 0, 0 };\n"
17869"}\n"
17870"\n"
17871"#ifdef __x86_64__\n"
17872"/// Returns a vector of [2 x i64] where the lower element is the input\n"
17873"/// operand and the upper element is zero.\n"
17874"///\n"
17875"/// \\headerfile <x86intrin.h>\n"
17876"///\n"
17877"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
17878"///\n"
17879"/// \\param __a\n"
17880"/// A 64-bit signed integer operand containing the value to be converted.\n"
17881"/// \\returns A 128-bit vector of [2 x i64] containing the converted value.\n"
17882"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17883"_mm_cvtsi64_si128(long long __a)\n"
17884"{\n"
17885" return __extension__ (__m128i)(__v2di){ __a, 0 };\n"
17886"}\n"
17887"#endif\n"
17888"\n"
17889"/// Moves the least significant 32 bits of a vector of [4 x i32] to a\n"
17890"/// 32-bit signed integer value.\n"
17891"///\n"
17892"/// \\headerfile <x86intrin.h>\n"
17893"///\n"
17894"/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n"
17895"///\n"
17896"/// \\param __a\n"
17897"/// A vector of [4 x i32]. The least significant 32 bits are moved to the\n"
17898"/// destination.\n"
17899"/// \\returns A 32-bit signed integer containing the moved value.\n"
17900"static __inline__ int __DEFAULT_FN_ATTRS\n"
17901"_mm_cvtsi128_si32(__m128i __a)\n"
17902"{\n"
17903" __v4si __b = (__v4si)__a;\n"
17904" return __b[0];\n"
17905"}\n"
17906"\n"
17907"#ifdef __x86_64__\n"
17908"/// Moves the least significant 64 bits of a vector of [2 x i64] to a\n"
17909"/// 64-bit signed integer value.\n"
17910"///\n"
17911"/// \\headerfile <x86intrin.h>\n"
17912"///\n"
17913"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
17914"///\n"
17915"/// \\param __a\n"
17916"/// A vector of [2 x i64]. The least significant 64 bits are moved to the\n"
17917"/// destination.\n"
17918"/// \\returns A 64-bit signed integer containing the moved value.\n"
17919"static __inline__ long long __DEFAULT_FN_ATTRS\n"
17920"_mm_cvtsi128_si64(__m128i __a)\n"
17921"{\n"
17922" return __a[0];\n"
17923"}\n"
17924"#endif\n"
17925"\n"
17926"/// Moves packed integer values from an aligned 128-bit memory location\n"
17927"/// to elements in a 128-bit integer vector.\n"
17928"///\n"
17929"/// \\headerfile <x86intrin.h>\n"
17930"///\n"
17931"/// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction.\n"
17932"///\n"
17933"/// \\param __p\n"
17934"/// An aligned pointer to a memory location containing integer values.\n"
17935"/// \\returns A 128-bit integer vector containing the moved values.\n"
17936"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17937"_mm_load_si128(__m128i const *__p)\n"
17938"{\n"
17939" return *__p;\n"
17940"}\n"
17941"\n"
17942"/// Moves packed integer values from an unaligned 128-bit memory location\n"
17943"/// to elements in a 128-bit integer vector.\n"
17944"///\n"
17945"/// \\headerfile <x86intrin.h>\n"
17946"///\n"
17947"/// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction.\n"
17948"///\n"
17949"/// \\param __p\n"
17950"/// A pointer to a memory location containing integer values.\n"
17951"/// \\returns A 128-bit integer vector containing the moved values.\n"
17952"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17953"_mm_loadu_si128(__m128i const *__p)\n"
17954"{\n"
17955" struct __loadu_si128 {\n"
17956" __m128i __v;\n"
17957" } __attribute__((__packed__, __may_alias__));\n"
17958" return ((struct __loadu_si128*)__p)->__v;\n"
17959"}\n"
17960"\n"
17961"/// Returns a vector of [2 x i64] where the lower element is taken from\n"
17962"/// the lower element of the operand, and the upper element is zero.\n"
17963"///\n"
17964"/// \\headerfile <x86intrin.h>\n"
17965"///\n"
17966"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
17967"///\n"
17968"/// \\param __p\n"
17969"/// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of\n"
17970"/// the destination.\n"
17971"/// \\returns A 128-bit vector of [2 x i64]. The lower order bits contain the\n"
17972"/// moved value. The higher order bits are cleared.\n"
17973"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17974"_mm_loadl_epi64(__m128i const *__p)\n"
17975"{\n"
17976" struct __mm_loadl_epi64_struct {\n"
17977" long long __u;\n"
17978" } __attribute__((__packed__, __may_alias__));\n"
17979" return __extension__ (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};\n"
17980"}\n"
17981"\n"
17982"/// Generates a 128-bit vector of [4 x i32] with unspecified content.\n"
17983"/// This could be used as an argument to another intrinsic function where the\n"
17984"/// argument is required but the value is not actually used.\n"
17985"///\n"
17986"/// \\headerfile <x86intrin.h>\n"
17987"///\n"
17988"/// This intrinsic has no corresponding instruction.\n"
17989"///\n"
17990"/// \\returns A 128-bit vector of [4 x i32] with unspecified content.\n"
17991"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17992"_mm_undefined_si128(void)\n"
17993"{\n"
17994" return (__m128i)__builtin_ia32_undef128();\n"
17995"}\n"
17996"\n"
17997"/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with\n"
17998"/// the specified 64-bit integer values.\n"
17999"///\n"
18000"/// \\headerfile <x86intrin.h>\n"
18001"///\n"
18002"/// This intrinsic is a utility function and does not correspond to a specific\n"
18003"/// instruction.\n"
18004"///\n"
18005"/// \\param __q1\n"
18006"/// A 64-bit integer value used to initialize the upper 64 bits of the\n"
18007"/// destination vector of [2 x i64].\n"
18008"/// \\param __q0\n"
18009"/// A 64-bit integer value used to initialize the lower 64 bits of the\n"
18010"/// destination vector of [2 x i64].\n"
18011"/// \\returns An initialized 128-bit vector of [2 x i64] containing the values\n"
18012"/// provided in the operands.\n"
18013"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18014"_mm_set_epi64x(long long __q1, long long __q0)\n"
18015"{\n"
18016" return __extension__ (__m128i)(__v2di){ __q0, __q1 };\n"
18017"}\n"
18018"\n"
18019"/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with\n"
18020"/// the specified 64-bit integer values.\n"
18021"///\n"
18022"/// \\headerfile <x86intrin.h>\n"
18023"///\n"
18024"/// This intrinsic is a utility function and does not correspond to a specific\n"
18025"/// instruction.\n"
18026"///\n"
18027"/// \\param __q1\n"
18028"/// A 64-bit integer value used to initialize the upper 64 bits of the\n"
18029"/// destination vector of [2 x i64].\n"
18030"/// \\param __q0\n"
18031"/// A 64-bit integer value used to initialize the lower 64 bits of the\n"
18032"/// destination vector of [2 x i64].\n"
18033"/// \\returns An initialized 128-bit vector of [2 x i64] containing the values\n"
18034"/// provided in the operands.\n"
18035"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18036"_mm_set_epi64(__m64 __q1, __m64 __q0)\n"
18037"{\n"
18038" return _mm_set_epi64x((long long)__q1, (long long)__q0);\n"
18039"}\n"
18040"\n"
18041"/// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with\n"
18042"/// the specified 32-bit integer values.\n"
18043"///\n"
18044"/// \\headerfile <x86intrin.h>\n"
18045"///\n"
18046"/// This intrinsic is a utility function and does not correspond to a specific\n"
18047"/// instruction.\n"
18048"///\n"
18049"/// \\param __i3\n"
18050"/// A 32-bit integer value used to initialize bits [127:96] of the\n"
18051"/// destination vector.\n"
18052"/// \\param __i2\n"
18053"/// A 32-bit integer value used to initialize bits [95:64] of the destination\n"
18054"/// vector.\n"
18055"/// \\param __i1\n"
18056"/// A 32-bit integer value used to initialize bits [63:32] of the destination\n"
18057"/// vector.\n"
18058"/// \\param __i0\n"
18059"/// A 32-bit integer value used to initialize bits [31:0] of the destination\n"
18060"/// vector.\n"
18061"/// \\returns An initialized 128-bit vector of [4 x i32] containing the values\n"
18062"/// provided in the operands.\n"
18063"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18064"_mm_set_epi32(int __i3, int __i2, int __i1, int __i0)\n"
18065"{\n"
18066" return __extension__ (__m128i)(__v4si){ __i0, __i1, __i2, __i3};\n"
18067"}\n"
18068"\n"
18069"/// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with\n"
18070"/// the specified 16-bit integer values.\n"
18071"///\n"
18072"/// \\headerfile <x86intrin.h>\n"
18073"///\n"
18074"/// This intrinsic is a utility function and does not correspond to a specific\n"
18075"/// instruction.\n"
18076"///\n"
18077"/// \\param __w7\n"
18078"/// A 16-bit integer value used to initialize bits [127:112] of the\n"
18079"/// destination vector.\n"
18080"/// \\param __w6\n"
18081"/// A 16-bit integer value used to initialize bits [111:96] of the\n"
18082"/// destination vector.\n"
18083"/// \\param __w5\n"
18084"/// A 16-bit integer value used to initialize bits [95:80] of the destination\n"
18085"/// vector.\n"
18086"/// \\param __w4\n"
18087"/// A 16-bit integer value used to initialize bits [79:64] of the destination\n"
18088"/// vector.\n"
18089"/// \\param __w3\n"
18090"/// A 16-bit integer value used to initialize bits [63:48] of the destination\n"
18091"/// vector.\n"
18092"/// \\param __w2\n"
18093"/// A 16-bit integer value used to initialize bits [47:32] of the destination\n"
18094"/// vector.\n"
18095"/// \\param __w1\n"
18096"/// A 16-bit integer value used to initialize bits [31:16] of the destination\n"
18097"/// vector.\n"
18098"/// \\param __w0\n"
18099"/// A 16-bit integer value used to initialize bits [15:0] of the destination\n"
18100"/// vector.\n"
18101"/// \\returns An initialized 128-bit vector of [8 x i16] containing the values\n"
18102"/// provided in the operands.\n"
18103"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18104"_mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)\n"
18105"{\n"
18106" return __extension__ (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };\n"
18107"}\n"
18108"\n"
18109"/// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with\n"
18110"/// the specified 8-bit integer values.\n"
18111"///\n"
18112"/// \\headerfile <x86intrin.h>\n"
18113"///\n"
18114"/// This intrinsic is a utility function and does not correspond to a specific\n"
18115"/// instruction.\n"
18116"///\n"
18117"/// \\param __b15\n"
18118"/// Initializes bits [127:120] of the destination vector.\n"
18119"/// \\param __b14\n"
18120"/// Initializes bits [119:112] of the destination vector.\n"
18121"/// \\param __b13\n"
18122"/// Initializes bits [111:104] of the destination vector.\n"
18123"/// \\param __b12\n"
18124"/// Initializes bits [103:96] of the destination vector.\n"
18125"/// \\param __b11\n"
18126"/// Initializes bits [95:88] of the destination vector.\n"
18127"/// \\param __b10\n"
18128"/// Initializes bits [87:80] of the destination vector.\n"
18129"/// \\param __b9\n"
18130"/// Initializes bits [79:72] of the destination vector.\n"
18131"/// \\param __b8\n"
18132"/// Initializes bits [71:64] of the destination vector.\n"
18133"/// \\param __b7\n"
18134"/// Initializes bits [63:56] of the destination vector.\n"
18135"/// \\param __b6\n"
18136"/// Initializes bits [55:48] of the destination vector.\n"
18137"/// \\param __b5\n"
18138"/// Initializes bits [47:40] of the destination vector.\n"
18139"/// \\param __b4\n"
18140"/// Initializes bits [39:32] of the destination vector.\n"
18141"/// \\param __b3\n"
18142"/// Initializes bits [31:24] of the destination vector.\n"
18143"/// \\param __b2\n"
18144"/// Initializes bits [23:16] of the destination vector.\n"
18145"/// \\param __b1\n"
18146"/// Initializes bits [15:8] of the destination vector.\n"
18147"/// \\param __b0\n"
18148"/// Initializes bits [7:0] of the destination vector.\n"
18149"/// \\returns An initialized 128-bit vector of [16 x i8] containing the values\n"
18150"/// provided in the operands.\n"
18151"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18152"_mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)\n"
18153"{\n"
18154" return __extension__ (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };\n"
18155"}\n"
18156"\n"
18157"/// Initializes both values in a 128-bit integer vector with the\n"
18158"/// specified 64-bit integer value.\n"
18159"///\n"
18160"/// \\headerfile <x86intrin.h>\n"
18161"///\n"
18162"/// This intrinsic is a utility function and does not correspond to a specific\n"
18163"/// instruction.\n"
18164"///\n"
18165"/// \\param __q\n"
18166"/// Integer value used to initialize the elements of the destination integer\n"
18167"/// vector.\n"
18168"/// \\returns An initialized 128-bit integer vector of [2 x i64] with both\n"
18169"/// elements containing the value provided in the operand.\n"
18170"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18171"_mm_set1_epi64x(long long __q)\n"
18172"{\n"
18173" return _mm_set_epi64x(__q, __q);\n"
18174"}\n"
18175"\n"
18176"/// Initializes both values in a 128-bit vector of [2 x i64] with the\n"
18177"/// specified 64-bit value.\n"
18178"///\n"
18179"/// \\headerfile <x86intrin.h>\n"
18180"///\n"
18181"/// This intrinsic is a utility function and does not correspond to a specific\n"
18182"/// instruction.\n"
18183"///\n"
18184"/// \\param __q\n"
18185"/// A 64-bit value used to initialize the elements of the destination integer\n"
18186"/// vector.\n"
18187"/// \\returns An initialized 128-bit vector of [2 x i64] with all elements\n"
18188"/// containing the value provided in the operand.\n"
18189"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18190"_mm_set1_epi64(__m64 __q)\n"
18191"{\n"
18192" return _mm_set_epi64(__q, __q);\n"
18193"}\n"
18194"\n"
18195"/// Initializes all values in a 128-bit vector of [4 x i32] with the\n"
18196"/// specified 32-bit value.\n"
18197"///\n"
18198"/// \\headerfile <x86intrin.h>\n"
18199"///\n"
18200"/// This intrinsic is a utility function and does not correspond to a specific\n"
18201"/// instruction.\n"
18202"///\n"
18203"/// \\param __i\n"
18204"/// A 32-bit value used to initialize the elements of the destination integer\n"
18205"/// vector.\n"
18206"/// \\returns An initialized 128-bit vector of [4 x i32] with all elements\n"
18207"/// containing the value provided in the operand.\n"
18208"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18209"_mm_set1_epi32(int __i)\n"
18210"{\n"
18211" return _mm_set_epi32(__i, __i, __i, __i);\n"
18212"}\n"
18213"\n"
18214"/// Initializes all values in a 128-bit vector of [8 x i16] with the\n"
18215"/// specified 16-bit value.\n"
18216"///\n"
18217"/// \\headerfile <x86intrin.h>\n"
18218"///\n"
18219"/// This intrinsic is a utility function and does not correspond to a specific\n"
18220"/// instruction.\n"
18221"///\n"
18222"/// \\param __w\n"
18223"/// A 16-bit value used to initialize the elements of the destination integer\n"
18224"/// vector.\n"
18225"/// \\returns An initialized 128-bit vector of [8 x i16] with all elements\n"
18226"/// containing the value provided in the operand.\n"
18227"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18228"_mm_set1_epi16(short __w)\n"
18229"{\n"
18230" return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w);\n"
18231"}\n"
18232"\n"
18233"/// Initializes all values in a 128-bit vector of [16 x i8] with the\n"
18234"/// specified 8-bit value.\n"
18235"///\n"
18236"/// \\headerfile <x86intrin.h>\n"
18237"///\n"
18238"/// This intrinsic is a utility function and does not correspond to a specific\n"
18239"/// instruction.\n"
18240"///\n"
18241"/// \\param __b\n"
18242"/// An 8-bit value used to initialize the elements of the destination integer\n"
18243"/// vector.\n"
18244"/// \\returns An initialized 128-bit vector of [16 x i8] with all elements\n"
18245"/// containing the value provided in the operand.\n"
18246"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18247"_mm_set1_epi8(char __b)\n"
18248"{\n"
18249" return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b);\n"
18250"}\n"
18251"\n"
18252"/// Constructs a 128-bit integer vector, initialized in reverse order\n"
18253"/// with the specified 64-bit integral values.\n"
18254"///\n"
18255"/// \\headerfile <x86intrin.h>\n"
18256"///\n"
18257"/// This intrinsic does not correspond to a specific instruction.\n"
18258"///\n"
18259"/// \\param __q0\n"
18260"/// A 64-bit integral value used to initialize the lower 64 bits of the\n"
18261"/// result.\n"
18262"/// \\param __q1\n"
18263"/// A 64-bit integral value used to initialize the upper 64 bits of the\n"
18264"/// result.\n"
18265"/// \\returns An initialized 128-bit integer vector.\n"
18266"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18267"_mm_setr_epi64(__m64 __q0, __m64 __q1)\n"
18268"{\n"
18269" return _mm_set_epi64(__q1, __q0);\n"
18270"}\n"
18271"\n"
18272"/// Constructs a 128-bit integer vector, initialized in reverse order\n"
18273"/// with the specified 32-bit integral values.\n"
18274"///\n"
18275"/// \\headerfile <x86intrin.h>\n"
18276"///\n"
18277"/// This intrinsic is a utility function and does not correspond to a specific\n"
18278"/// instruction.\n"
18279"///\n"
18280"/// \\param __i0\n"
18281"/// A 32-bit integral value used to initialize bits [31:0] of the result.\n"
18282"/// \\param __i1\n"
18283"/// A 32-bit integral value used to initialize bits [63:32] of the result.\n"
18284"/// \\param __i2\n"
18285"/// A 32-bit integral value used to initialize bits [95:64] of the result.\n"
18286"/// \\param __i3\n"
18287"/// A 32-bit integral value used to initialize bits [127:96] of the result.\n"
18288"/// \\returns An initialized 128-bit integer vector.\n"
18289"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18290"_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)\n"
18291"{\n"
18292" return _mm_set_epi32(__i3, __i2, __i1, __i0);\n"
18293"}\n"
18294"\n"
18295"/// Constructs a 128-bit integer vector, initialized in reverse order\n"
18296"/// with the specified 16-bit integral values.\n"
18297"///\n"
18298"/// \\headerfile <x86intrin.h>\n"
18299"///\n"
18300"/// This intrinsic is a utility function and does not correspond to a specific\n"
18301"/// instruction.\n"
18302"///\n"
18303"/// \\param __w0\n"
18304"/// A 16-bit integral value used to initialize bits [15:0] of the result.\n"
18305"/// \\param __w1\n"
18306"/// A 16-bit integral value used to initialize bits [31:16] of the result.\n"
18307"/// \\param __w2\n"
18308"/// A 16-bit integral value used to initialize bits [47:32] of the result.\n"
18309"/// \\param __w3\n"
18310"/// A 16-bit integral value used to initialize bits [63:48] of the result.\n"
18311"/// \\param __w4\n"
18312"/// A 16-bit integral value used to initialize bits [79:64] of the result.\n"
18313"/// \\param __w5\n"
18314"/// A 16-bit integral value used to initialize bits [95:80] of the result.\n"
18315"/// \\param __w6\n"
18316"/// A 16-bit integral value used to initialize bits [111:96] of the result.\n"
18317"/// \\param __w7\n"
18318"/// A 16-bit integral value used to initialize bits [127:112] of the result.\n"
18319"/// \\returns An initialized 128-bit integer vector.\n"
18320"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18321"_mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)\n"
18322"{\n"
18323" return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0);\n"
18324"}\n"
18325"\n"
18326"/// Constructs a 128-bit integer vector, initialized in reverse order\n"
18327"/// with the specified 8-bit integral values.\n"
18328"///\n"
18329"/// \\headerfile <x86intrin.h>\n"
18330"///\n"
18331"/// This intrinsic is a utility function and does not correspond to a specific\n"
18332"/// instruction.\n"
18333"///\n"
18334"/// \\param __b0\n"
18335"/// An 8-bit integral value used to initialize bits [7:0] of the result.\n"
18336"/// \\param __b1\n"
18337"/// An 8-bit integral value used to initialize bits [15:8] of the result.\n"
18338"/// \\param __b2\n"
18339"/// An 8-bit integral value used to initialize bits [23:16] of the result.\n"
18340"/// \\param __b3\n"
18341"/// An 8-bit integral value used to initialize bits [31:24] of the result.\n"
18342"/// \\param __b4\n"
18343"/// An 8-bit integral value used to initialize bits [39:32] of the result.\n"
18344"/// \\param __b5\n"
18345"/// An 8-bit integral value used to initialize bits [47:40] of the result.\n"
18346"/// \\param __b6\n"
18347"/// An 8-bit integral value used to initialize bits [55:48] of the result.\n"
18348"/// \\param __b7\n"
18349"/// An 8-bit integral value used to initialize bits [63:56] of the result.\n"
18350"/// \\param __b8\n"
18351"/// An 8-bit integral value used to initialize bits [71:64] of the result.\n"
18352"/// \\param __b9\n"
18353"/// An 8-bit integral value used to initialize bits [79:72] of the result.\n"
18354"/// \\param __b10\n"
18355"/// An 8-bit integral value used to initialize bits [87:80] of the result.\n"
18356"/// \\param __b11\n"
18357"/// An 8-bit integral value used to initialize bits [95:88] of the result.\n"
18358"/// \\param __b12\n"
18359"/// An 8-bit integral value used to initialize bits [103:96] of the result.\n"
18360"/// \\param __b13\n"
18361"/// An 8-bit integral value used to initialize bits [111:104] of the result.\n"
18362"/// \\param __b14\n"
18363"/// An 8-bit integral value used to initialize bits [119:112] of the result.\n"
18364"/// \\param __b15\n"
18365"/// An 8-bit integral value used to initialize bits [127:120] of the result.\n"
18366"/// \\returns An initialized 128-bit integer vector.\n"
18367"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18368"_mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)\n"
18369"{\n"
18370" return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);\n"
18371"}\n"
18372"\n"
18373"/// Creates a 128-bit integer vector initialized to zero.\n"
18374"///\n"
18375"/// \\headerfile <x86intrin.h>\n"
18376"///\n"
18377"/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.\n"
18378"///\n"
18379"/// \\returns An initialized 128-bit integer vector with all elements set to\n"
18380"/// zero.\n"
18381"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18382"_mm_setzero_si128(void)\n"
18383"{\n"
18384" return __extension__ (__m128i)(__v2di){ 0LL, 0LL };\n"
18385"}\n"
18386"\n"
18387"/// Stores a 128-bit integer vector to a memory location aligned on a\n"
18388"/// 128-bit boundary.\n"
18389"///\n"
18390"/// \\headerfile <x86intrin.h>\n"
18391"///\n"
18392"/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.\n"
18393"///\n"
18394"/// \\param __p\n"
18395"/// A pointer to an aligned memory location that will receive the integer\n"
18396"/// values.\n"
18397"/// \\param __b\n"
18398"/// A 128-bit integer vector containing the values to be moved.\n"
18399"static __inline__ void __DEFAULT_FN_ATTRS\n"
18400"_mm_store_si128(__m128i *__p, __m128i __b)\n"
18401"{\n"
18402" *__p = __b;\n"
18403"}\n"
18404"\n"
18405"/// Stores a 128-bit integer vector to an unaligned memory location.\n"
18406"///\n"
18407"/// \\headerfile <x86intrin.h>\n"
18408"///\n"
18409"/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.\n"
18410"///\n"
18411"/// \\param __p\n"
18412"/// A pointer to a memory location that will receive the integer values.\n"
18413"/// \\param __b\n"
18414"/// A 128-bit integer vector containing the values to be moved.\n"
18415"static __inline__ void __DEFAULT_FN_ATTRS\n"
18416"_mm_storeu_si128(__m128i *__p, __m128i __b)\n"
18417"{\n"
18418" struct __storeu_si128 {\n"
18419" __m128i __v;\n"
18420" } __attribute__((__packed__, __may_alias__));\n"
18421" ((struct __storeu_si128*)__p)->__v = __b;\n"
18422"}\n"
18423"\n"
18424"/// Stores a 64-bit integer value from the low element of a 128-bit integer\n"
18425"/// vector.\n"
18426"///\n"
18427"/// \\headerfile <x86intrin.h>\n"
18428"///\n"
18429"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
18430"///\n"
18431"/// \\param __p\n"
18432"/// A pointer to a 64-bit memory location. The address of the memory\n"
18433"/// location does not have to be algned.\n"
18434"/// \\param __b\n"
18435"/// A 128-bit integer vector containing the value to be stored.\n"
18436"static __inline__ void __DEFAULT_FN_ATTRS\n"
18437"_mm_storeu_si64(void const *__p, __m128i __b)\n"
18438"{\n"
18439" struct __storeu_si64 {\n"
18440" long long __v;\n"
18441" } __attribute__((__packed__, __may_alias__));\n"
18442" ((struct __storeu_si64*)__p)->__v = ((__v2di)__b)[0];\n"
18443"}\n"
18444"\n"
18445"/// Stores a 32-bit integer value from the low element of a 128-bit integer\n"
18446"/// vector.\n"
18447"///\n"
18448"/// \\headerfile <x86intrin.h>\n"
18449"///\n"
18450"/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n"
18451"///\n"
18452"/// \\param __p\n"
18453"/// A pointer to a 32-bit memory location. The address of the memory\n"
18454"/// location does not have to be aligned.\n"
18455"/// \\param __b\n"
18456"/// A 128-bit integer vector containing the value to be stored.\n"
18457"static __inline__ void __DEFAULT_FN_ATTRS\n"
18458"_mm_storeu_si32(void const *__p, __m128i __b)\n"
18459"{\n"
18460" struct __storeu_si32 {\n"
18461" int __v;\n"
18462" } __attribute__((__packed__, __may_alias__));\n"
18463" ((struct __storeu_si32*)__p)->__v = ((__v4si)__b)[0];\n"
18464"}\n"
18465"\n"
18466"/// Stores a 16-bit integer value from the low element of a 128-bit integer\n"
18467"/// vector.\n"
18468"///\n"
18469"/// \\headerfile <x86intrin.h>\n"
18470"///\n"
18471"/// This intrinsic does not correspond to a specific instruction.\n"
18472"///\n"
18473"/// \\param __p\n"
18474"/// A pointer to a 16-bit memory location. The address of the memory\n"
18475"/// location does not have to be aligned.\n"
18476"/// \\param __b\n"
18477"/// A 128-bit integer vector containing the value to be stored.\n"
18478"static __inline__ void __DEFAULT_FN_ATTRS\n"
18479"_mm_storeu_si16(void const *__p, __m128i __b)\n"
18480"{\n"
18481" struct __storeu_si16 {\n"
18482" short __v;\n"
18483" } __attribute__((__packed__, __may_alias__));\n"
18484" ((struct __storeu_si16*)__p)->__v = ((__v8hi)__b)[0];\n"
18485"}\n"
18486"\n"
18487"/// Moves bytes selected by the mask from the first operand to the\n"
18488"/// specified unaligned memory location. When a mask bit is 1, the\n"
18489"/// corresponding byte is written, otherwise it is not written.\n"
18490"///\n"
18491"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
18492"/// used again soon). Exception and trap behavior for elements not selected\n"
18493"/// for storage to memory are implementation dependent.\n"
18494"///\n"
18495"/// \\headerfile <x86intrin.h>\n"
18496"///\n"
18497"/// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c>\n"
18498"/// instruction.\n"
18499"///\n"
18500"/// \\param __d\n"
18501"/// A 128-bit integer vector containing the values to be moved.\n"
18502"/// \\param __n\n"
18503"/// A 128-bit integer vector containing the mask. The most significant bit of\n"
18504"/// each byte represents the mask bits.\n"
18505"/// \\param __p\n"
18506"/// A pointer to an unaligned 128-bit memory location where the specified\n"
18507"/// values are moved.\n"
18508"static __inline__ void __DEFAULT_FN_ATTRS\n"
18509"_mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)\n"
18510"{\n"
18511" __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);\n"
18512"}\n"
18513"\n"
18514"/// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to\n"
18515"/// a memory location.\n"
18516"///\n"
18517"/// \\headerfile <x86intrin.h>\n"
18518"///\n"
18519"/// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction.\n"
18520"///\n"
18521"/// \\param __p\n"
18522"/// A pointer to a 64-bit memory location that will receive the lower 64 bits\n"
18523"/// of the integer vector parameter.\n"
18524"/// \\param __a\n"
18525"/// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the\n"
18526"/// value to be stored.\n"
18527"static __inline__ void __DEFAULT_FN_ATTRS\n"
18528"_mm_storel_epi64(__m128i *__p, __m128i __a)\n"
18529"{\n"
18530" struct __mm_storel_epi64_struct {\n"
18531" long long __u;\n"
18532" } __attribute__((__packed__, __may_alias__));\n"
18533" ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];\n"
18534"}\n"
18535"\n"
18536"/// Stores a 128-bit floating point vector of [2 x double] to a 128-bit\n"
18537"/// aligned memory location.\n"
18538"///\n"
18539"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
18540"/// used again soon).\n"
18541"///\n"
18542"/// \\headerfile <x86intrin.h>\n"
18543"///\n"
18544"/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.\n"
18545"///\n"
18546"/// \\param __p\n"
18547"/// A pointer to the 128-bit aligned memory location used to store the value.\n"
18548"/// \\param __a\n"
18549"/// A vector of [2 x double] containing the 64-bit values to be stored.\n"
18550"static __inline__ void __DEFAULT_FN_ATTRS\n"
18551"_mm_stream_pd(double *__p, __m128d __a)\n"
18552"{\n"
18553" __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p);\n"
18554"}\n"
18555"\n"
18556"/// Stores a 128-bit integer vector to a 128-bit aligned memory location.\n"
18557"///\n"
18558"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
18559"/// used again soon).\n"
18560"///\n"
18561"/// \\headerfile <x86intrin.h>\n"
18562"///\n"
18563"/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.\n"
18564"///\n"
18565"/// \\param __p\n"
18566"/// A pointer to the 128-bit aligned memory location used to store the value.\n"
18567"/// \\param __a\n"
18568"/// A 128-bit integer vector containing the values to be stored.\n"
18569"static __inline__ void __DEFAULT_FN_ATTRS\n"
18570"_mm_stream_si128(__m128i *__p, __m128i __a)\n"
18571"{\n"
18572" __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);\n"
18573"}\n"
18574"\n"
18575"/// Stores a 32-bit integer value in the specified memory location.\n"
18576"///\n"
18577"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
18578"/// used again soon).\n"
18579"///\n"
18580"/// \\headerfile <x86intrin.h>\n"
18581"///\n"
18582"/// This intrinsic corresponds to the <c> MOVNTI </c> instruction.\n"
18583"///\n"
18584"/// \\param __p\n"
18585"/// A pointer to the 32-bit memory location used to store the value.\n"
18586"/// \\param __a\n"
18587"/// A 32-bit integer containing the value to be stored.\n"
18588"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"sse2\")))\n"
18589"_mm_stream_si32(int *__p, int __a)\n"
18590"{\n"
18591" __builtin_ia32_movnti(__p, __a);\n"
18592"}\n"
18593"\n"
18594"#ifdef __x86_64__\n"
18595"/// Stores a 64-bit integer value in the specified memory location.\n"
18596"///\n"
18597"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
18598"/// used again soon).\n"
18599"///\n"
18600"/// \\headerfile <x86intrin.h>\n"
18601"///\n"
18602"/// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction.\n"
18603"///\n"
18604"/// \\param __p\n"
18605"/// A pointer to the 64-bit memory location used to store the value.\n"
18606"/// \\param __a\n"
18607"/// A 64-bit integer containing the value to be stored.\n"
18608"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"sse2\")))\n"
18609"_mm_stream_si64(long long *__p, long long __a)\n"
18610"{\n"
18611" __builtin_ia32_movnti64(__p, __a);\n"
18612"}\n"
18613"#endif\n"
18614"\n"
18615"#if defined(__cplusplus)\n"
18616"extern \"C\" {\n"
18617"#endif\n"
18618"\n"
18619"/// The cache line containing \\a __p is flushed and invalidated from all\n"
18620"/// caches in the coherency domain.\n"
18621"///\n"
18622"/// \\headerfile <x86intrin.h>\n"
18623"///\n"
18624"/// This intrinsic corresponds to the <c> CLFLUSH </c> instruction.\n"
18625"///\n"
18626"/// \\param __p\n"
18627"/// A pointer to the memory location used to identify the cache line to be\n"
18628"/// flushed.\n"
18629"void _mm_clflush(void const * __p);\n"
18630"\n"
18631"/// Forces strong memory ordering (serialization) between load\n"
18632"/// instructions preceding this instruction and load instructions following\n"
18633"/// this instruction, ensuring the system completes all previous loads before\n"
18634"/// executing subsequent loads.\n"
18635"///\n"
18636"/// \\headerfile <x86intrin.h>\n"
18637"///\n"
18638"/// This intrinsic corresponds to the <c> LFENCE </c> instruction.\n"
18639"///\n"
18640"void _mm_lfence(void);\n"
18641"\n"
18642"/// Forces strong memory ordering (serialization) between load and store\n"
18643"/// instructions preceding this instruction and load and store instructions\n"
18644"/// following this instruction, ensuring that the system completes all\n"
18645"/// previous memory accesses before executing subsequent memory accesses.\n"
18646"///\n"
18647"/// \\headerfile <x86intrin.h>\n"
18648"///\n"
18649"/// This intrinsic corresponds to the <c> MFENCE </c> instruction.\n"
18650"///\n"
18651"void _mm_mfence(void);\n"
18652"\n"
18653"#if defined(__cplusplus)\n"
18654"} // extern \"C\"\n"
18655"#endif\n"
18656"\n"
18657"/// Converts 16-bit signed integers from both 128-bit integer vector\n"
18658"/// operands into 8-bit signed integers, and packs the results into the\n"
18659"/// destination. Positive values greater than 0x7F are saturated to 0x7F.\n"
18660"/// Negative values less than 0x80 are saturated to 0x80.\n"
18661"///\n"
18662"/// \\headerfile <x86intrin.h>\n"
18663"///\n"
18664"/// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction.\n"
18665"///\n"
18666"/// \\param __a\n"
18667"/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n"
18668"/// a signed integer and is converted to a 8-bit signed integer with\n"
18669"/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less\n"
18670"/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are\n"
18671"/// written to the lower 64 bits of the result.\n"
18672"/// \\param __b\n"
18673"/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n"
18674"/// a signed integer and is converted to a 8-bit signed integer with\n"
18675"/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less\n"
18676"/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are\n"
18677"/// written to the higher 64 bits of the result.\n"
18678"/// \\returns A 128-bit vector of [16 x i8] containing the converted values.\n"
18679"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18680"_mm_packs_epi16(__m128i __a, __m128i __b)\n"
18681"{\n"
18682" return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);\n"
18683"}\n"
18684"\n"
18685"/// Converts 32-bit signed integers from both 128-bit integer vector\n"
18686"/// operands into 16-bit signed integers, and packs the results into the\n"
18687"/// destination. Positive values greater than 0x7FFF are saturated to 0x7FFF.\n"
18688"/// Negative values less than 0x8000 are saturated to 0x8000.\n"
18689"///\n"
18690"/// \\headerfile <x86intrin.h>\n"
18691"///\n"
18692"/// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction.\n"
18693"///\n"
18694"/// \\param __a\n"
18695"/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as\n"
18696"/// a signed integer and is converted to a 16-bit signed integer with\n"
18697"/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values\n"
18698"/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values\n"
18699"/// are written to the lower 64 bits of the result.\n"
18700"/// \\param __b\n"
18701"/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as\n"
18702"/// a signed integer and is converted to a 16-bit signed integer with\n"
18703"/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values\n"
18704"/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values\n"
18705"/// are written to the higher 64 bits of the result.\n"
18706"/// \\returns A 128-bit vector of [8 x i16] containing the converted values.\n"
18707"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18708"_mm_packs_epi32(__m128i __a, __m128i __b)\n"
18709"{\n"
18710" return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);\n"
18711"}\n"
18712"\n"
18713"/// Converts 16-bit signed integers from both 128-bit integer vector\n"
18714"/// operands into 8-bit unsigned integers, and packs the results into the\n"
18715"/// destination. Values greater than 0xFF are saturated to 0xFF. Values less\n"
18716"/// than 0x00 are saturated to 0x00.\n"
18717"///\n"
18718"/// \\headerfile <x86intrin.h>\n"
18719"///\n"
18720"/// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction.\n"
18721"///\n"
18722"/// \\param __a\n"
18723"/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n"
18724"/// a signed integer and is converted to an 8-bit unsigned integer with\n"
18725"/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n"
18726"/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are\n"
18727"/// written to the lower 64 bits of the result.\n"
18728"/// \\param __b\n"
18729"/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n"
18730"/// a signed integer and is converted to an 8-bit unsigned integer with\n"
18731"/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n"
18732"/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are\n"
18733"/// written to the higher 64 bits of the result.\n"
18734"/// \\returns A 128-bit vector of [16 x i8] containing the converted values.\n"
18735"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18736"_mm_packus_epi16(__m128i __a, __m128i __b)\n"
18737"{\n"
18738" return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);\n"
18739"}\n"
18740"\n"
18741"/// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using\n"
18742"/// the immediate-value parameter as a selector.\n"
18743"///\n"
18744"/// \\headerfile <x86intrin.h>\n"
18745"///\n"
18746"/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.\n"
18747"///\n"
18748"/// \\param __a\n"
18749"/// A 128-bit integer vector.\n"
18750"/// \\param __imm\n"
18751"/// An immediate value. Bits [2:0] selects values from \\a __a to be assigned\n"
18752"/// to bits[15:0] of the result. \\n\n"
18753"/// 000: assign values from bits [15:0] of \\a __a. \\n\n"
18754"/// 001: assign values from bits [31:16] of \\a __a. \\n\n"
18755"/// 010: assign values from bits [47:32] of \\a __a. \\n\n"
18756"/// 011: assign values from bits [63:48] of \\a __a. \\n\n"
18757"/// 100: assign values from bits [79:64] of \\a __a. \\n\n"
18758"/// 101: assign values from bits [95:80] of \\a __a. \\n\n"
18759"/// 110: assign values from bits [111:96] of \\a __a. \\n\n"
18760"/// 111: assign values from bits [127:112] of \\a __a.\n"
18761"/// \\returns An integer, whose lower 16 bits are selected from the 128-bit\n"
18762"/// integer vector parameter and the remaining bits are assigned zeros.\n"
18763"#define _mm_extract_epi16(a, imm) \\\n"
18764" (int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \\\n"
18765" (int)(imm))\n"
18766"\n"
18767"/// Constructs a 128-bit integer vector by first making a copy of the\n"
18768"/// 128-bit integer vector parameter, and then inserting the lower 16 bits\n"
18769"/// of an integer parameter into an offset specified by the immediate-value\n"
18770"/// parameter.\n"
18771"///\n"
18772"/// \\headerfile <x86intrin.h>\n"
18773"///\n"
18774"/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.\n"
18775"///\n"
18776"/// \\param __a\n"
18777"/// A 128-bit integer vector of [8 x i16]. This vector is copied to the\n"
18778"/// result and then one of the eight elements in the result is replaced by\n"
18779"/// the lower 16 bits of \\a __b.\n"
18780"/// \\param __b\n"
18781"/// An integer. The lower 16 bits of this parameter are written to the\n"
18782"/// result beginning at an offset specified by \\a __imm.\n"
18783"/// \\param __imm\n"
18784"/// An immediate value specifying the bit offset in the result at which the\n"
18785"/// lower 16 bits of \\a __b are written.\n"
18786"/// \\returns A 128-bit integer vector containing the constructed values.\n"
18787"#define _mm_insert_epi16(a, b, imm) \\\n"
18788" (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \\\n"
18789" (int)(imm))\n"
18790"\n"
18791"/// Copies the values of the most significant bits from each 8-bit\n"
18792"/// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask\n"
18793"/// value, zero-extends the value, and writes it to the destination.\n"
18794"///\n"
18795"/// \\headerfile <x86intrin.h>\n"
18796"///\n"
18797"/// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction.\n"
18798"///\n"
18799"/// \\param __a\n"
18800"/// A 128-bit integer vector containing the values with bits to be extracted.\n"
18801"/// \\returns The most significant bits from each 8-bit element in \\a __a,\n"
18802"/// written to bits [15:0]. The other bits are assigned zeros.\n"
18803"static __inline__ int __DEFAULT_FN_ATTRS\n"
18804"_mm_movemask_epi8(__m128i __a)\n"
18805"{\n"
18806" return __builtin_ia32_pmovmskb128((__v16qi)__a);\n"
18807"}\n"
18808"\n"
18809"/// Constructs a 128-bit integer vector by shuffling four 32-bit\n"
18810"/// elements of a 128-bit integer vector parameter, using the immediate-value\n"
18811"/// parameter as a specifier.\n"
18812"///\n"
18813"/// \\headerfile <x86intrin.h>\n"
18814"///\n"
18815"/// \\code\n"
18816"/// __m128i _mm_shuffle_epi32(__m128i a, const int imm);\n"
18817"/// \\endcode\n"
18818"///\n"
18819"/// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.\n"
18820"///\n"
18821"/// \\param a\n"
18822"/// A 128-bit integer vector containing the values to be copied.\n"
18823"/// \\param imm\n"
18824"/// An immediate value containing an 8-bit value specifying which elements to\n"
18825"/// copy from a. The destinations within the 128-bit destination are assigned\n"
18826"/// values as follows: \\n\n"
18827"/// Bits [1:0] are used to assign values to bits [31:0] of the result. \\n\n"
18828"/// Bits [3:2] are used to assign values to bits [63:32] of the result. \\n\n"
18829"/// Bits [5:4] are used to assign values to bits [95:64] of the result. \\n\n"
18830"/// Bits [7:6] are used to assign values to bits [127:96] of the result. \\n\n"
18831"/// Bit value assignments: \\n\n"
18832"/// 00: assign values from bits [31:0] of \\a a. \\n\n"
18833"/// 01: assign values from bits [63:32] of \\a a. \\n\n"
18834"/// 10: assign values from bits [95:64] of \\a a. \\n\n"
18835"/// 11: assign values from bits [127:96] of \\a a.\n"
18836"/// \\returns A 128-bit integer vector containing the shuffled values.\n"
18837"#define _mm_shuffle_epi32(a, imm) \\\n"
18838" (__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))\n"
18839"\n"
18840"/// Constructs a 128-bit integer vector by shuffling four lower 16-bit\n"
18841"/// elements of a 128-bit integer vector of [8 x i16], using the immediate\n"
18842"/// value parameter as a specifier.\n"
18843"///\n"
18844"/// \\headerfile <x86intrin.h>\n"
18845"///\n"
18846"/// \\code\n"
18847"/// __m128i _mm_shufflelo_epi16(__m128i a, const int imm);\n"
18848"/// \\endcode\n"
18849"///\n"
18850"/// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.\n"
18851"///\n"
18852"/// \\param a\n"
18853"/// A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits\n"
18854"/// [127:64] of the result.\n"
18855"/// \\param imm\n"
18856"/// An 8-bit immediate value specifying which elements to copy from \\a a. \\n\n"
18857"/// Bits[1:0] are used to assign values to bits [15:0] of the result. \\n\n"
18858"/// Bits[3:2] are used to assign values to bits [31:16] of the result. \\n\n"
18859"/// Bits[5:4] are used to assign values to bits [47:32] of the result. \\n\n"
18860"/// Bits[7:6] are used to assign values to bits [63:48] of the result. \\n\n"
18861"/// Bit value assignments: \\n\n"
18862"/// 00: assign values from bits [15:0] of \\a a. \\n\n"
18863"/// 01: assign values from bits [31:16] of \\a a. \\n\n"
18864"/// 10: assign values from bits [47:32] of \\a a. \\n\n"
18865"/// 11: assign values from bits [63:48] of \\a a. \\n\n"
18866"/// \\returns A 128-bit integer vector containing the shuffled values.\n"
18867"#define _mm_shufflelo_epi16(a, imm) \\\n"
18868" (__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))\n"
18869"\n"
18870"/// Constructs a 128-bit integer vector by shuffling four upper 16-bit\n"
18871"/// elements of a 128-bit integer vector of [8 x i16], using the immediate\n"
18872"/// value parameter as a specifier.\n"
18873"///\n"
18874"/// \\headerfile <x86intrin.h>\n"
18875"///\n"
18876"/// \\code\n"
18877"/// __m128i _mm_shufflehi_epi16(__m128i a, const int imm);\n"
18878"/// \\endcode\n"
18879"///\n"
18880"/// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction.\n"
18881"///\n"
18882"/// \\param a\n"
18883"/// A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits\n"
18884"/// [63:0] of the result.\n"
18885"/// \\param imm\n"
18886"/// An 8-bit immediate value specifying which elements to copy from \\a a. \\n\n"
18887"/// Bits[1:0] are used to assign values to bits [79:64] of the result. \\n\n"
18888"/// Bits[3:2] are used to assign values to bits [95:80] of the result. \\n\n"
18889"/// Bits[5:4] are used to assign values to bits [111:96] of the result. \\n\n"
18890"/// Bits[7:6] are used to assign values to bits [127:112] of the result. \\n\n"
18891"/// Bit value assignments: \\n\n"
18892"/// 00: assign values from bits [79:64] of \\a a. \\n\n"
18893"/// 01: assign values from bits [95:80] of \\a a. \\n\n"
18894"/// 10: assign values from bits [111:96] of \\a a. \\n\n"
18895"/// 11: assign values from bits [127:112] of \\a a. \\n\n"
18896"/// \\returns A 128-bit integer vector containing the shuffled values.\n"
18897"#define _mm_shufflehi_epi16(a, imm) \\\n"
18898" (__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))\n"
18899"\n"
18900"/// Unpacks the high-order (index 8-15) values from two 128-bit vectors\n"
18901"/// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].\n"
18902"///\n"
18903"/// \\headerfile <x86intrin.h>\n"
18904"///\n"
18905"/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c>\n"
18906"/// instruction.\n"
18907"///\n"
18908"/// \\param __a\n"
18909"/// A 128-bit vector of [16 x i8].\n"
18910"/// Bits [71:64] are written to bits [7:0] of the result. \\n\n"
18911"/// Bits [79:72] are written to bits [23:16] of the result. \\n\n"
18912"/// Bits [87:80] are written to bits [39:32] of the result. \\n\n"
18913"/// Bits [95:88] are written to bits [55:48] of the result. \\n\n"
18914"/// Bits [103:96] are written to bits [71:64] of the result. \\n\n"
18915"/// Bits [111:104] are written to bits [87:80] of the result. \\n\n"
18916"/// Bits [119:112] are written to bits [103:96] of the result. \\n\n"
18917"/// Bits [127:120] are written to bits [119:112] of the result.\n"
18918"/// \\param __b\n"
18919"/// A 128-bit vector of [16 x i8]. \\n\n"
18920"/// Bits [71:64] are written to bits [15:8] of the result. \\n\n"
18921"/// Bits [79:72] are written to bits [31:24] of the result. \\n\n"
18922"/// Bits [87:80] are written to bits [47:40] of the result. \\n\n"
18923"/// Bits [95:88] are written to bits [63:56] of the result. \\n\n"
18924"/// Bits [103:96] are written to bits [79:72] of the result. \\n\n"
18925"/// Bits [111:104] are written to bits [95:88] of the result. \\n\n"
18926"/// Bits [119:112] are written to bits [111:104] of the result. \\n\n"
18927"/// Bits [127:120] are written to bits [127:120] of the result.\n"
18928"/// \\returns A 128-bit vector of [16 x i8] containing the interleaved values.\n"
18929"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18930"_mm_unpackhi_epi8(__m128i __a, __m128i __b)\n"
18931"{\n"
18932" return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);\n"
18933"}\n"
18934"\n"
18935"/// Unpacks the high-order (index 4-7) values from two 128-bit vectors of\n"
18936"/// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16].\n"
18937"///\n"
18938"/// \\headerfile <x86intrin.h>\n"
18939"///\n"
18940"/// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c>\n"
18941"/// instruction.\n"
18942"///\n"
18943"/// \\param __a\n"
18944"/// A 128-bit vector of [8 x i16].\n"
18945"/// Bits [79:64] are written to bits [15:0] of the result. \\n\n"
18946"/// Bits [95:80] are written to bits [47:32] of the result. \\n\n"
18947"/// Bits [111:96] are written to bits [79:64] of the result. \\n\n"
18948"/// Bits [127:112] are written to bits [111:96] of the result.\n"
18949"/// \\param __b\n"
18950"/// A 128-bit vector of [8 x i16].\n"
18951"/// Bits [79:64] are written to bits [31:16] of the result. \\n\n"
18952"/// Bits [95:80] are written to bits [63:48] of the result. \\n\n"
18953"/// Bits [111:96] are written to bits [95:80] of the result. \\n\n"
18954"/// Bits [127:112] are written to bits [127:112] of the result.\n"
18955"/// \\returns A 128-bit vector of [8 x i16] containing the interleaved values.\n"
18956"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18957"_mm_unpackhi_epi16(__m128i __a, __m128i __b)\n"
18958"{\n"
18959" return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);\n"
18960"}\n"
18961"\n"
18962"/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of\n"
18963"/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].\n"
18964"///\n"
18965"/// \\headerfile <x86intrin.h>\n"
18966"///\n"
18967"/// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c>\n"
18968"/// instruction.\n"
18969"///\n"
18970"/// \\param __a\n"
18971"/// A 128-bit vector of [4 x i32]. \\n\n"
18972"/// Bits [95:64] are written to bits [31:0] of the destination. \\n\n"
18973"/// Bits [127:96] are written to bits [95:64] of the destination.\n"
18974"/// \\param __b\n"
18975"/// A 128-bit vector of [4 x i32]. \\n\n"
18976"/// Bits [95:64] are written to bits [64:32] of the destination. \\n\n"
18977"/// Bits [127:96] are written to bits [127:96] of the destination.\n"
18978"/// \\returns A 128-bit vector of [4 x i32] containing the interleaved values.\n"
18979"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18980"_mm_unpackhi_epi32(__m128i __a, __m128i __b)\n"
18981"{\n"
18982" return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);\n"
18983"}\n"
18984"\n"
18985"/// Unpacks the high-order 64-bit elements from two 128-bit vectors of\n"
18986"/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].\n"
18987"///\n"
18988"/// \\headerfile <x86intrin.h>\n"
18989"///\n"
18990"/// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c>\n"
18991"/// instruction.\n"
18992"///\n"
18993"/// \\param __a\n"
18994"/// A 128-bit vector of [2 x i64]. \\n\n"
18995"/// Bits [127:64] are written to bits [63:0] of the destination.\n"
18996"/// \\param __b\n"
18997"/// A 128-bit vector of [2 x i64]. \\n\n"
18998"/// Bits [127:64] are written to bits [127:64] of the destination.\n"
18999"/// \\returns A 128-bit vector of [2 x i64] containing the interleaved values.\n"
19000"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19001"_mm_unpackhi_epi64(__m128i __a, __m128i __b)\n"
19002"{\n"
19003" return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1);\n"
19004"}\n"
19005"\n"
19006"/// Unpacks the low-order (index 0-7) values from two 128-bit vectors of\n"
19007"/// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].\n"
19008"///\n"
19009"/// \\headerfile <x86intrin.h>\n"
19010"///\n"
19011"/// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c>\n"
19012"/// instruction.\n"
19013"///\n"
19014"/// \\param __a\n"
19015"/// A 128-bit vector of [16 x i8]. \\n\n"
19016"/// Bits [7:0] are written to bits [7:0] of the result. \\n\n"
19017"/// Bits [15:8] are written to bits [23:16] of the result. \\n\n"
19018"/// Bits [23:16] are written to bits [39:32] of the result. \\n\n"
19019"/// Bits [31:24] are written to bits [55:48] of the result. \\n\n"
19020"/// Bits [39:32] are written to bits [71:64] of the result. \\n\n"
19021"/// Bits [47:40] are written to bits [87:80] of the result. \\n\n"
19022"/// Bits [55:48] are written to bits [103:96] of the result. \\n\n"
19023"/// Bits [63:56] are written to bits [119:112] of the result.\n"
19024"/// \\param __b\n"
19025"/// A 128-bit vector of [16 x i8].\n"
19026"/// Bits [7:0] are written to bits [15:8] of the result. \\n\n"
19027"/// Bits [15:8] are written to bits [31:24] of the result. \\n\n"
19028"/// Bits [23:16] are written to bits [47:40] of the result. \\n\n"
19029"/// Bits [31:24] are written to bits [63:56] of the result. \\n\n"
19030"/// Bits [39:32] are written to bits [79:72] of the result. \\n\n"
19031"/// Bits [47:40] are written to bits [95:88] of the result. \\n\n"
19032"/// Bits [55:48] are written to bits [111:104] of the result. \\n\n"
19033"/// Bits [63:56] are written to bits [127:120] of the result.\n"
19034"/// \\returns A 128-bit vector of [16 x i8] containing the interleaved values.\n"
19035"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19036"_mm_unpacklo_epi8(__m128i __a, __m128i __b)\n"
19037"{\n"
19038" return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);\n"
19039"}\n"
19040"\n"
19041"/// Unpacks the low-order (index 0-3) values from each of the two 128-bit\n"
19042"/// vectors of [8 x i16] and interleaves them into a 128-bit vector of\n"
19043"/// [8 x i16].\n"
19044"///\n"
19045"/// \\headerfile <x86intrin.h>\n"
19046"///\n"
19047"/// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c>\n"
19048"/// instruction.\n"
19049"///\n"
19050"/// \\param __a\n"
19051"/// A 128-bit vector of [8 x i16].\n"
19052"/// Bits [15:0] are written to bits [15:0] of the result. \\n\n"
19053"/// Bits [31:16] are written to bits [47:32] of the result. \\n\n"
19054"/// Bits [47:32] are written to bits [79:64] of the result. \\n\n"
19055"/// Bits [63:48] are written to bits [111:96] of the result.\n"
19056"/// \\param __b\n"
19057"/// A 128-bit vector of [8 x i16].\n"
19058"/// Bits [15:0] are written to bits [31:16] of the result. \\n\n"
19059"/// Bits [31:16] are written to bits [63:48] of the result. \\n\n"
19060"/// Bits [47:32] are written to bits [95:80] of the result. \\n\n"
19061"/// Bits [63:48] are written to bits [127:112] of the result.\n"
19062"/// \\returns A 128-bit vector of [8 x i16] containing the interleaved values.\n"
19063"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19064"_mm_unpacklo_epi16(__m128i __a, __m128i __b)\n"
19065"{\n"
19066" return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);\n"
19067"}\n"
19068"\n"
19069"/// Unpacks the low-order (index 0,1) values from two 128-bit vectors of\n"
19070"/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].\n"
19071"///\n"
19072"/// \\headerfile <x86intrin.h>\n"
19073"///\n"
19074"/// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c>\n"
19075"/// instruction.\n"
19076"///\n"
19077"/// \\param __a\n"
19078"/// A 128-bit vector of [4 x i32]. \\n\n"
19079"/// Bits [31:0] are written to bits [31:0] of the destination. \\n\n"
19080"/// Bits [63:32] are written to bits [95:64] of the destination.\n"
19081"/// \\param __b\n"
19082"/// A 128-bit vector of [4 x i32]. \\n\n"
19083"/// Bits [31:0] are written to bits [64:32] of the destination. \\n\n"
19084"/// Bits [63:32] are written to bits [127:96] of the destination.\n"
19085"/// \\returns A 128-bit vector of [4 x i32] containing the interleaved values.\n"
19086"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19087"_mm_unpacklo_epi32(__m128i __a, __m128i __b)\n"
19088"{\n"
19089" return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);\n"
19090"}\n"
19091"\n"
19092"/// Unpacks the low-order 64-bit elements from two 128-bit vectors of\n"
19093"/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].\n"
19094"///\n"
19095"/// \\headerfile <x86intrin.h>\n"
19096"///\n"
19097"/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>\n"
19098"/// instruction.\n"
19099"///\n"
19100"/// \\param __a\n"
19101"/// A 128-bit vector of [2 x i64]. \\n\n"
19102"/// Bits [63:0] are written to bits [63:0] of the destination. \\n\n"
19103"/// \\param __b\n"
19104"/// A 128-bit vector of [2 x i64]. \\n\n"
19105"/// Bits [63:0] are written to bits [127:64] of the destination. \\n\n"
19106"/// \\returns A 128-bit vector of [2 x i64] containing the interleaved values.\n"
19107"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19108"_mm_unpacklo_epi64(__m128i __a, __m128i __b)\n"
19109"{\n"
19110" return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0);\n"
19111"}\n"
19112"\n"
19113"/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit\n"
19114"/// integer.\n"
19115"///\n"
19116"/// \\headerfile <x86intrin.h>\n"
19117"///\n"
19118"/// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction.\n"
19119"///\n"
19120"/// \\param __a\n"
19121"/// A 128-bit integer vector operand. The lower 64 bits are moved to the\n"
19122"/// destination.\n"
19123"/// \\returns A 64-bit integer containing the lower 64 bits of the parameter.\n"
19124"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
19125"_mm_movepi64_pi64(__m128i __a)\n"
19126"{\n"
19127" return (__m64)__a[0];\n"
19128"}\n"
19129"\n"
19130"/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the\n"
19131"/// upper bits.\n"
19132"///\n"
19133"/// \\headerfile <x86intrin.h>\n"
19134"///\n"
19135"/// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction.\n"
19136"///\n"
19137"/// \\param __a\n"
19138"/// A 64-bit value.\n"
19139"/// \\returns A 128-bit integer vector. The lower 64 bits contain the value from\n"
19140"/// the operand. The upper 64 bits are assigned zeros.\n"
19141"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19142"_mm_movpi64_epi64(__m64 __a)\n"
19143"{\n"
19144" return __extension__ (__m128i)(__v2di){ (long long)__a, 0 };\n"
19145"}\n"
19146"\n"
19147"/// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit\n"
19148"/// integer vector, zeroing the upper bits.\n"
19149"///\n"
19150"/// \\headerfile <x86intrin.h>\n"
19151"///\n"
19152"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
19153"///\n"
19154"/// \\param __a\n"
19155"/// A 128-bit integer vector operand. The lower 64 bits are moved to the\n"
19156"/// destination.\n"
19157"/// \\returns A 128-bit integer vector. The lower 64 bits contain the value from\n"
19158"/// the operand. The upper 64 bits are assigned zeros.\n"
19159"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19160"_mm_move_epi64(__m128i __a)\n"
19161"{\n"
19162" return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2);\n"
19163"}\n"
19164"\n"
19165"/// Unpacks the high-order 64-bit elements from two 128-bit vectors of\n"
19166"/// [2 x double] and interleaves them into a 128-bit vector of [2 x\n"
19167"/// double].\n"
19168"///\n"
19169"/// \\headerfile <x86intrin.h>\n"
19170"///\n"
19171"/// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.\n"
19172"///\n"
19173"/// \\param __a\n"
19174"/// A 128-bit vector of [2 x double]. \\n\n"
19175"/// Bits [127:64] are written to bits [63:0] of the destination.\n"
19176"/// \\param __b\n"
19177"/// A 128-bit vector of [2 x double]. \\n\n"
19178"/// Bits [127:64] are written to bits [127:64] of the destination.\n"
19179"/// \\returns A 128-bit vector of [2 x double] containing the interleaved values.\n"
19180"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
19181"_mm_unpackhi_pd(__m128d __a, __m128d __b)\n"
19182"{\n"
19183" return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);\n"
19184"}\n"
19185"\n"
19186"/// Unpacks the low-order 64-bit elements from two 128-bit vectors\n"
19187"/// of [2 x double] and interleaves them into a 128-bit vector of [2 x\n"
19188"/// double].\n"
19189"///\n"
19190"/// \\headerfile <x86intrin.h>\n"
19191"///\n"
19192"/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n"
19193"///\n"
19194"/// \\param __a\n"
19195"/// A 128-bit vector of [2 x double]. \\n\n"
19196"/// Bits [63:0] are written to bits [63:0] of the destination.\n"
19197"/// \\param __b\n"
19198"/// A 128-bit vector of [2 x double]. \\n\n"
19199"/// Bits [63:0] are written to bits [127:64] of the destination.\n"
19200"/// \\returns A 128-bit vector of [2 x double] containing the interleaved values.\n"
19201"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
19202"_mm_unpacklo_pd(__m128d __a, __m128d __b)\n"
19203"{\n"
19204" return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0);\n"
19205"}\n"
19206"\n"
19207"/// Extracts the sign bits of the double-precision values in the 128-bit\n"
19208"/// vector of [2 x double], zero-extends the value, and writes it to the\n"
19209"/// low-order bits of the destination.\n"
19210"///\n"
19211"/// \\headerfile <x86intrin.h>\n"
19212"///\n"
19213"/// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction.\n"
19214"///\n"
19215"/// \\param __a\n"
19216"/// A 128-bit vector of [2 x double] containing the values with sign bits to\n"
19217"/// be extracted.\n"
19218"/// \\returns The sign bits from each of the double-precision elements in \\a __a,\n"
19219"/// written to bits [1:0]. The remaining bits are assigned values of zero.\n"
19220"static __inline__ int __DEFAULT_FN_ATTRS\n"
19221"_mm_movemask_pd(__m128d __a)\n"
19222"{\n"
19223" return __builtin_ia32_movmskpd((__v2df)__a);\n"
19224"}\n"
19225"\n"
19226"\n"
19227"/// Constructs a 128-bit floating-point vector of [2 x double] from two\n"
19228"/// 128-bit vector parameters of [2 x double], using the immediate-value\n"
19229"/// parameter as a specifier.\n"
19230"///\n"
19231"/// \\headerfile <x86intrin.h>\n"
19232"///\n"
19233"/// \\code\n"
19234"/// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i);\n"
19235"/// \\endcode\n"
19236"///\n"
19237"/// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction.\n"
19238"///\n"
19239"/// \\param a\n"
19240"/// A 128-bit vector of [2 x double].\n"
19241"/// \\param b\n"
19242"/// A 128-bit vector of [2 x double].\n"
19243"/// \\param i\n"
19244"/// An 8-bit immediate value. The least significant two bits specify which\n"
19245"/// elements to copy from \\a a and \\a b: \\n\n"
19246"/// Bit[0] = 0: lower element of \\a a copied to lower element of result. \\n\n"
19247"/// Bit[0] = 1: upper element of \\a a copied to lower element of result. \\n\n"
19248"/// Bit[1] = 0: lower element of \\a b copied to upper element of result. \\n\n"
19249"/// Bit[1] = 1: upper element of \\a b copied to upper element of result. \\n\n"
19250"/// \\returns A 128-bit vector of [2 x double] containing the shuffled values.\n"
19251"#define _mm_shuffle_pd(a, b, i) \\\n"
19252" (__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \\\n"
19253" (int)(i))\n"
19254"\n"
19255"/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit\n"
19256"/// floating-point vector of [4 x float].\n"
19257"///\n"
19258"/// \\headerfile <x86intrin.h>\n"
19259"///\n"
19260"/// This intrinsic has no corresponding instruction.\n"
19261"///\n"
19262"/// \\param __a\n"
19263"/// A 128-bit floating-point vector of [2 x double].\n"
19264"/// \\returns A 128-bit floating-point vector of [4 x float] containing the same\n"
19265"/// bitwise pattern as the parameter.\n"
19266"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
19267"_mm_castpd_ps(__m128d __a)\n"
19268"{\n"
19269" return (__m128)__a;\n"
19270"}\n"
19271"\n"
19272"/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit\n"
19273"/// integer vector.\n"
19274"///\n"
19275"/// \\headerfile <x86intrin.h>\n"
19276"///\n"
19277"/// This intrinsic has no corresponding instruction.\n"
19278"///\n"
19279"/// \\param __a\n"
19280"/// A 128-bit floating-point vector of [2 x double].\n"
19281"/// \\returns A 128-bit integer vector containing the same bitwise pattern as the\n"
19282"/// parameter.\n"
19283"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19284"_mm_castpd_si128(__m128d __a)\n"
19285"{\n"
19286" return (__m128i)__a;\n"
19287"}\n"
19288"\n"
19289"/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit\n"
19290"/// floating-point vector of [2 x double].\n"
19291"///\n"
19292"/// \\headerfile <x86intrin.h>\n"
19293"///\n"
19294"/// This intrinsic has no corresponding instruction.\n"
19295"///\n"
19296"/// \\param __a\n"
19297"/// A 128-bit floating-point vector of [4 x float].\n"
19298"/// \\returns A 128-bit floating-point vector of [2 x double] containing the same\n"
19299"/// bitwise pattern as the parameter.\n"
19300"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
19301"_mm_castps_pd(__m128 __a)\n"
19302"{\n"
19303" return (__m128d)__a;\n"
19304"}\n"
19305"\n"
19306"/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit\n"
19307"/// integer vector.\n"
19308"///\n"
19309"/// \\headerfile <x86intrin.h>\n"
19310"///\n"
19311"/// This intrinsic has no corresponding instruction.\n"
19312"///\n"
19313"/// \\param __a\n"
19314"/// A 128-bit floating-point vector of [4 x float].\n"
19315"/// \\returns A 128-bit integer vector containing the same bitwise pattern as the\n"
19316"/// parameter.\n"
19317"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19318"_mm_castps_si128(__m128 __a)\n"
19319"{\n"
19320" return (__m128i)__a;\n"
19321"}\n"
19322"\n"
19323"/// Casts a 128-bit integer vector into a 128-bit floating-point vector\n"
19324"/// of [4 x float].\n"
19325"///\n"
19326"/// \\headerfile <x86intrin.h>\n"
19327"///\n"
19328"/// This intrinsic has no corresponding instruction.\n"
19329"///\n"
19330"/// \\param __a\n"
19331"/// A 128-bit integer vector.\n"
19332"/// \\returns A 128-bit floating-point vector of [4 x float] containing the same\n"
19333"/// bitwise pattern as the parameter.\n"
19334"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
19335"_mm_castsi128_ps(__m128i __a)\n"
19336"{\n"
19337" return (__m128)__a;\n"
19338"}\n"
19339"\n"
19340"/// Casts a 128-bit integer vector into a 128-bit floating-point vector\n"
19341"/// of [2 x double].\n"
19342"///\n"
19343"/// \\headerfile <x86intrin.h>\n"
19344"///\n"
19345"/// This intrinsic has no corresponding instruction.\n"
19346"///\n"
19347"/// \\param __a\n"
19348"/// A 128-bit integer vector.\n"
19349"/// \\returns A 128-bit floating-point vector of [2 x double] containing the same\n"
19350"/// bitwise pattern as the parameter.\n"
19351"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
19352"_mm_castsi128_pd(__m128i __a)\n"
19353"{\n"
19354" return (__m128d)__a;\n"
19355"}\n"
19356"\n"
19357"#if defined(__cplusplus)\n"
19358"extern \"C\" {\n"
19359"#endif\n"
19360"\n"
19361"/// Indicates that a spin loop is being executed for the purposes of\n"
19362"/// optimizing power consumption during the loop.\n"
19363"///\n"
19364"/// \\headerfile <x86intrin.h>\n"
19365"///\n"
19366"/// This intrinsic corresponds to the <c> PAUSE </c> instruction.\n"
19367"///\n"
19368"void _mm_pause(void);\n"
19369"\n"
19370"#if defined(__cplusplus)\n"
19371"} // extern \"C\"\n"
19372"#endif\n"
19373"#undef __DEFAULT_FN_ATTRS\n"
19374"#undef __DEFAULT_FN_ATTRS_MMX\n"
19375"\n"
19376"#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))\n"
19377"\n"
19378"#define _MM_DENORMALS_ZERO_ON (0x0040)\n"
19379"#define _MM_DENORMALS_ZERO_OFF (0x0000)\n"
19380"\n"
19381"#define _MM_DENORMALS_ZERO_MASK (0x0040)\n"
19382"\n"
19383"#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)\n"
19384"#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))\n"
19385"\n"
19386"#endif /* __EMMINTRIN_H */\n"
19387"" } ,
19388 { "/builtins/f16cintrin.h" , "/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===\n"
19389" *\n"
19390" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
19391" * of this software and associated documentation files (the \"Software\"), to deal\n"
19392" * in the Software without restriction, including without limitation the rights\n"
19393" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
19394" * copies of the Software, and to permit persons to whom the Software is\n"
19395" * furnished to do so, subject to the following conditions:\n"
19396" *\n"
19397" * The above copyright notice and this permission notice shall be included in\n"
19398" * all copies or substantial portions of the Software.\n"
19399" *\n"
19400" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
19401" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
19402" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
19403" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
19404" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
19405" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
19406" * THE SOFTWARE.\n"
19407" *\n"
19408" *===-----------------------------------------------------------------------===\n"
19409" */\n"
19410"\n"
19411"#if !defined __IMMINTRIN_H\n"
19412"#error \"Never use <f16cintrin.h> directly; include <immintrin.h> instead.\"\n"
19413"#endif\n"
19414"\n"
19415"#ifndef __F16CINTRIN_H\n"
19416"#define __F16CINTRIN_H\n"
19417"\n"
19418"/* Define the default attributes for the functions in this file. */\n"
19419"#define __DEFAULT_FN_ATTRS128 \\\n"
19420" __attribute__((__always_inline__, __nodebug__, __target__(\"f16c\"), __min_vector_width__(128)))\n"
19421"#define __DEFAULT_FN_ATTRS256 \\\n"
19422" __attribute__((__always_inline__, __nodebug__, __target__(\"f16c\"), __min_vector_width__(256)))\n"
19423"\n"
19424"/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,\n"
19425" * but that's because icc can emulate these without f16c using a library call.\n"
19426" * Since we don't do that let's leave these in f16cintrin.h.\n"
19427" */\n"
19428"\n"
19429"/// Converts a 16-bit half-precision float value into a 32-bit float\n"
19430"/// value.\n"
19431"///\n"
19432"/// \\headerfile <x86intrin.h>\n"
19433"///\n"
19434"/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.\n"
19435"///\n"
19436"/// \\param __a\n"
19437"/// A 16-bit half-precision float value.\n"
19438"/// \\returns The converted 32-bit float value.\n"
19439"static __inline float __DEFAULT_FN_ATTRS128\n"
19440"_cvtsh_ss(unsigned short __a)\n"
19441"{\n"
19442" __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};\n"
19443" __v4sf r = __builtin_ia32_vcvtph2ps(v);\n"
19444" return r[0];\n"
19445"}\n"
19446"\n"
19447"/// Converts a 32-bit single-precision float value to a 16-bit\n"
19448"/// half-precision float value.\n"
19449"///\n"
19450"/// \\headerfile <x86intrin.h>\n"
19451"///\n"
19452"/// \\code\n"
19453"/// unsigned short _cvtss_sh(float a, const int imm);\n"
19454"/// \\endcode\n"
19455"///\n"
19456"/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.\n"
19457"///\n"
19458"/// \\param a\n"
19459"/// A 32-bit single-precision float value to be converted to a 16-bit\n"
19460"/// half-precision float value.\n"
19461"/// \\param imm\n"
19462"/// An immediate value controlling rounding using bits [2:0]: \\n\n"
19463"/// 000: Nearest \\n\n"
19464"/// 001: Down \\n\n"
19465"/// 010: Up \\n\n"
19466"/// 011: Truncate \\n\n"
19467"/// 1XX: Use MXCSR.RC for rounding\n"
19468"/// \\returns The converted 16-bit half-precision float value.\n"
19469"#define _cvtss_sh(a, imm) \\\n"
19470" (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \\\n"
19471" (imm)))[0])\n"
19472"\n"
19473"/// Converts a 128-bit vector containing 32-bit float values into a\n"
19474"/// 128-bit vector containing 16-bit half-precision float values.\n"
19475"///\n"
19476"/// \\headerfile <x86intrin.h>\n"
19477"///\n"
19478"/// \\code\n"
19479"/// __m128i _mm_cvtps_ph(__m128 a, const int imm);\n"
19480"/// \\endcode\n"
19481"///\n"
19482"/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.\n"
19483"///\n"
19484"/// \\param a\n"
19485"/// A 128-bit vector containing 32-bit float values.\n"
19486"/// \\param imm\n"
19487"/// An immediate value controlling rounding using bits [2:0]: \\n\n"
19488"/// 000: Nearest \\n\n"
19489"/// 001: Down \\n\n"
19490"/// 010: Up \\n\n"
19491"/// 011: Truncate \\n\n"
19492"/// 1XX: Use MXCSR.RC for rounding\n"
19493"/// \\returns A 128-bit vector containing converted 16-bit half-precision float\n"
19494"/// values. The lower 64 bits are used to store the converted 16-bit\n"
19495"/// half-precision floating-point values.\n"
19496"#define _mm_cvtps_ph(a, imm) \\\n"
19497" (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm))\n"
19498"\n"
19499"/// Converts a 128-bit vector containing 16-bit half-precision float\n"
19500"/// values into a 128-bit vector containing 32-bit float values.\n"
19501"///\n"
19502"/// \\headerfile <x86intrin.h>\n"
19503"///\n"
19504"/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.\n"
19505"///\n"
19506"/// \\param __a\n"
19507"/// A 128-bit vector containing 16-bit half-precision float values. The lower\n"
19508"/// 64 bits are used in the conversion.\n"
19509"/// \\returns A 128-bit vector of [4 x float] containing converted float values.\n"
19510"static __inline __m128 __DEFAULT_FN_ATTRS128\n"
19511"_mm_cvtph_ps(__m128i __a)\n"
19512"{\n"
19513" return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);\n"
19514"}\n"
19515"\n"
19516"/// Converts a 256-bit vector of [8 x float] into a 128-bit vector\n"
19517"/// containing 16-bit half-precision float values.\n"
19518"///\n"
19519"/// \\headerfile <x86intrin.h>\n"
19520"///\n"
19521"/// \\code\n"
19522"/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);\n"
19523"/// \\endcode\n"
19524"///\n"
19525"/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.\n"
19526"///\n"
19527"/// \\param a\n"
19528"/// A 256-bit vector containing 32-bit single-precision float values to be\n"
19529"/// converted to 16-bit half-precision float values.\n"
19530"/// \\param imm\n"
19531"/// An immediate value controlling rounding using bits [2:0]: \\n\n"
19532"/// 000: Nearest \\n\n"
19533"/// 001: Down \\n\n"
19534"/// 010: Up \\n\n"
19535"/// 011: Truncate \\n\n"
19536"/// 1XX: Use MXCSR.RC for rounding\n"
19537"/// \\returns A 128-bit vector containing the converted 16-bit half-precision\n"
19538"/// float values.\n"
19539"#define _mm256_cvtps_ph(a, imm) \\\n"
19540" (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm))\n"
19541"\n"
19542"/// Converts a 128-bit vector containing 16-bit half-precision float\n"
19543"/// values into a 256-bit vector of [8 x float].\n"
19544"///\n"
19545"/// \\headerfile <x86intrin.h>\n"
19546"///\n"
19547"/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.\n"
19548"///\n"
19549"/// \\param __a\n"
19550"/// A 128-bit vector containing 16-bit half-precision float values to be\n"
19551"/// converted to 32-bit single-precision float values.\n"
19552"/// \\returns A vector of [8 x float] containing the converted 32-bit\n"
19553"/// single-precision float values.\n"
19554"static __inline __m256 __DEFAULT_FN_ATTRS256\n"
19555"_mm256_cvtph_ps(__m128i __a)\n"
19556"{\n"
19557" return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);\n"
19558"}\n"
19559"\n"
19560"#undef __DEFAULT_FN_ATTRS128\n"
19561"#undef __DEFAULT_FN_ATTRS256\n"
19562"\n"
19563"#endif /* __F16CINTRIN_H */\n"
19564"" } ,
19565 { "/builtins/float.h" , "/*===---- float.h - Characteristics of floating point types ----------------===\n"
19566" *\n"
19567" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
19568" * of this software and associated documentation files (the \"Software\"), to deal\n"
19569" * in the Software without restriction, including without limitation the rights\n"
19570" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
19571" * copies of the Software, and to permit persons to whom the Software is\n"
19572" * furnished to do so, subject to the following conditions:\n"
19573" *\n"
19574" * The above copyright notice and this permission notice shall be included in\n"
19575" * all copies or substantial portions of the Software.\n"
19576" *\n"
19577" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
19578" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
19579" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
19580" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
19581" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
19582" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
19583" * THE SOFTWARE.\n"
19584" *\n"
19585" *===-----------------------------------------------------------------------===\n"
19586" */\n"
19587"\n"
19588"#ifndef __CLANG_FLOAT_H\n"
19589"#define __CLANG_FLOAT_H\n"
19590"\n"
19591"/* If we're on MinGW, fall back to the system's float.h, which might have\n"
19592" * additional definitions provided for Windows.\n"
19593" * For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx\n"
19594" *\n"
19595" * Also fall back on Darwin to allow additional definitions and\n"
19596" * implementation-defined values.\n"
19597" */\n"
19598"#if (defined(__APPLE__) || (defined(__MINGW32__) || defined(_MSC_VER))) && \\\n"
19599" __STDC_HOSTED__ && __has_include_next(<float.h>)\n"
19600"\n"
19601"/* Prior to Apple's 10.7 SDK, float.h SDK header used to apply an extra level\n"
19602" * of #include_next<float.h> to keep Metrowerks compilers happy. Avoid this\n"
19603" * extra indirection.\n"
19604" */\n"
19605"#ifdef __APPLE__\n"
19606"#define _FLOAT_H_\n"
19607"#endif\n"
19608"\n"
19609"# include_next <float.h>\n"
19610"\n"
19611"/* Undefine anything that we'll be redefining below. */\n"
19612"# undef FLT_EVAL_METHOD\n"
19613"# undef FLT_ROUNDS\n"
19614"# undef FLT_RADIX\n"
19615"# undef FLT_MANT_DIG\n"
19616"# undef DBL_MANT_DIG\n"
19617"# undef LDBL_MANT_DIG\n"
19618"# if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__)\n"
19619"# undef DECIMAL_DIG\n"
19620"# endif\n"
19621"# undef FLT_DIG\n"
19622"# undef DBL_DIG\n"
19623"# undef LDBL_DIG\n"
19624"# undef FLT_MIN_EXP\n"
19625"# undef DBL_MIN_EXP\n"
19626"# undef LDBL_MIN_EXP\n"
19627"# undef FLT_MIN_10_EXP\n"
19628"# undef DBL_MIN_10_EXP\n"
19629"# undef LDBL_MIN_10_EXP\n"
19630"# undef FLT_MAX_EXP\n"
19631"# undef DBL_MAX_EXP\n"
19632"# undef LDBL_MAX_EXP\n"
19633"# undef FLT_MAX_10_EXP\n"
19634"# undef DBL_MAX_10_EXP\n"
19635"# undef LDBL_MAX_10_EXP\n"
19636"# undef FLT_MAX\n"
19637"# undef DBL_MAX\n"
19638"# undef LDBL_MAX\n"
19639"# undef FLT_EPSILON\n"
19640"# undef DBL_EPSILON\n"
19641"# undef LDBL_EPSILON\n"
19642"# undef FLT_MIN\n"
19643"# undef DBL_MIN\n"
19644"# undef LDBL_MIN\n"
19645"# if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__)\n"
19646"# undef FLT_TRUE_MIN\n"
19647"# undef DBL_TRUE_MIN\n"
19648"# undef LDBL_TRUE_MIN\n"
19649"# undef FLT_DECIMAL_DIG\n"
19650"# undef DBL_DECIMAL_DIG\n"
19651"# undef LDBL_DECIMAL_DIG\n"
19652"# undef FLT_HAS_SUBNORM\n"
19653"# undef DBL_HAS_SUBNORM\n"
19654"# undef LDBL_HAS_SUBNORM\n"
19655"# endif\n"
19656"#endif\n"
19657"\n"
19658"/* Characteristics of floating point types, C99 5.2.4.2.2 */\n"
19659"\n"
19660"#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__\n"
19661"#define FLT_ROUNDS (__builtin_flt_rounds())\n"
19662"#define FLT_RADIX __FLT_RADIX__\n"
19663"\n"
19664"#define FLT_MANT_DIG __FLT_MANT_DIG__\n"
19665"#define DBL_MANT_DIG __DBL_MANT_DIG__\n"
19666"#define LDBL_MANT_DIG __LDBL_MANT_DIG__\n"
19667"\n"
19668"#if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__)\n"
19669"# define DECIMAL_DIG __DECIMAL_DIG__\n"
19670"#endif\n"
19671"\n"
19672"#define FLT_DIG __FLT_DIG__\n"
19673"#define DBL_DIG __DBL_DIG__\n"
19674"#define LDBL_DIG __LDBL_DIG__\n"
19675"\n"
19676"#define FLT_MIN_EXP __FLT_MIN_EXP__\n"
19677"#define DBL_MIN_EXP __DBL_MIN_EXP__\n"
19678"#define LDBL_MIN_EXP __LDBL_MIN_EXP__\n"
19679"\n"
19680"#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__\n"
19681"#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__\n"
19682"#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__\n"
19683"\n"
19684"#define FLT_MAX_EXP __FLT_MAX_EXP__\n"
19685"#define DBL_MAX_EXP __DBL_MAX_EXP__\n"
19686"#define LDBL_MAX_EXP __LDBL_MAX_EXP__\n"
19687"\n"
19688"#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__\n"
19689"#define DBL_MAX_10_EXP __DBL_MAX_10_EXP__\n"
19690"#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__\n"
19691"\n"
19692"#define FLT_MAX __FLT_MAX__\n"
19693"#define DBL_MAX __DBL_MAX__\n"
19694"#define LDBL_MAX __LDBL_MAX__\n"
19695"\n"
19696"#define FLT_EPSILON __FLT_EPSILON__\n"
19697"#define DBL_EPSILON __DBL_EPSILON__\n"
19698"#define LDBL_EPSILON __LDBL_EPSILON__\n"
19699"\n"
19700"#define FLT_MIN __FLT_MIN__\n"
19701"#define DBL_MIN __DBL_MIN__\n"
19702"#define LDBL_MIN __LDBL_MIN__\n"
19703"\n"
19704"#if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__)\n"
19705"# define FLT_TRUE_MIN __FLT_DENORM_MIN__\n"
19706"# define DBL_TRUE_MIN __DBL_DENORM_MIN__\n"
19707"# define LDBL_TRUE_MIN __LDBL_DENORM_MIN__\n"
19708"# define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__\n"
19709"# define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__\n"
19710"# define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__\n"
19711"# define FLT_HAS_SUBNORM __FLT_HAS_DENORM__\n"
19712"# define DBL_HAS_SUBNORM __DBL_HAS_DENORM__\n"
19713"# define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__\n"
19714"#endif\n"
19715"\n"
19716"#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__\n"
19717"# define FLT16_MANT_DIG __FLT16_MANT_DIG__\n"
19718"# define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__\n"
19719"# define FLT16_DIG __FLT16_DIG__\n"
19720"# define FLT16_MIN_EXP __FLT16_MIN_EXP__\n"
19721"# define FLT16_MIN_10_EXP __FLT16_MIN_10_EXP__\n"
19722"# define FLT16_MAX_EXP __FLT16_MAX_EXP__\n"
19723"# define FLT16_MAX_10_EXP __FLT16_MAX_10_EXP__\n"
19724"# define FLT16_MAX __FLT16_MAX__\n"
19725"# define FLT16_EPSILON __FLT16_EPSILON__\n"
19726"# define FLT16_MIN __FLT16_MIN__\n"
19727"# define FLT16_TRUE_MIN __FLT16_TRUE_MIN__\n"
19728"#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */\n"
19729"\n"
19730"#endif /* __CLANG_FLOAT_H */\n"
19731"" } ,
19732 { "/builtins/fma4intrin.h" , "/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===\n"
19733" *\n"
19734" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
19735" * of this software and associated documentation files (the \"Software\"), to deal\n"
19736" * in the Software without restriction, including without limitation the rights\n"
19737" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
19738" * copies of the Software, and to permit persons to whom the Software is\n"
19739" * furnished to do so, subject to the following conditions:\n"
19740" *\n"
19741" * The above copyright notice and this permission notice shall be included in\n"
19742" * all copies or substantial portions of the Software.\n"
19743" *\n"
19744" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
19745" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
19746" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
19747" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
19748" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
19749" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
19750" * THE SOFTWARE.\n"
19751" *\n"
19752" *===-----------------------------------------------------------------------===\n"
19753" */\n"
19754"\n"
19755"#ifndef __X86INTRIN_H\n"
19756"#error \"Never use <fma4intrin.h> directly; include <x86intrin.h> instead.\"\n"
19757"#endif\n"
19758"\n"
19759"#ifndef __FMA4INTRIN_H\n"
19760"#define __FMA4INTRIN_H\n"
19761"\n"
19762"#include <pmmintrin.h>\n"
19763"\n"
19764"/* Define the default attributes for the functions in this file. */\n"
19765"#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"fma4\"), __min_vector_width__(128)))\n"
19766"#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"fma4\"), __min_vector_width__(256)))\n"
19767"\n"
19768"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19769"_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19770"{\n"
19771" return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19772"}\n"
19773"\n"
19774"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19775"_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19776"{\n"
19777" return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19778"}\n"
19779"\n"
19780"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19781"_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19782"{\n"
19783" return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19784"}\n"
19785"\n"
19786"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19787"_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19788"{\n"
19789" return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19790"}\n"
19791"\n"
19792"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19793"_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19794"{\n"
19795" return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19796"}\n"
19797"\n"
19798"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19799"_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19800"{\n"
19801" return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19802"}\n"
19803"\n"
19804"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19805"_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19806"{\n"
19807" return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19808"}\n"
19809"\n"
19810"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19811"_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19812"{\n"
19813" return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19814"}\n"
19815"\n"
19816"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19817"_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19818"{\n"
19819" return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19820"}\n"
19821"\n"
19822"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19823"_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19824"{\n"
19825" return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19826"}\n"
19827"\n"
19828"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19829"_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19830"{\n"
19831" return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19832"}\n"
19833"\n"
19834"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19835"_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19836"{\n"
19837" return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19838"}\n"
19839"\n"
19840"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19841"_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19842"{\n"
19843" return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19844"}\n"
19845"\n"
19846"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19847"_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19848"{\n"
19849" return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19850"}\n"
19851"\n"
19852"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19853"_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19854"{\n"
19855" return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19856"}\n"
19857"\n"
19858"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19859"_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19860"{\n"
19861" return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19862"}\n"
19863"\n"
19864"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19865"_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19866"{\n"
19867" return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19868"}\n"
19869"\n"
19870"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19871"_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19872"{\n"
19873" return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19874"}\n"
19875"\n"
19876"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19877"_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19878"{\n"
19879" return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19880"}\n"
19881"\n"
19882"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19883"_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19884"{\n"
19885" return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19886"}\n"
19887"\n"
19888"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19889"_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19890"{\n"
19891" return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
19892"}\n"
19893"\n"
19894"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19895"_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19896"{\n"
19897" return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
19898"}\n"
19899"\n"
19900"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19901"_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19902"{\n"
19903" return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
19904"}\n"
19905"\n"
19906"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19907"_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19908"{\n"
19909" return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
19910"}\n"
19911"\n"
19912"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19913"_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19914"{\n"
19915" return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
19916"}\n"
19917"\n"
19918"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19919"_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19920"{\n"
19921" return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
19922"}\n"
19923"\n"
19924"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19925"_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19926"{\n"
19927" return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
19928"}\n"
19929"\n"
19930"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19931"_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19932"{\n"
19933" return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
19934"}\n"
19935"\n"
19936"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19937"_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19938"{\n"
19939" return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
19940"}\n"
19941"\n"
19942"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19943"_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19944"{\n"
19945" return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
19946"}\n"
19947"\n"
19948"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19949"_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19950"{\n"
19951" return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
19952"}\n"
19953"\n"
19954"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19955"_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19956"{\n"
19957" return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
19958"}\n"
19959"\n"
19960"#undef __DEFAULT_FN_ATTRS128\n"
19961"#undef __DEFAULT_FN_ATTRS256\n"
19962"\n"
19963"#endif /* __FMA4INTRIN_H */\n"
19964"" } ,
19965 { "/builtins/fmaintrin.h" , "/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===\n"
19966" *\n"
19967" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
19968" * of this software and associated documentation files (the \"Software\"), to deal\n"
19969" * in the Software without restriction, including without limitation the rights\n"
19970" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
19971" * copies of the Software, and to permit persons to whom the Software is\n"
19972" * furnished to do so, subject to the following conditions:\n"
19973" *\n"
19974" * The above copyright notice and this permission notice shall be included in\n"
19975" * all copies or substantial portions of the Software.\n"
19976" *\n"
19977" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
19978" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
19979" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
19980" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
19981" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
19982" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
19983" * THE SOFTWARE.\n"
19984" *\n"
19985" *===-----------------------------------------------------------------------===\n"
19986" */\n"
19987"\n"
19988"#ifndef __IMMINTRIN_H\n"
19989"#error \"Never use <fmaintrin.h> directly; include <immintrin.h> instead.\"\n"
19990"#endif\n"
19991"\n"
19992"#ifndef __FMAINTRIN_H\n"
19993"#define __FMAINTRIN_H\n"
19994"\n"
19995"/* Define the default attributes for the functions in this file. */\n"
19996"#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"fma\"), __min_vector_width__(128)))\n"
19997"#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"fma\"), __min_vector_width__(256)))\n"
19998"\n"
19999"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20000"_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)\n"
20001"{\n"
20002" return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
20003"}\n"
20004"\n"
20005"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20006"_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)\n"
20007"{\n"
20008" return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
20009"}\n"
20010"\n"
20011"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20012"_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)\n"
20013"{\n"
20014" return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
20015"}\n"
20016"\n"
20017"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20018"_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)\n"
20019"{\n"
20020" return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
20021"}\n"
20022"\n"
20023"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20024"_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
20025"{\n"
20026" return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
20027"}\n"
20028"\n"
20029"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20030"_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
20031"{\n"
20032" return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
20033"}\n"
20034"\n"
20035"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20036"_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)\n"
20037"{\n"
20038" return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
20039"}\n"
20040"\n"
20041"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20042"_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)\n"
20043"{\n"
20044" return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
20045"}\n"
20046"\n"
20047"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20048"_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)\n"
20049"{\n"
20050" return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
20051"}\n"
20052"\n"
20053"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20054"_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)\n"
20055"{\n"
20056" return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
20057"}\n"
20058"\n"
20059"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20060"_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)\n"
20061"{\n"
20062" return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);\n"
20063"}\n"
20064"\n"
20065"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20066"_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)\n"
20067"{\n"
20068" return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);\n"
20069"}\n"
20070"\n"
20071"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20072"_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
20073"{\n"
20074" return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
20075"}\n"
20076"\n"
20077"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20078"_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
20079"{\n"
20080" return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
20081"}\n"
20082"\n"
20083"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20084"_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)\n"
20085"{\n"
20086" return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);\n"
20087"}\n"
20088"\n"
20089"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20090"_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)\n"
20091"{\n"
20092" return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);\n"
20093"}\n"
20094"\n"
20095"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20096"_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
20097"{\n"
20098" return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
20099"}\n"
20100"\n"
20101"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20102"_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
20103"{\n"
20104" return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
20105"}\n"
20106"\n"
20107"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20108"_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)\n"
20109"{\n"
20110" return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
20111"}\n"
20112"\n"
20113"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20114"_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)\n"
20115"{\n"
20116" return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
20117"}\n"
20118"\n"
20119"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20120"_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20121"{\n"
20122" return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
20123"}\n"
20124"\n"
20125"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20126"_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20127"{\n"
20128" return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
20129"}\n"
20130"\n"
20131"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20132"_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20133"{\n"
20134" return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
20135"}\n"
20136"\n"
20137"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20138"_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20139"{\n"
20140" return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
20141"}\n"
20142"\n"
20143"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20144"_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20145"{\n"
20146" return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
20147"}\n"
20148"\n"
20149"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20150"_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20151"{\n"
20152" return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
20153"}\n"
20154"\n"
20155"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20156"_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20157"{\n"
20158" return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
20159"}\n"
20160"\n"
20161"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20162"_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20163"{\n"
20164" return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
20165"}\n"
20166"\n"
20167"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20168"_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20169"{\n"
20170" return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
20171"}\n"
20172"\n"
20173"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20174"_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20175"{\n"
20176" return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
20177"}\n"
20178"\n"
20179"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20180"_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20181"{\n"
20182" return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
20183"}\n"
20184"\n"
20185"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20186"_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20187"{\n"
20188" return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
20189"}\n"
20190"\n"
20191"#undef __DEFAULT_FN_ATTRS128\n"
20192"#undef __DEFAULT_FN_ATTRS256\n"
20193"\n"
20194"#endif /* __FMAINTRIN_H */\n"
20195"" } ,
20196 { "/builtins/fxsrintrin.h" , "/*===---- fxsrintrin.h - FXSR intrinsic ------------------------------------===\n"
20197" *\n"
20198" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
20199" * of this software and associated documentation files (the \"Software\"), to deal\n"
20200" * in the Software without restriction, including without limitation the rights\n"
20201" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
20202" * copies of the Software, and to permit persons to whom the Software is\n"
20203" * furnished to do so, subject to the following conditions:\n"
20204" *\n"
20205" * The above copyright notice and this permission notice shall be included in\n"
20206" * all copies or substantial portions of the Software.\n"
20207" *\n"
20208" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
20209" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
20210" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
20211" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
20212" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
20213" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
20214" * THE SOFTWARE.\n"
20215" *\n"
20216" *===-----------------------------------------------------------------------===\n"
20217" */\n"
20218"\n"
20219"#ifndef __IMMINTRIN_H\n"
20220"#error \"Never use <fxsrintrin.h> directly; include <immintrin.h> instead.\"\n"
20221"#endif\n"
20222"\n"
20223"#ifndef __FXSRINTRIN_H\n"
20224"#define __FXSRINTRIN_H\n"
20225"\n"
20226"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"fxsr\")))\n"
20227"\n"
20228"/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte\n"
20229"/// memory region pointed to by the input parameter \\a __p.\n"
20230"///\n"
20231"/// \\headerfile <x86intrin.h>\n"
20232"///\n"
20233"/// This intrinsic corresponds to the <c> FXSAVE </c> instruction.\n"
20234"///\n"
20235"/// \\param __p\n"
20236"/// A pointer to a 512-byte memory region. The beginning of this memory\n"
20237"/// region should be aligned on a 16-byte boundary.\n"
20238"static __inline__ void __DEFAULT_FN_ATTRS\n"
20239"_fxsave(void *__p)\n"
20240"{\n"
20241" __builtin_ia32_fxsave(__p);\n"
20242"}\n"
20243"\n"
20244"/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte\n"
20245"/// memory region pointed to by the input parameter \\a __p. The contents of\n"
20246"/// this memory region should have been written to by a previous \\c _fxsave\n"
20247"/// or \\c _fxsave64 intrinsic.\n"
20248"///\n"
20249"/// \\headerfile <x86intrin.h>\n"
20250"///\n"
20251"/// This intrinsic corresponds to the <c> FXRSTOR </c> instruction.\n"
20252"///\n"
20253"/// \\param __p\n"
20254"/// A pointer to a 512-byte memory region. The beginning of this memory\n"
20255"/// region should be aligned on a 16-byte boundary.\n"
20256"static __inline__ void __DEFAULT_FN_ATTRS\n"
20257"_fxrstor(void *__p)\n"
20258"{\n"
20259" __builtin_ia32_fxrstor(__p);\n"
20260"}\n"
20261"\n"
20262"#ifdef __x86_64__\n"
20263"/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte\n"
20264"/// memory region pointed to by the input parameter \\a __p.\n"
20265"///\n"
20266"/// \\headerfile <x86intrin.h>\n"
20267"///\n"
20268"/// This intrinsic corresponds to the <c> FXSAVE64 </c> instruction.\n"
20269"///\n"
20270"/// \\param __p\n"
20271"/// A pointer to a 512-byte memory region. The beginning of this memory\n"
20272"/// region should be aligned on a 16-byte boundary.\n"
20273"static __inline__ void __DEFAULT_FN_ATTRS\n"
20274"_fxsave64(void *__p)\n"
20275"{\n"
20276" __builtin_ia32_fxsave64(__p);\n"
20277"}\n"
20278"\n"
20279"/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte\n"
20280"/// memory region pointed to by the input parameter \\a __p. The contents of\n"
20281"/// this memory region should have been written to by a previous \\c _fxsave\n"
20282"/// or \\c _fxsave64 intrinsic.\n"
20283"///\n"
20284"/// \\headerfile <x86intrin.h>\n"
20285"///\n"
20286"/// This intrinsic corresponds to the <c> FXRSTOR64 </c> instruction.\n"
20287"///\n"
20288"/// \\param __p\n"
20289"/// A pointer to a 512-byte memory region. The beginning of this memory\n"
20290"/// region should be aligned on a 16-byte boundary.\n"
20291"static __inline__ void __DEFAULT_FN_ATTRS\n"
20292"_fxrstor64(void *__p)\n"
20293"{\n"
20294" __builtin_ia32_fxrstor64(__p);\n"
20295"}\n"
20296"#endif\n"
20297"\n"
20298"#undef __DEFAULT_FN_ATTRS\n"
20299"\n"
20300"#endif\n"
20301"" } ,
20302 { "/builtins/gfniintrin.h" , "/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------===\n"
20303" *\n"
20304" *\n"
20305" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
20306" * of this software and associated documentation files (the \"Software\"), to deal\n"
20307" * in the Software without restriction, including without limitation the rights\n"
20308" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
20309" * copies of the Software, and to permit persons to whom the Software is\n"
20310" * furnished to do so, subject to the following conditions:\n"
20311" *\n"
20312" * The above copyright notice and this permission notice shall be included in\n"
20313" * all copies or substantial portions of the Software.\n"
20314" *\n"
20315" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
20316" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
20317" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
20318" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
20319" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
20320" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
20321" * THE SOFTWARE.\n"
20322" *\n"
20323" *===-----------------------------------------------------------------------===\n"
20324" */\n"
20325"#ifndef __IMMINTRIN_H\n"
20326"#error \"Never use <gfniintrin.h> directly; include <immintrin.h> instead.\"\n"
20327"#endif\n"
20328"\n"
20329"#ifndef __GFNIINTRIN_H\n"
20330"#define __GFNIINTRIN_H\n"
20331"\n"
20332"\n"
20333"#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \\\n"
20334" (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \\\n"
20335" (__v16qi)(__m128i)(B), \\\n"
20336" (char)(I))\n"
20337"\n"
20338"#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \\\n"
20339" (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \\\n"
20340" (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \\\n"
20341" (__v16qi)(__m128i)(S))\n"
20342"\n"
20343"\n"
20344"#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \\\n"
20345" (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \\\n"
20346" U, A, B, I)\n"
20347"\n"
20348"\n"
20349"#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \\\n"
20350" (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \\\n"
20351" (__v32qi)(__m256i)(B), \\\n"
20352" (char)(I))\n"
20353"\n"
20354"#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \\\n"
20355" (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \\\n"
20356" (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \\\n"
20357" (__v32qi)(__m256i)(S))\n"
20358"\n"
20359"#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \\\n"
20360" (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \\\n"
20361" U, A, B, I)\n"
20362"\n"
20363"\n"
20364"#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \\\n"
20365" (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \\\n"
20366" (__v64qi)(__m512i)(B), \\\n"
20367" (char)(I))\n"
20368"\n"
20369"#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \\\n"
20370" (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \\\n"
20371" (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \\\n"
20372" (__v64qi)(__m512i)(S))\n"
20373"\n"
20374"#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \\\n"
20375" (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \\\n"
20376" U, A, B, I)\n"
20377"\n"
20378"#define _mm_gf2p8affine_epi64_epi8(A, B, I) \\\n"
20379" (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \\\n"
20380" (__v16qi)(__m128i)(B), \\\n"
20381" (char)(I))\n"
20382"\n"
20383"#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \\\n"
20384" (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \\\n"
20385" (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \\\n"
20386" (__v16qi)(__m128i)(S))\n"
20387"\n"
20388"\n"
20389"#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \\\n"
20390" (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \\\n"
20391" U, A, B, I)\n"
20392"\n"
20393"\n"
20394"#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \\\n"
20395" (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \\\n"
20396" (__v32qi)(__m256i)(B), \\\n"
20397" (char)(I))\n"
20398"\n"
20399"#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \\\n"
20400" (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \\\n"
20401" (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \\\n"
20402" (__v32qi)(__m256i)(S))\n"
20403"\n"
20404"#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \\\n"
20405" (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \\\n"
20406" U, A, B, I)\n"
20407"\n"
20408"\n"
20409"#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \\\n"
20410" (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \\\n"
20411" (__v64qi)(__m512i)(B), \\\n"
20412" (char)(I))\n"
20413"\n"
20414"#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \\\n"
20415" (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \\\n"
20416" (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \\\n"
20417" (__v64qi)(__m512i)(S))\n"
20418"\n"
20419"#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \\\n"
20420" (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \\\n"
20421" U, A, B, I)\n"
20422"\n"
20423"/* Default attributes for simple form (no masking). */\n"
20424"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"gfni\"), __min_vector_width__(128)))\n"
20425"\n"
20426"/* Default attributes for YMM unmasked form. */\n"
20427"#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__(\"avx,gfni\"), __min_vector_width__(256)))\n"
20428"\n"
20429"/* Default attributes for ZMM forms. */\n"
20430"#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__(\"avx512bw,gfni\"), __min_vector_width__(512)))\n"
20431"\n"
20432"/* Default attributes for VLX forms. */\n"
20433"#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__(\"avx512bw,avx512vl,gfni\"), __min_vector_width__(128)))\n"
20434"#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__(\"avx512bw,avx512vl,gfni\"), __min_vector_width__(256)))\n"
20435"\n"
20436"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
20437"_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)\n"
20438"{\n"
20439" return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,\n"
20440" (__v16qi) __B);\n"
20441"}\n"
20442"\n"
20443"static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128\n"
20444"_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)\n"
20445"{\n"
20446" return (__m128i) __builtin_ia32_selectb_128(__U,\n"
20447" (__v16qi) _mm_gf2p8mul_epi8(__A, __B),\n"
20448" (__v16qi) __S);\n"
20449"}\n"
20450"\n"
20451"static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128\n"
20452"_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B)\n"
20453"{\n"
20454" return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(),\n"
20455" __U, __A, __B);\n"
20456"}\n"
20457"\n"
20458"static __inline__ __m256i __DEFAULT_FN_ATTRS_Y\n"
20459"_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)\n"
20460"{\n"
20461" return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,\n"
20462" (__v32qi) __B);\n"
20463"}\n"
20464"\n"
20465"static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256\n"
20466"_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B)\n"
20467"{\n"
20468" return (__m256i) __builtin_ia32_selectb_256(__U,\n"
20469" (__v32qi) _mm256_gf2p8mul_epi8(__A, __B),\n"
20470" (__v32qi) __S);\n"
20471"}\n"
20472"\n"
20473"static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256\n"
20474"_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B)\n"
20475"{\n"
20476" return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(),\n"
20477" __U, __A, __B);\n"
20478"}\n"
20479"\n"
20480"static __inline__ __m512i __DEFAULT_FN_ATTRS_Z\n"
20481"_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)\n"
20482"{\n"
20483" return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A,\n"
20484" (__v64qi) __B);\n"
20485"}\n"
20486"\n"
20487"static __inline__ __m512i __DEFAULT_FN_ATTRS_Z\n"
20488"_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)\n"
20489"{\n"
20490" return (__m512i) __builtin_ia32_selectb_512(__U,\n"
20491" (__v64qi) _mm512_gf2p8mul_epi8(__A, __B),\n"
20492" (__v64qi) __S);\n"
20493"}\n"
20494"\n"
20495"static __inline__ __m512i __DEFAULT_FN_ATTRS_Z\n"
20496"_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B)\n"
20497"{\n"
20498" return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_si512(),\n"
20499" __U, __A, __B);\n"
20500"}\n"
20501"\n"
20502"#undef __DEFAULT_FN_ATTRS\n"
20503"#undef __DEFAULT_FN_ATTRS_Y\n"
20504"#undef __DEFAULT_FN_ATTRS_Z\n"
20505"#undef __DEFAULT_FN_ATTRS_VL128\n"
20506"#undef __DEFAULT_FN_ATTRS_VL256\n"
20507"\n"
20508"#endif /* __GFNIINTRIN_H */\n"
20509"\n"
20510"" } ,
20511 { "/builtins/htmintrin.h" , "/*===---- htmintrin.h - Standard header for PowerPC HTM ---------------===*\\\n"
20512" *\n"
20513" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
20514" * of this software and associated documentation files (the \"Software\"), to deal\n"
20515" * in the Software without restriction, including without limitation the rights\n"
20516" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
20517" * copies of the Software, and to permit persons to whom the Software is\n"
20518" * furnished to do so, subject to the following conditions:\n"
20519" *\n"
20520" * The above copyright notice and this permission notice shall be included in\n"
20521" * all copies or substantial portions of the Software.\n"
20522" *\n"
20523" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
20524" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
20525" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
20526" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
20527" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
20528" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
20529" * THE SOFTWARE.\n"
20530" *\n"
20531"\\*===----------------------------------------------------------------------===*/\n"
20532"\n"
20533"#ifndef __HTMINTRIN_H\n"
20534"#define __HTMINTRIN_H\n"
20535"\n"
20536"#ifndef __HTM__\n"
20537"#error \"HTM instruction set not enabled\"\n"
20538"#endif\n"
20539"\n"
20540"#ifdef __powerpc__\n"
20541"\n"
20542"#include <stdint.h>\n"
20543"\n"
20544"typedef uint64_t texasr_t;\n"
20545"typedef uint32_t texasru_t;\n"
20546"typedef uint32_t texasrl_t;\n"
20547"typedef uintptr_t tfiar_t;\n"
20548"typedef uintptr_t tfhar_t;\n"
20549"\n"
20550"#define _HTM_STATE(CR0) ((CR0 >> 1) & 0x3)\n"
20551"#define _HTM_NONTRANSACTIONAL 0x0\n"
20552"#define _HTM_SUSPENDED 0x1\n"
20553"#define _HTM_TRANSACTIONAL 0x2\n"
20554"\n"
20555"#define _TEXASR_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \\\n"
20556" (((TEXASR) >> (63-(BITNUM))) & ((1<<(SIZE))-1))\n"
20557"#define _TEXASRU_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \\\n"
20558" (((TEXASR) >> (31-(BITNUM))) & ((1<<(SIZE))-1))\n"
20559"\n"
20560"#define _TEXASR_FAILURE_CODE(TEXASR) \\\n"
20561" _TEXASR_EXTRACT_BITS(TEXASR, 7, 8)\n"
20562"#define _TEXASRU_FAILURE_CODE(TEXASRU) \\\n"
20563" _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 8)\n"
20564"\n"
20565"#define _TEXASR_FAILURE_PERSISTENT(TEXASR) \\\n"
20566" _TEXASR_EXTRACT_BITS(TEXASR, 7, 1)\n"
20567"#define _TEXASRU_FAILURE_PERSISTENT(TEXASRU) \\\n"
20568" _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 1)\n"
20569"\n"
20570"#define _TEXASR_DISALLOWED(TEXASR) \\\n"
20571" _TEXASR_EXTRACT_BITS(TEXASR, 8, 1)\n"
20572"#define _TEXASRU_DISALLOWED(TEXASRU) \\\n"
20573" _TEXASRU_EXTRACT_BITS(TEXASRU, 8, 1)\n"
20574"\n"
20575"#define _TEXASR_NESTING_OVERFLOW(TEXASR) \\\n"
20576" _TEXASR_EXTRACT_BITS(TEXASR, 9, 1)\n"
20577"#define _TEXASRU_NESTING_OVERFLOW(TEXASRU) \\\n"
20578" _TEXASRU_EXTRACT_BITS(TEXASRU, 9, 1)\n"
20579"\n"
20580"#define _TEXASR_FOOTPRINT_OVERFLOW(TEXASR) \\\n"
20581" _TEXASR_EXTRACT_BITS(TEXASR, 10, 1)\n"
20582"#define _TEXASRU_FOOTPRINT_OVERFLOW(TEXASRU) \\\n"
20583" _TEXASRU_EXTRACT_BITS(TEXASRU, 10, 1)\n"
20584"\n"
20585"#define _TEXASR_SELF_INDUCED_CONFLICT(TEXASR) \\\n"
20586" _TEXASR_EXTRACT_BITS(TEXASR, 11, 1)\n"
20587"#define _TEXASRU_SELF_INDUCED_CONFLICT(TEXASRU) \\\n"
20588" _TEXASRU_EXTRACT_BITS(TEXASRU, 11, 1)\n"
20589"\n"
20590"#define _TEXASR_NON_TRANSACTIONAL_CONFLICT(TEXASR) \\\n"
20591" _TEXASR_EXTRACT_BITS(TEXASR, 12, 1)\n"
20592"#define _TEXASRU_NON_TRANSACTIONAL_CONFLICT(TEXASRU) \\\n"
20593" _TEXASRU_EXTRACT_BITS(TEXASRU, 12, 1)\n"
20594"\n"
20595"#define _TEXASR_TRANSACTION_CONFLICT(TEXASR) \\\n"
20596" _TEXASR_EXTRACT_BITS(TEXASR, 13, 1)\n"
20597"#define _TEXASRU_TRANSACTION_CONFLICT(TEXASRU) \\\n"
20598" _TEXASRU_EXTRACT_BITS(TEXASRU, 13, 1)\n"
20599"\n"
20600"#define _TEXASR_TRANSLATION_INVALIDATION_CONFLICT(TEXASR) \\\n"
20601" _TEXASR_EXTRACT_BITS(TEXASR, 14, 1)\n"
20602"#define _TEXASRU_TRANSLATION_INVALIDATION_CONFLICT(TEXASRU) \\\n"
20603" _TEXASRU_EXTRACT_BITS(TEXASRU, 14, 1)\n"
20604"\n"
20605"#define _TEXASR_IMPLEMENTAION_SPECIFIC(TEXASR) \\\n"
20606" _TEXASR_EXTRACT_BITS(TEXASR, 15, 1)\n"
20607"#define _TEXASRU_IMPLEMENTAION_SPECIFIC(TEXASRU) \\\n"
20608" _TEXASRU_EXTRACT_BITS(TEXASRU, 15, 1)\n"
20609"\n"
20610"#define _TEXASR_INSTRUCTION_FETCH_CONFLICT(TEXASR) \\\n"
20611" _TEXASR_EXTRACT_BITS(TEXASR, 16, 1)\n"
20612"#define _TEXASRU_INSTRUCTION_FETCH_CONFLICT(TEXASRU) \\\n"
20613" _TEXASRU_EXTRACT_BITS(TEXASRU, 16, 1)\n"
20614"\n"
20615"#define _TEXASR_ABORT(TEXASR) \\\n"
20616" _TEXASR_EXTRACT_BITS(TEXASR, 31, 1)\n"
20617"#define _TEXASRU_ABORT(TEXASRU) \\\n"
20618" _TEXASRU_EXTRACT_BITS(TEXASRU, 31, 1)\n"
20619"\n"
20620"\n"
20621"#define _TEXASR_SUSPENDED(TEXASR) \\\n"
20622" _TEXASR_EXTRACT_BITS(TEXASR, 32, 1)\n"
20623"\n"
20624"#define _TEXASR_PRIVILEGE(TEXASR) \\\n"
20625" _TEXASR_EXTRACT_BITS(TEXASR, 35, 2)\n"
20626"\n"
20627"#define _TEXASR_FAILURE_SUMMARY(TEXASR) \\\n"
20628" _TEXASR_EXTRACT_BITS(TEXASR, 36, 1)\n"
20629"\n"
20630"#define _TEXASR_TFIAR_EXACT(TEXASR) \\\n"
20631" _TEXASR_EXTRACT_BITS(TEXASR, 37, 1)\n"
20632"\n"
20633"#define _TEXASR_ROT(TEXASR) \\\n"
20634" _TEXASR_EXTRACT_BITS(TEXASR, 38, 1)\n"
20635"\n"
20636"#define _TEXASR_TRANSACTION_LEVEL(TEXASR) \\\n"
20637" _TEXASR_EXTRACT_BITS(TEXASR, 63, 12)\n"
20638"\n"
20639"#endif /* __powerpc */\n"
20640"\n"
20641"#ifdef __s390__\n"
20642"\n"
20643"/* Condition codes generated by tbegin */\n"
20644"#define _HTM_TBEGIN_STARTED 0\n"
20645"#define _HTM_TBEGIN_INDETERMINATE 1\n"
20646"#define _HTM_TBEGIN_TRANSIENT 2\n"
20647"#define _HTM_TBEGIN_PERSISTENT 3\n"
20648"\n"
20649"/* The abort codes below this threshold are reserved for machine use. */\n"
20650"#define _HTM_FIRST_USER_ABORT_CODE 256\n"
20651"\n"
20652"/* The transaction diagnostic block is it is defined in the Principles\n"
20653" of Operation chapter 5-91. */\n"
20654"\n"
20655"struct __htm_tdb {\n"
20656" unsigned char format; /* 0 */\n"
20657" unsigned char flags;\n"
20658" unsigned char reserved1[4];\n"
20659" unsigned short nesting_depth;\n"
20660" unsigned long long abort_code; /* 8 */\n"
20661" unsigned long long conflict_token; /* 16 */\n"
20662" unsigned long long atia; /* 24 */\n"
20663" unsigned char eaid; /* 32 */\n"
20664" unsigned char dxc;\n"
20665" unsigned char reserved2[2];\n"
20666" unsigned int program_int_id;\n"
20667" unsigned long long exception_id; /* 40 */\n"
20668" unsigned long long bea; /* 48 */\n"
20669" unsigned char reserved3[72]; /* 56 */\n"
20670" unsigned long long gprs[16]; /* 128 */\n"
20671"} __attribute__((__packed__, __aligned__ (8)));\n"
20672"\n"
20673"\n"
20674"/* Helper intrinsics to retry tbegin in case of transient failure. */\n"
20675"\n"
20676"static __inline int __attribute__((__always_inline__, __nodebug__))\n"
20677"__builtin_tbegin_retry_null (int __retry)\n"
20678"{\n"
20679" int cc, i = 0;\n"
20680"\n"
20681" while ((cc = __builtin_tbegin(0)) == _HTM_TBEGIN_TRANSIENT\n"
20682" && i++ < __retry)\n"
20683" __builtin_tx_assist(i);\n"
20684"\n"
20685" return cc;\n"
20686"}\n"
20687"\n"
20688"static __inline int __attribute__((__always_inline__, __nodebug__))\n"
20689"__builtin_tbegin_retry_tdb (void *__tdb, int __retry)\n"
20690"{\n"
20691" int cc, i = 0;\n"
20692"\n"
20693" while ((cc = __builtin_tbegin(__tdb)) == _HTM_TBEGIN_TRANSIENT\n"
20694" && i++ < __retry)\n"
20695" __builtin_tx_assist(i);\n"
20696"\n"
20697" return cc;\n"
20698"}\n"
20699"\n"
20700"#define __builtin_tbegin_retry(tdb, retry) \\\n"
20701" (__builtin_constant_p(tdb == 0) && tdb == 0 ? \\\n"
20702" __builtin_tbegin_retry_null(retry) : \\\n"
20703" __builtin_tbegin_retry_tdb(tdb, retry))\n"
20704"\n"
20705"static __inline int __attribute__((__always_inline__, __nodebug__))\n"
20706"__builtin_tbegin_retry_nofloat_null (int __retry)\n"
20707"{\n"
20708" int cc, i = 0;\n"
20709"\n"
20710" while ((cc = __builtin_tbegin_nofloat(0)) == _HTM_TBEGIN_TRANSIENT\n"
20711" && i++ < __retry)\n"
20712" __builtin_tx_assist(i);\n"
20713"\n"
20714" return cc;\n"
20715"}\n"
20716"\n"
20717"static __inline int __attribute__((__always_inline__, __nodebug__))\n"
20718"__builtin_tbegin_retry_nofloat_tdb (void *__tdb, int __retry)\n"
20719"{\n"
20720" int cc, i = 0;\n"
20721"\n"
20722" while ((cc = __builtin_tbegin_nofloat(__tdb)) == _HTM_TBEGIN_TRANSIENT\n"
20723" && i++ < __retry)\n"
20724" __builtin_tx_assist(i);\n"
20725"\n"
20726" return cc;\n"
20727"}\n"
20728"\n"
20729"#define __builtin_tbegin_retry_nofloat(tdb, retry) \\\n"
20730" (__builtin_constant_p(tdb == 0) && tdb == 0 ? \\\n"
20731" __builtin_tbegin_retry_nofloat_null(retry) : \\\n"
20732" __builtin_tbegin_retry_nofloat_tdb(tdb, retry))\n"
20733"\n"
20734"#endif /* __s390__ */\n"
20735"\n"
20736"#endif /* __HTMINTRIN_H */\n"
20737"" } ,
20738 { "/builtins/htmxlintrin.h" , "/*===---- htmxlintrin.h - XL compiler HTM execution intrinsics-------------===*\\\n"
20739" *\n"
20740" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
20741" * of this software and associated documentation files (the \"Software\"), to deal\n"
20742" * in the Software without restriction, including without limitation the rights\n"
20743" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
20744" * copies of the Software, and to permit persons to whom the Software is\n"
20745" * furnished to do so, subject to the following conditions:\n"
20746" *\n"
20747" * The above copyright notice and this permission notice shall be included in\n"
20748" * all copies or substantial portions of the Software.\n"
20749" *\n"
20750" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
20751" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
20752" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
20753" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
20754" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
20755" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
20756" * THE SOFTWARE.\n"
20757" *\n"
20758"\\*===----------------------------------------------------------------------===*/\n"
20759"\n"
20760"#ifndef __HTMXLINTRIN_H\n"
20761"#define __HTMXLINTRIN_H\n"
20762"\n"
20763"#ifndef __HTM__\n"
20764"#error \"HTM instruction set not enabled\"\n"
20765"#endif\n"
20766"\n"
20767"#include <htmintrin.h>\n"
20768"\n"
20769"#ifdef __powerpc__\n"
20770"\n"
20771"#ifdef __cplusplus\n"
20772"extern \"C\" {\n"
20773"#endif\n"
20774"\n"
20775"#define _TEXASR_PTR(TM_BUF) ((texasr_t *)((char *)(TM_BUF) + 0))\n"
20776"#define _TEXASRU_PTR(TM_BUF) ((texasru_t *)((char *)(TM_BUF) + 0))\n"
20777"#define _TEXASRL_PTR(TM_BUF) ((texasrl_t *)((char *)(TM_BUF) + 4))\n"
20778"#define _TFIAR_PTR(TM_BUF) ((tfiar_t *)((char *)(TM_BUF) + 8))\n"
20779"\n"
20780"typedef char TM_buff_type[16];\n"
20781"\n"
20782"/* This macro can be used to determine whether a transaction was successfully\n"
20783" started from the __TM_begin() and __TM_simple_begin() intrinsic functions\n"
20784" below. */\n"
20785"#define _HTM_TBEGIN_STARTED 1\n"
20786"\n"
20787"extern __inline long\n"
20788"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20789"__TM_simple_begin (void)\n"
20790"{\n"
20791" if (__builtin_expect (__builtin_tbegin (0), 1))\n"
20792" return _HTM_TBEGIN_STARTED;\n"
20793" return 0;\n"
20794"}\n"
20795"\n"
20796"extern __inline long\n"
20797"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20798"__TM_begin (void* const __TM_buff)\n"
20799"{\n"
20800" *_TEXASRL_PTR (__TM_buff) = 0;\n"
20801" if (__builtin_expect (__builtin_tbegin (0), 1))\n"
20802" return _HTM_TBEGIN_STARTED;\n"
20803"#ifdef __powerpc64__\n"
20804" *_TEXASR_PTR (__TM_buff) = __builtin_get_texasr ();\n"
20805"#else\n"
20806" *_TEXASRU_PTR (__TM_buff) = __builtin_get_texasru ();\n"
20807" *_TEXASRL_PTR (__TM_buff) = __builtin_get_texasr ();\n"
20808"#endif\n"
20809" *_TFIAR_PTR (__TM_buff) = __builtin_get_tfiar ();\n"
20810" return 0;\n"
20811"}\n"
20812"\n"
20813"extern __inline long\n"
20814"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20815"__TM_end (void)\n"
20816"{\n"
20817" if (__builtin_expect (__builtin_tend (0), 1))\n"
20818" return 1;\n"
20819" return 0;\n"
20820"}\n"
20821"\n"
20822"extern __inline void\n"
20823"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20824"__TM_abort (void)\n"
20825"{\n"
20826" __builtin_tabort (0);\n"
20827"}\n"
20828"\n"
20829"extern __inline void\n"
20830"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20831"__TM_named_abort (unsigned char const __code)\n"
20832"{\n"
20833" __builtin_tabort (__code);\n"
20834"}\n"
20835"\n"
20836"extern __inline void\n"
20837"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20838"__TM_resume (void)\n"
20839"{\n"
20840" __builtin_tresume ();\n"
20841"}\n"
20842"\n"
20843"extern __inline void\n"
20844"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20845"__TM_suspend (void)\n"
20846"{\n"
20847" __builtin_tsuspend ();\n"
20848"}\n"
20849"\n"
20850"extern __inline long\n"
20851"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20852"__TM_is_user_abort (void* const __TM_buff)\n"
20853"{\n"
20854" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20855" return _TEXASRU_ABORT (texasru);\n"
20856"}\n"
20857"\n"
20858"extern __inline long\n"
20859"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20860"__TM_is_named_user_abort (void* const __TM_buff, unsigned char *__code)\n"
20861"{\n"
20862" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20863"\n"
20864" *__code = _TEXASRU_FAILURE_CODE (texasru);\n"
20865" return _TEXASRU_ABORT (texasru);\n"
20866"}\n"
20867"\n"
20868"extern __inline long\n"
20869"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20870"__TM_is_illegal (void* const __TM_buff)\n"
20871"{\n"
20872" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20873" return _TEXASRU_DISALLOWED (texasru);\n"
20874"}\n"
20875"\n"
20876"extern __inline long\n"
20877"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20878"__TM_is_footprint_exceeded (void* const __TM_buff)\n"
20879"{\n"
20880" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20881" return _TEXASRU_FOOTPRINT_OVERFLOW (texasru);\n"
20882"}\n"
20883"\n"
20884"extern __inline long\n"
20885"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20886"__TM_nesting_depth (void* const __TM_buff)\n"
20887"{\n"
20888" texasrl_t texasrl;\n"
20889"\n"
20890" if (_HTM_STATE (__builtin_ttest ()) == _HTM_NONTRANSACTIONAL)\n"
20891" {\n"
20892" texasrl = *_TEXASRL_PTR (__TM_buff);\n"
20893" if (!_TEXASR_FAILURE_SUMMARY (texasrl))\n"
20894" texasrl = 0;\n"
20895" }\n"
20896" else\n"
20897" texasrl = (texasrl_t) __builtin_get_texasr ();\n"
20898"\n"
20899" return _TEXASR_TRANSACTION_LEVEL (texasrl);\n"
20900"}\n"
20901"\n"
20902"extern __inline long\n"
20903"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20904"__TM_is_nested_too_deep(void* const __TM_buff)\n"
20905"{\n"
20906" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20907" return _TEXASRU_NESTING_OVERFLOW (texasru);\n"
20908"}\n"
20909"\n"
20910"extern __inline long\n"
20911"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20912"__TM_is_conflict(void* const __TM_buff)\n"
20913"{\n"
20914" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20915" /* Return TEXASR bits 11 (Self-Induced Conflict) through\n"
20916" 14 (Translation Invalidation Conflict). */\n"
20917" return (_TEXASRU_EXTRACT_BITS (texasru, 14, 4)) ? 1 : 0;\n"
20918"}\n"
20919"\n"
20920"extern __inline long\n"
20921"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20922"__TM_is_failure_persistent(void* const __TM_buff)\n"
20923"{\n"
20924" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20925" return _TEXASRU_FAILURE_PERSISTENT (texasru);\n"
20926"}\n"
20927"\n"
20928"extern __inline long\n"
20929"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20930"__TM_failure_address(void* const __TM_buff)\n"
20931"{\n"
20932" return *_TFIAR_PTR (__TM_buff);\n"
20933"}\n"
20934"\n"
20935"extern __inline long long\n"
20936"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20937"__TM_failure_code(void* const __TM_buff)\n"
20938"{\n"
20939" return *_TEXASR_PTR (__TM_buff);\n"
20940"}\n"
20941"\n"
20942"#ifdef __cplusplus\n"
20943"}\n"
20944"#endif\n"
20945"\n"
20946"#endif /* __powerpc__ */\n"
20947"\n"
20948"#ifdef __s390__\n"
20949"\n"
20950"#include <stdint.h>\n"
20951"\n"
20952"/* These intrinsics are being made available for compatibility with\n"
20953" the IBM XL compiler. For documentation please see the \"z/OS XL\n"
20954" C/C++ Programming Guide\" publicly available on the web. */\n"
20955"\n"
20956"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20957"__TM_simple_begin ()\n"
20958"{\n"
20959" return __builtin_tbegin_nofloat (0);\n"
20960"}\n"
20961"\n"
20962"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20963"__TM_begin (void* const __tdb)\n"
20964"{\n"
20965" return __builtin_tbegin_nofloat (__tdb);\n"
20966"}\n"
20967"\n"
20968"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20969"__TM_end ()\n"
20970"{\n"
20971" return __builtin_tend ();\n"
20972"}\n"
20973"\n"
20974"static __inline void __attribute__((__always_inline__))\n"
20975"__TM_abort ()\n"
20976"{\n"
20977" return __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE);\n"
20978"}\n"
20979"\n"
20980"static __inline void __attribute__((__always_inline__, __nodebug__))\n"
20981"__TM_named_abort (unsigned char const __code)\n"
20982"{\n"
20983" return __builtin_tabort ((int)_HTM_FIRST_USER_ABORT_CODE + __code);\n"
20984"}\n"
20985"\n"
20986"static __inline void __attribute__((__always_inline__, __nodebug__))\n"
20987"__TM_non_transactional_store (void* const __addr, long long const __value)\n"
20988"{\n"
20989" __builtin_non_tx_store ((uint64_t*)__addr, (uint64_t)__value);\n"
20990"}\n"
20991"\n"
20992"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20993"__TM_nesting_depth (void* const __tdb_ptr)\n"
20994"{\n"
20995" int depth = __builtin_tx_nesting_depth ();\n"
20996" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
20997"\n"
20998" if (depth != 0)\n"
20999" return depth;\n"
21000"\n"
21001" if (tdb->format != 1)\n"
21002" return 0;\n"
21003" return tdb->nesting_depth;\n"
21004"}\n"
21005"\n"
21006"/* Transaction failure diagnostics */\n"
21007"\n"
21008"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21009"__TM_is_user_abort (void* const __tdb_ptr)\n"
21010"{\n"
21011" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21012"\n"
21013" if (tdb->format != 1)\n"
21014" return 0;\n"
21015"\n"
21016" return !!(tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE);\n"
21017"}\n"
21018"\n"
21019"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21020"__TM_is_named_user_abort (void* const __tdb_ptr, unsigned char* __code)\n"
21021"{\n"
21022" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21023"\n"
21024" if (tdb->format != 1)\n"
21025" return 0;\n"
21026"\n"
21027" if (tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE)\n"
21028" {\n"
21029" *__code = tdb->abort_code - _HTM_FIRST_USER_ABORT_CODE;\n"
21030" return 1;\n"
21031" }\n"
21032" return 0;\n"
21033"}\n"
21034"\n"
21035"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21036"__TM_is_illegal (void* const __tdb_ptr)\n"
21037"{\n"
21038" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21039"\n"
21040" return (tdb->format == 1\n"
21041" && (tdb->abort_code == 4 /* unfiltered program interruption */\n"
21042" || tdb->abort_code == 11 /* restricted instruction */));\n"
21043"}\n"
21044"\n"
21045"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21046"__TM_is_footprint_exceeded (void* const __tdb_ptr)\n"
21047"{\n"
21048" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21049"\n"
21050" return (tdb->format == 1\n"
21051" && (tdb->abort_code == 7 /* fetch overflow */\n"
21052" || tdb->abort_code == 8 /* store overflow */));\n"
21053"}\n"
21054"\n"
21055"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21056"__TM_is_nested_too_deep (void* const __tdb_ptr)\n"
21057"{\n"
21058" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21059"\n"
21060" return tdb->format == 1 && tdb->abort_code == 13; /* depth exceeded */\n"
21061"}\n"
21062"\n"
21063"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21064"__TM_is_conflict (void* const __tdb_ptr)\n"
21065"{\n"
21066" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21067"\n"
21068" return (tdb->format == 1\n"
21069" && (tdb->abort_code == 9 /* fetch conflict */\n"
21070" || tdb->abort_code == 10 /* store conflict */));\n"
21071"}\n"
21072"\n"
21073"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21074"__TM_is_failure_persistent (long const __result)\n"
21075"{\n"
21076" return __result == _HTM_TBEGIN_PERSISTENT;\n"
21077"}\n"
21078"\n"
21079"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21080"__TM_failure_address (void* const __tdb_ptr)\n"
21081"{\n"
21082" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21083" return tdb->atia;\n"
21084"}\n"
21085"\n"
21086"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21087"__TM_failure_code (void* const __tdb_ptr)\n"
21088"{\n"
21089" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21090"\n"
21091" return tdb->abort_code;\n"
21092"}\n"
21093"\n"
21094"#endif /* __s390__ */\n"
21095"\n"
21096"#endif /* __HTMXLINTRIN_H */\n"
21097"" } ,
21098 { "/builtins/ia32intrin.h" , "/* ===-------- ia32intrin.h ---------------------------------------------------===\n"
21099" *\n"
21100" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
21101" * of this software and associated documentation files (the \"Software\"), to deal\n"
21102" * in the Software without restriction, including without limitation the rights\n"
21103" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
21104" * copies of the Software, and to permit persons to whom the Software is\n"
21105" * furnished to do so, subject to the following conditions:\n"
21106" *\n"
21107" * The above copyright notice and this permission notice shall be included in\n"
21108" * all copies or substantial portions of the Software.\n"
21109" *\n"
21110" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
21111" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
21112" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
21113" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
21114" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
21115" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
21116" * THE SOFTWARE.\n"
21117" *\n"
21118" *===-----------------------------------------------------------------------===\n"
21119" */\n"
21120"\n"
21121"#ifndef __X86INTRIN_H\n"
21122"#error \"Never use <ia32intrin.h> directly; include <x86intrin.h> instead.\"\n"
21123"#endif\n"
21124"\n"
21125"#ifndef __IA32INTRIN_H\n"
21126"#define __IA32INTRIN_H\n"
21127"\n"
21128"#ifdef __x86_64__\n"
21129"static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))\n"
21130"__readeflags(void)\n"
21131"{\n"
21132" return __builtin_ia32_readeflags_u64();\n"
21133"}\n"
21134"\n"
21135"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
21136"__writeeflags(unsigned long long __f)\n"
21137"{\n"
21138" __builtin_ia32_writeeflags_u64(__f);\n"
21139"}\n"
21140"\n"
21141"#else /* !__x86_64__ */\n"
21142"static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))\n"
21143"__readeflags(void)\n"
21144"{\n"
21145" return __builtin_ia32_readeflags_u32();\n"
21146"}\n"
21147"\n"
21148"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
21149"__writeeflags(unsigned int __f)\n"
21150"{\n"
21151" __builtin_ia32_writeeflags_u32(__f);\n"
21152"}\n"
21153"#endif /* !__x86_64__ */\n"
21154"\n"
21155"static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))\n"
21156"__rdpmc(int __A) {\n"
21157" return __builtin_ia32_rdpmc(__A);\n"
21158"}\n"
21159"\n"
21160"/* __rdtscp */\n"
21161"static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))\n"
21162"__rdtscp(unsigned int *__A) {\n"
21163" return __builtin_ia32_rdtscp(__A);\n"
21164"}\n"
21165"\n"
21166"#define _rdtsc() __rdtsc()\n"
21167"\n"
21168"#define _rdpmc(A) __rdpmc(A)\n"
21169"\n"
21170"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
21171"_wbinvd(void) {\n"
21172" __builtin_ia32_wbinvd();\n"
21173"}\n"
21174"\n"
21175"#endif /* __IA32INTRIN_H */\n"
21176"" } ,
21177 { "/builtins/immintrin.h" , "/*===---- immintrin.h - Intel intrinsics -----------------------------------===\n"
21178" *\n"
21179" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
21180" * of this software and associated documentation files (the \"Software\"), to deal\n"
21181" * in the Software without restriction, including without limitation the rights\n"
21182" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
21183" * copies of the Software, and to permit persons to whom the Software is\n"
21184" * furnished to do so, subject to the following conditions:\n"
21185" *\n"
21186" * The above copyright notice and this permission notice shall be included in\n"
21187" * all copies or substantial portions of the Software.\n"
21188" *\n"
21189" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
21190" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
21191" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
21192" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
21193" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
21194" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
21195" * THE SOFTWARE.\n"
21196" *\n"
21197" *===-----------------------------------------------------------------------===\n"
21198" */\n"
21199"\n"
21200"#ifndef __IMMINTRIN_H\n"
21201"#define __IMMINTRIN_H\n"
21202"\n"
21203"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)\n"
21204"#include <mmintrin.h>\n"
21205"#endif\n"
21206"\n"
21207"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)\n"
21208"#include <xmmintrin.h>\n"
21209"#endif\n"
21210"\n"
21211"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)\n"
21212"#include <emmintrin.h>\n"
21213"#endif\n"
21214"\n"
21215"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)\n"
21216"#include <pmmintrin.h>\n"
21217"#endif\n"
21218"\n"
21219"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)\n"
21220"#include <tmmintrin.h>\n"
21221"#endif\n"
21222"\n"
21223"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21224" (defined(__SSE4_2__) || defined(__SSE4_1__))\n"
21225"#include <smmintrin.h>\n"
21226"#endif\n"
21227"\n"
21228"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21229" (defined(__AES__) || defined(__PCLMUL__))\n"
21230"#include <wmmintrin.h>\n"
21231"#endif\n"
21232"\n"
21233"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)\n"
21234"#include <clflushoptintrin.h>\n"
21235"#endif\n"
21236"\n"
21237"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)\n"
21238"#include <clwbintrin.h>\n"
21239"#endif\n"
21240"\n"
21241"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)\n"
21242"#include <avxintrin.h>\n"
21243"#endif\n"
21244"\n"
21245"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)\n"
21246"#include <avx2intrin.h>\n"
21247"#endif\n"
21248"\n"
21249"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)\n"
21250"#include <f16cintrin.h>\n"
21251"#endif\n"
21252"\n"
21253"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)\n"
21254"#include <vpclmulqdqintrin.h>\n"
21255"#endif\n"
21256"\n"
21257"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)\n"
21258"#include <bmiintrin.h>\n"
21259"#endif\n"
21260"\n"
21261"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)\n"
21262"#include <bmi2intrin.h>\n"
21263"#endif\n"
21264"\n"
21265"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)\n"
21266"#include <lzcntintrin.h>\n"
21267"#endif\n"
21268"\n"
21269"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)\n"
21270"#include <popcntintrin.h>\n"
21271"#endif\n"
21272"\n"
21273"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)\n"
21274"#include <fmaintrin.h>\n"
21275"#endif\n"
21276"\n"
21277"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)\n"
21278"#include <avx512fintrin.h>\n"
21279"#endif\n"
21280"\n"
21281"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)\n"
21282"#include <avx512vlintrin.h>\n"
21283"#endif\n"
21284"\n"
21285"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)\n"
21286"#include <avx512bwintrin.h>\n"
21287"#endif\n"
21288"\n"
21289"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)\n"
21290"#include <avx512bitalgintrin.h>\n"
21291"#endif\n"
21292"\n"
21293"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)\n"
21294"#include <avx512cdintrin.h>\n"
21295"#endif\n"
21296"\n"
21297"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)\n"
21298"#include <avx512vpopcntdqintrin.h>\n"
21299"#endif\n"
21300"\n"
21301"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21302" (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))\n"
21303"#include <avx512vpopcntdqvlintrin.h>\n"
21304"#endif\n"
21305"\n"
21306"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)\n"
21307"#include <avx512vnniintrin.h>\n"
21308"#endif\n"
21309"\n"
21310"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21311" (defined(__AVX512VL__) && defined(__AVX512VNNI__))\n"
21312"#include <avx512vlvnniintrin.h>\n"
21313"#endif\n"
21314"\n"
21315"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)\n"
21316"#include <avx512dqintrin.h>\n"
21317"#endif\n"
21318"\n"
21319"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21320" (defined(__AVX512VL__) && defined(__AVX512BITALG__))\n"
21321"#include <avx512vlbitalgintrin.h>\n"
21322"#endif\n"
21323"\n"
21324"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21325" (defined(__AVX512VL__) && defined(__AVX512BW__))\n"
21326"#include <avx512vlbwintrin.h>\n"
21327"#endif\n"
21328"\n"
21329"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21330" (defined(__AVX512VL__) && defined(__AVX512CD__))\n"
21331"#include <avx512vlcdintrin.h>\n"
21332"#endif\n"
21333"\n"
21334"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21335" (defined(__AVX512VL__) && defined(__AVX512DQ__))\n"
21336"#include <avx512vldqintrin.h>\n"
21337"#endif\n"
21338"\n"
21339"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)\n"
21340"#include <avx512erintrin.h>\n"
21341"#endif\n"
21342"\n"
21343"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)\n"
21344"#include <avx512ifmaintrin.h>\n"
21345"#endif\n"
21346"\n"
21347"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21348" (defined(__AVX512IFMA__) && defined(__AVX512VL__))\n"
21349"#include <avx512ifmavlintrin.h>\n"
21350"#endif\n"
21351"\n"
21352"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)\n"
21353"#include <avx512vbmiintrin.h>\n"
21354"#endif\n"
21355"\n"
21356"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21357" (defined(__AVX512VBMI__) && defined(__AVX512VL__))\n"
21358"#include <avx512vbmivlintrin.h>\n"
21359"#endif\n"
21360"\n"
21361"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)\n"
21362"#include <avx512vbmi2intrin.h>\n"
21363"#endif\n"
21364"\n"
21365"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21366" (defined(__AVX512VBMI2__) && defined(__AVX512VL__))\n"
21367"#include <avx512vlvbmi2intrin.h>\n"
21368"#endif\n"
21369"\n"
21370"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)\n"
21371"#include <avx512pfintrin.h>\n"
21372"#endif\n"
21373"\n"
21374"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)\n"
21375"#include <pkuintrin.h>\n"
21376"#endif\n"
21377"\n"
21378"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)\n"
21379"#include <vaesintrin.h>\n"
21380"#endif\n"
21381"\n"
21382"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)\n"
21383"#include <gfniintrin.h>\n"
21384"#endif\n"
21385"\n"
21386"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)\n"
21387"/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).\n"
21388"///\n"
21389"/// \\headerfile <immintrin.h>\n"
21390"///\n"
21391"/// This intrinsic corresponds to the <c> RDPID </c> instruction.\n"
21392"static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__(\"rdpid\")))\n"
21393"_rdpid_u32(void) {\n"
21394" return __builtin_ia32_rdpid();\n"
21395"}\n"
21396"#endif // __RDPID__\n"
21397"\n"
21398"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)\n"
21399"static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"rdrnd\")))\n"
21400"_rdrand16_step(unsigned short *__p)\n"
21401"{\n"
21402" return __builtin_ia32_rdrand16_step(__p);\n"
21403"}\n"
21404"\n"
21405"static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"rdrnd\")))\n"
21406"_rdrand32_step(unsigned int *__p)\n"
21407"{\n"
21408" return __builtin_ia32_rdrand32_step(__p);\n"
21409"}\n"
21410"\n"
21411"#ifdef __x86_64__\n"
21412"static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"rdrnd\")))\n"
21413"_rdrand64_step(unsigned long long *__p)\n"
21414"{\n"
21415" return __builtin_ia32_rdrand64_step(__p);\n"
21416"}\n"
21417"#endif\n"
21418"#endif /* __RDRND__ */\n"
21419"\n"
21420"/* __bit_scan_forward */\n"
21421"static __inline__ int __attribute__((__always_inline__, __nodebug__))\n"
21422"_bit_scan_forward(int __A) {\n"
21423" return __builtin_ctz(__A);\n"
21424"}\n"
21425"\n"
21426"/* __bit_scan_reverse */\n"
21427"static __inline__ int __attribute__((__always_inline__, __nodebug__))\n"
21428"_bit_scan_reverse(int __A) {\n"
21429" return 31 - __builtin_clz(__A);\n"
21430"}\n"
21431"\n"
21432"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)\n"
21433"#ifdef __x86_64__\n"
21434"static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21435"_readfsbase_u32(void)\n"
21436"{\n"
21437" return __builtin_ia32_rdfsbase32();\n"
21438"}\n"
21439"\n"
21440"static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21441"_readfsbase_u64(void)\n"
21442"{\n"
21443" return __builtin_ia32_rdfsbase64();\n"
21444"}\n"
21445"\n"
21446"static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21447"_readgsbase_u32(void)\n"
21448"{\n"
21449" return __builtin_ia32_rdgsbase32();\n"
21450"}\n"
21451"\n"
21452"static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21453"_readgsbase_u64(void)\n"
21454"{\n"
21455" return __builtin_ia32_rdgsbase64();\n"
21456"}\n"
21457"\n"
21458"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21459"_writefsbase_u32(unsigned int __V)\n"
21460"{\n"
21461" __builtin_ia32_wrfsbase32(__V);\n"
21462"}\n"
21463"\n"
21464"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21465"_writefsbase_u64(unsigned long long __V)\n"
21466"{\n"
21467" __builtin_ia32_wrfsbase64(__V);\n"
21468"}\n"
21469"\n"
21470"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21471"_writegsbase_u32(unsigned int __V)\n"
21472"{\n"
21473" __builtin_ia32_wrgsbase32(__V);\n"
21474"}\n"
21475"\n"
21476"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21477"_writegsbase_u64(unsigned long long __V)\n"
21478"{\n"
21479" __builtin_ia32_wrgsbase64(__V);\n"
21480"}\n"
21481"\n"
21482"#endif\n"
21483"#endif /* __FSGSBASE__ */\n"
21484"\n"
21485"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)\n"
21486"\n"
21487"/* The structs used below are to force the load/store to be unaligned. This\n"
21488" * is accomplished with the __packed__ attribute. The __may_alias__ prevents\n"
21489" * tbaa metadata from being generated based on the struct and the type of the\n"
21490" * field inside of it.\n"
21491" */\n"
21492"\n"
21493"static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n"
21494"_loadbe_i16(void const * __P) {\n"
21495" struct __loadu_i16 {\n"
21496" short __v;\n"
21497" } __attribute__((__packed__, __may_alias__));\n"
21498" return __builtin_bswap16(((struct __loadu_i16*)__P)->__v);\n"
21499"}\n"
21500"\n"
21501"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n"
21502"_storebe_i16(void * __P, short __D) {\n"
21503" struct __storeu_i16 {\n"
21504" short __v;\n"
21505" } __attribute__((__packed__, __may_alias__));\n"
21506" ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D);\n"
21507"}\n"
21508"\n"
21509"static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n"
21510"_loadbe_i32(void const * __P) {\n"
21511" struct __loadu_i32 {\n"
21512" int __v;\n"
21513" } __attribute__((__packed__, __may_alias__));\n"
21514" return __builtin_bswap32(((struct __loadu_i32*)__P)->__v);\n"
21515"}\n"
21516"\n"
21517"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n"
21518"_storebe_i32(void * __P, int __D) {\n"
21519" struct __storeu_i32 {\n"
21520" int __v;\n"
21521" } __attribute__((__packed__, __may_alias__));\n"
21522" ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D);\n"
21523"}\n"
21524"\n"
21525"#ifdef __x86_64__\n"
21526"static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n"
21527"_loadbe_i64(void const * __P) {\n"
21528" struct __loadu_i64 {\n"
21529" long long __v;\n"
21530" } __attribute__((__packed__, __may_alias__));\n"
21531" return __builtin_bswap64(((struct __loadu_i64*)__P)->__v);\n"
21532"}\n"
21533"\n"
21534"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n"
21535"_storebe_i64(void * __P, long long __D) {\n"
21536" struct __storeu_i64 {\n"
21537" long long __v;\n"
21538" } __attribute__((__packed__, __may_alias__));\n"
21539" ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D);\n"
21540"}\n"
21541"#endif\n"
21542"#endif /* __MOVBE */\n"
21543"\n"
21544"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)\n"
21545"#include <rtmintrin.h>\n"
21546"#include <xtestintrin.h>\n"
21547"#endif\n"
21548"\n"
21549"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)\n"
21550"#include <shaintrin.h>\n"
21551"#endif\n"
21552"\n"
21553"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)\n"
21554"#include <fxsrintrin.h>\n"
21555"#endif\n"
21556"\n"
21557"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)\n"
21558"#include <xsaveintrin.h>\n"
21559"#endif\n"
21560"\n"
21561"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)\n"
21562"#include <xsaveoptintrin.h>\n"
21563"#endif\n"
21564"\n"
21565"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)\n"
21566"#include <xsavecintrin.h>\n"
21567"#endif\n"
21568"\n"
21569"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)\n"
21570"#include <xsavesintrin.h>\n"
21571"#endif\n"
21572"\n"
21573"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)\n"
21574"#include <cetintrin.h>\n"
21575"#endif\n"
21576"\n"
21577"/* Some intrinsics inside adxintrin.h are available only on processors with ADX,\n"
21578" * whereas others are also available at all times. */\n"
21579"#include <adxintrin.h>\n"
21580"\n"
21581"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)\n"
21582"#include <rdseedintrin.h>\n"
21583"#endif\n"
21584"\n"
21585"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)\n"
21586"#include <wbnoinvdintrin.h>\n"
21587"#endif\n"
21588"\n"
21589"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)\n"
21590"#include <cldemoteintrin.h>\n"
21591"#endif\n"
21592"\n"
21593"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)\n"
21594"#include <waitpkgintrin.h>\n"
21595"#endif\n"
21596"\n"
21597"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21598" defined(__MOVDIRI__) || defined(__MOVDIR64B__)\n"
21599"#include <movdirintrin.h>\n"
21600"#endif\n"
21601"\n"
21602"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)\n"
21603"#include <pconfigintrin.h>\n"
21604"#endif\n"
21605"\n"
21606"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)\n"
21607"#include <sgxintrin.h>\n"
21608"#endif\n"
21609"\n"
21610"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)\n"
21611"#include <ptwriteintrin.h>\n"
21612"#endif\n"
21613"\n"
21614"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)\n"
21615"#include <invpcidintrin.h>\n"
21616"#endif\n"
21617"\n"
21618"#ifdef _MSC_VER\n"
21619"/* Define the default attributes for these intrinsics */\n"
21620"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n"
21621"#ifdef __cplusplus\n"
21622"extern \"C\" {\n"
21623"#endif\n"
21624"/*----------------------------------------------------------------------------*\\\n"
21625"|* Interlocked Exchange HLE\n"
21626"\\*----------------------------------------------------------------------------*/\n"
21627"#if defined(__i386__) || defined(__x86_64__)\n"
21628"static __inline__ long __DEFAULT_FN_ATTRS\n"
21629"_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {\n"
21630" __asm__ __volatile__(\".byte 0xf2 ; lock ; xchg %0, %1\"\n"
21631" : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n"
21632" return _Value;\n"
21633"}\n"
21634"static __inline__ long __DEFAULT_FN_ATTRS\n"
21635"_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {\n"
21636" __asm__ __volatile__(\".byte 0xf3 ; lock ; xchg %0, %1\"\n"
21637" : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n"
21638" return _Value;\n"
21639"}\n"
21640"#endif\n"
21641"#if defined(__x86_64__)\n"
21642"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21643"_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {\n"
21644" __asm__ __volatile__(\".byte 0xf2 ; lock ; xchg %0, %1\"\n"
21645" : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n"
21646" return _Value;\n"
21647"}\n"
21648"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21649"_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {\n"
21650" __asm__ __volatile__(\".byte 0xf3 ; lock ; xchg %0, %1\"\n"
21651" : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n"
21652" return _Value;\n"
21653"}\n"
21654"#endif\n"
21655"/*----------------------------------------------------------------------------*\\\n"
21656"|* Interlocked Compare Exchange HLE\n"
21657"\\*----------------------------------------------------------------------------*/\n"
21658"#if defined(__i386__) || defined(__x86_64__)\n"
21659"static __inline__ long __DEFAULT_FN_ATTRS\n"
21660"_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,\n"
21661" long _Exchange, long _Comparand) {\n"
21662" __asm__ __volatile__(\".byte 0xf2 ; lock ; cmpxchg %2, %1\"\n"
21663" : \"+a\" (_Comparand), \"+m\" (*_Destination)\n"
21664" : \"r\" (_Exchange) : \"memory\");\n"
21665" return _Comparand;\n"
21666"}\n"
21667"static __inline__ long __DEFAULT_FN_ATTRS\n"
21668"_InterlockedCompareExchange_HLERelease(long volatile *_Destination,\n"
21669" long _Exchange, long _Comparand) {\n"
21670" __asm__ __volatile__(\".byte 0xf3 ; lock ; cmpxchg %2, %1\"\n"
21671" : \"+a\" (_Comparand), \"+m\" (*_Destination)\n"
21672" : \"r\" (_Exchange) : \"memory\");\n"
21673" return _Comparand;\n"
21674"}\n"
21675"#endif\n"
21676"#if defined(__x86_64__)\n"
21677"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21678"_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,\n"
21679" __int64 _Exchange, __int64 _Comparand) {\n"
21680" __asm__ __volatile__(\".byte 0xf2 ; lock ; cmpxchg %2, %1\"\n"
21681" : \"+a\" (_Comparand), \"+m\" (*_Destination)\n"
21682" : \"r\" (_Exchange) : \"memory\");\n"
21683" return _Comparand;\n"
21684"}\n"
21685"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21686"_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,\n"
21687" __int64 _Exchange, __int64 _Comparand) {\n"
21688" __asm__ __volatile__(\".byte 0xf3 ; lock ; cmpxchg %2, %1\"\n"
21689" : \"+a\" (_Comparand), \"+m\" (*_Destination)\n"
21690" : \"r\" (_Exchange) : \"memory\");\n"
21691" return _Comparand;\n"
21692"}\n"
21693"#endif\n"
21694"#ifdef __cplusplus\n"
21695"}\n"
21696"#endif\n"
21697"\n"
21698"#undef __DEFAULT_FN_ATTRS\n"
21699"\n"
21700"#endif /* _MSC_VER */\n"
21701"\n"
21702"#endif /* __IMMINTRIN_H */\n"
21703"" } ,
21704 { "/builtins/intrin.h" , "/* ===-------- intrin.h ---------------------------------------------------===\n"
21705" *\n"
21706" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
21707" * of this software and associated documentation files (the \"Software\"), to deal\n"
21708" * in the Software without restriction, including without limitation the rights\n"
21709" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
21710" * copies of the Software, and to permit persons to whom the Software is\n"
21711" * furnished to do so, subject to the following conditions:\n"
21712" *\n"
21713" * The above copyright notice and this permission notice shall be included in\n"
21714" * all copies or substantial portions of the Software.\n"
21715" *\n"
21716" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
21717" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
21718" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
21719" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
21720" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
21721" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
21722" * THE SOFTWARE.\n"
21723" *\n"
21724" *===-----------------------------------------------------------------------===\n"
21725" */\n"
21726"\n"
21727"/* Only include this if we're compiling for the windows platform. */\n"
21728"#ifndef _MSC_VER\n"
21729"#include_next <intrin.h>\n"
21730"#else\n"
21731"\n"
21732"#ifndef __INTRIN_H\n"
21733"#define __INTRIN_H\n"
21734"\n"
21735"/* First include the standard intrinsics. */\n"
21736"#if defined(__i386__) || defined(__x86_64__)\n"
21737"#include <x86intrin.h>\n"
21738"#endif\n"
21739"\n"
21740"#if defined(__arm__)\n"
21741"#include <armintr.h>\n"
21742"#endif\n"
21743"\n"
21744"#if defined(__aarch64__)\n"
21745"#include <arm64intr.h>\n"
21746"#endif\n"
21747"\n"
21748"/* For the definition of jmp_buf. */\n"
21749"#if __STDC_HOSTED__\n"
21750"#include <setjmp.h>\n"
21751"#endif\n"
21752"\n"
21753"/* Define the default attributes for the functions in this file. */\n"
21754"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n"
21755"\n"
21756"#ifdef __cplusplus\n"
21757"extern \"C\" {\n"
21758"#endif\n"
21759"\n"
21760"#if defined(__MMX__)\n"
21761"/* And the random ones that aren't in those files. */\n"
21762"__m64 _m_from_float(float);\n"
21763"float _m_to_float(__m64);\n"
21764"#endif\n"
21765"\n"
21766"/* Other assorted instruction intrinsics. */\n"
21767"void __addfsbyte(unsigned long, unsigned char);\n"
21768"void __addfsdword(unsigned long, unsigned long);\n"
21769"void __addfsword(unsigned long, unsigned short);\n"
21770"void __code_seg(const char *);\n"
21771"static __inline__\n"
21772"void __cpuid(int[4], int);\n"
21773"static __inline__\n"
21774"void __cpuidex(int[4], int, int);\n"
21775"static __inline__\n"
21776"__int64 __emul(int, int);\n"
21777"static __inline__\n"
21778"unsigned __int64 __emulu(unsigned int, unsigned int);\n"
21779"unsigned int __getcallerseflags(void);\n"
21780"static __inline__\n"
21781"void __halt(void);\n"
21782"unsigned char __inbyte(unsigned short);\n"
21783"void __inbytestring(unsigned short, unsigned char *, unsigned long);\n"
21784"void __incfsbyte(unsigned long);\n"
21785"void __incfsdword(unsigned long);\n"
21786"void __incfsword(unsigned long);\n"
21787"unsigned long __indword(unsigned short);\n"
21788"void __indwordstring(unsigned short, unsigned long *, unsigned long);\n"
21789"void __int2c(void);\n"
21790"void __invlpg(void *);\n"
21791"unsigned short __inword(unsigned short);\n"
21792"void __inwordstring(unsigned short, unsigned short *, unsigned long);\n"
21793"void __lidt(void *);\n"
21794"unsigned __int64 __ll_lshift(unsigned __int64, int);\n"
21795"__int64 __ll_rshift(__int64, int);\n"
21796"static __inline__\n"
21797"void __movsb(unsigned char *, unsigned char const *, size_t);\n"
21798"static __inline__\n"
21799"void __movsd(unsigned long *, unsigned long const *, size_t);\n"
21800"static __inline__\n"
21801"void __movsw(unsigned short *, unsigned short const *, size_t);\n"
21802"static __inline__\n"
21803"void __nop(void);\n"
21804"void __nvreg_restore_fence(void);\n"
21805"void __nvreg_save_fence(void);\n"
21806"void __outbyte(unsigned short, unsigned char);\n"
21807"void __outbytestring(unsigned short, unsigned char *, unsigned long);\n"
21808"void __outdword(unsigned short, unsigned long);\n"
21809"void __outdwordstring(unsigned short, unsigned long *, unsigned long);\n"
21810"void __outword(unsigned short, unsigned short);\n"
21811"void __outwordstring(unsigned short, unsigned short *, unsigned long);\n"
21812"unsigned long __readcr0(void);\n"
21813"unsigned long __readcr2(void);\n"
21814"static __inline__\n"
21815"unsigned long __readcr3(void);\n"
21816"unsigned long __readcr4(void);\n"
21817"unsigned long __readcr8(void);\n"
21818"unsigned int __readdr(unsigned int);\n"
21819"#ifdef __i386__\n"
21820"static __inline__\n"
21821"unsigned char __readfsbyte(unsigned long);\n"
21822"static __inline__\n"
21823"unsigned __int64 __readfsqword(unsigned long);\n"
21824"static __inline__\n"
21825"unsigned short __readfsword(unsigned long);\n"
21826"#endif\n"
21827"static __inline__\n"
21828"unsigned __int64 __readmsr(unsigned long);\n"
21829"unsigned __int64 __readpmc(unsigned long);\n"
21830"unsigned long __segmentlimit(unsigned long);\n"
21831"void __sidt(void *);\n"
21832"static __inline__\n"
21833"void __stosb(unsigned char *, unsigned char, size_t);\n"
21834"static __inline__\n"
21835"void __stosd(unsigned long *, unsigned long, size_t);\n"
21836"static __inline__\n"
21837"void __stosw(unsigned short *, unsigned short, size_t);\n"
21838"void __svm_clgi(void);\n"
21839"void __svm_invlpga(void *, int);\n"
21840"void __svm_skinit(int);\n"
21841"void __svm_stgi(void);\n"
21842"void __svm_vmload(size_t);\n"
21843"void __svm_vmrun(size_t);\n"
21844"void __svm_vmsave(size_t);\n"
21845"void __ud2(void);\n"
21846"unsigned __int64 __ull_rshift(unsigned __int64, int);\n"
21847"void __vmx_off(void);\n"
21848"void __vmx_vmptrst(unsigned __int64 *);\n"
21849"void __wbinvd(void);\n"
21850"void __writecr0(unsigned int);\n"
21851"static __inline__\n"
21852"void __writecr3(unsigned int);\n"
21853"void __writecr4(unsigned int);\n"
21854"void __writecr8(unsigned int);\n"
21855"void __writedr(unsigned int, unsigned int);\n"
21856"void __writefsbyte(unsigned long, unsigned char);\n"
21857"void __writefsdword(unsigned long, unsigned long);\n"
21858"void __writefsqword(unsigned long, unsigned __int64);\n"
21859"void __writefsword(unsigned long, unsigned short);\n"
21860"void __writemsr(unsigned long, unsigned __int64);\n"
21861"static __inline__\n"
21862"void *_AddressOfReturnAddress(void);\n"
21863"static __inline__\n"
21864"unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);\n"
21865"static __inline__\n"
21866"unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);\n"
21867"unsigned char _bittest(long const *, long);\n"
21868"unsigned char _bittestandcomplement(long *, long);\n"
21869"unsigned char _bittestandreset(long *, long);\n"
21870"unsigned char _bittestandset(long *, long);\n"
21871"void __cdecl _disable(void);\n"
21872"void __cdecl _enable(void);\n"
21873"long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value);\n"
21874"unsigned char _interlockedbittestandreset(long volatile *, long);\n"
21875"unsigned char _interlockedbittestandset(long volatile *, long);\n"
21876"void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *,\n"
21877" void *);\n"
21878"void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *,\n"
21879" void *);\n"
21880"long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long);\n"
21881"long _InterlockedExchangeAdd_HLERelease(long volatile *, long);\n"
21882"__int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64);\n"
21883"__int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64);\n"
21884"void __cdecl _invpcid(unsigned int, void *);\n"
21885"static __inline__ void\n"
21886"__attribute__((__deprecated__(\"use other intrinsics or C++11 atomics instead\")))\n"
21887"_ReadBarrier(void);\n"
21888"static __inline__ void\n"
21889"__attribute__((__deprecated__(\"use other intrinsics or C++11 atomics instead\")))\n"
21890"_ReadWriteBarrier(void);\n"
21891"unsigned int _rorx_u32(unsigned int, const unsigned int);\n"
21892"int _sarx_i32(int, unsigned int);\n"
21893"#if __STDC_HOSTED__\n"
21894"int __cdecl _setjmp(jmp_buf);\n"
21895"#endif\n"
21896"unsigned int _shlx_u32(unsigned int, unsigned int);\n"
21897"unsigned int _shrx_u32(unsigned int, unsigned int);\n"
21898"void _Store_HLERelease(long volatile *, long);\n"
21899"void _Store64_HLERelease(__int64 volatile *, __int64);\n"
21900"void _StorePointer_HLERelease(void *volatile *, void *);\n"
21901"static __inline__ void\n"
21902"__attribute__((__deprecated__(\"use other intrinsics or C++11 atomics instead\")))\n"
21903"_WriteBarrier(void);\n"
21904"unsigned __int32 xbegin(void);\n"
21905"void _xend(void);\n"
21906"static __inline__\n"
21907"#define _XCR_XFEATURE_ENABLED_MASK 0\n"
21908"unsigned __int64 __cdecl _xgetbv(unsigned int);\n"
21909"void __cdecl _xsetbv(unsigned int, unsigned __int64);\n"
21910"\n"
21911"/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */\n"
21912"#ifdef __x86_64__\n"
21913"void __addgsbyte(unsigned long, unsigned char);\n"
21914"void __addgsdword(unsigned long, unsigned long);\n"
21915"void __addgsqword(unsigned long, unsigned __int64);\n"
21916"void __addgsword(unsigned long, unsigned short);\n"
21917"static __inline__\n"
21918"void __faststorefence(void);\n"
21919"void __incgsbyte(unsigned long);\n"
21920"void __incgsdword(unsigned long);\n"
21921"void __incgsqword(unsigned long);\n"
21922"void __incgsword(unsigned long);\n"
21923"static __inline__\n"
21924"void __movsq(unsigned long long *, unsigned long long const *, size_t);\n"
21925"static __inline__\n"
21926"unsigned char __readgsbyte(unsigned long);\n"
21927"static __inline__\n"
21928"unsigned long __readgsdword(unsigned long);\n"
21929"static __inline__\n"
21930"unsigned __int64 __readgsqword(unsigned long);\n"
21931"unsigned short __readgsword(unsigned long);\n"
21932"unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,\n"
21933" unsigned __int64 _HighPart,\n"
21934" unsigned char _Shift);\n"
21935"unsigned __int64 __shiftright128(unsigned __int64 _LowPart,\n"
21936" unsigned __int64 _HighPart,\n"
21937" unsigned char _Shift);\n"
21938"static __inline__\n"
21939"void __stosq(unsigned __int64 *, unsigned __int64, size_t);\n"
21940"unsigned char __vmx_on(unsigned __int64 *);\n"
21941"unsigned char __vmx_vmclear(unsigned __int64 *);\n"
21942"unsigned char __vmx_vmlaunch(void);\n"
21943"unsigned char __vmx_vmptrld(unsigned __int64 *);\n"
21944"unsigned char __vmx_vmread(size_t, size_t *);\n"
21945"unsigned char __vmx_vmresume(void);\n"
21946"unsigned char __vmx_vmwrite(size_t, size_t);\n"
21947"void __writegsbyte(unsigned long, unsigned char);\n"
21948"void __writegsdword(unsigned long, unsigned long);\n"
21949"void __writegsqword(unsigned long, unsigned __int64);\n"
21950"void __writegsword(unsigned long, unsigned short);\n"
21951"unsigned char _bittest64(__int64 const *, __int64);\n"
21952"unsigned char _bittestandcomplement64(__int64 *, __int64);\n"
21953"unsigned char _bittestandreset64(__int64 *, __int64);\n"
21954"unsigned char _bittestandset64(__int64 *, __int64);\n"
21955"long _InterlockedAnd_np(long volatile *_Value, long _Mask);\n"
21956"short _InterlockedAnd16_np(short volatile *_Value, short _Mask);\n"
21957"__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask);\n"
21958"char _InterlockedAnd8_np(char volatile *_Value, char _Mask);\n"
21959"unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);\n"
21960"unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);\n"
21961"long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,\n"
21962" long _Comparand);\n"
21963"unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,\n"
21964" __int64 _ExchangeHigh,\n"
21965" __int64 _ExchangeLow,\n"
21966" __int64 *_CompareandResult);\n"
21967"unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination,\n"
21968" __int64 _ExchangeHigh,\n"
21969" __int64 _ExchangeLow,\n"
21970" __int64 *_ComparandResult);\n"
21971"short _InterlockedCompareExchange16_np(short volatile *_Destination,\n"
21972" short _Exchange, short _Comparand);\n"
21973"__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination,\n"
21974" __int64 _Exchange, __int64 _Comparand);\n"
21975"void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination,\n"
21976" void *_Exchange, void *_Comparand);\n"
21977"long _InterlockedOr_np(long volatile *_Value, long _Mask);\n"
21978"short _InterlockedOr16_np(short volatile *_Value, short _Mask);\n"
21979"__int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask);\n"
21980"char _InterlockedOr8_np(char volatile *_Value, char _Mask);\n"
21981"long _InterlockedXor_np(long volatile *_Value, long _Mask);\n"
21982"short _InterlockedXor16_np(short volatile *_Value, short _Mask);\n"
21983"__int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask);\n"
21984"char _InterlockedXor8_np(char volatile *_Value, char _Mask);\n"
21985"unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int);\n"
21986"__int64 _sarx_i64(__int64, unsigned int);\n"
21987"unsigned __int64 _shlx_u64(unsigned __int64, unsigned int);\n"
21988"unsigned __int64 _shrx_u64(unsigned __int64, unsigned int);\n"
21989"static __inline__\n"
21990"__int64 __mulh(__int64, __int64);\n"
21991"static __inline__\n"
21992"unsigned __int64 __umulh(unsigned __int64, unsigned __int64);\n"
21993"static __inline__\n"
21994"__int64 _mul128(__int64, __int64, __int64*);\n"
21995"static __inline__\n"
21996"unsigned __int64 _umul128(unsigned __int64,\n"
21997" unsigned __int64,\n"
21998" unsigned __int64*);\n"
21999"\n"
22000"#endif /* __x86_64__ */\n"
22001"\n"
22002"#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)\n"
22003"\n"
22004"static __inline__\n"
22005"unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);\n"
22006"static __inline__\n"
22007"unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);\n"
22008"\n"
22009"static __inline__\n"
22010"__int64 _InterlockedDecrement64(__int64 volatile *_Addend);\n"
22011"static __inline__\n"
22012"__int64 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value);\n"
22013"static __inline__\n"
22014"__int64 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value);\n"
22015"static __inline__\n"
22016"__int64 _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value);\n"
22017"static __inline__\n"
22018"__int64 _InterlockedIncrement64(__int64 volatile *_Addend);\n"
22019"static __inline__\n"
22020"__int64 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask);\n"
22021"static __inline__\n"
22022"__int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask);\n"
22023"static __inline__\n"
22024"__int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);\n"
22025"\n"
22026"#endif\n"
22027"\n"
22028"/*----------------------------------------------------------------------------*\\\n"
22029"|* Interlocked Exchange Add\n"
22030"\\*----------------------------------------------------------------------------*/\n"
22031"#if defined(__arm__) || defined(__aarch64__)\n"
22032"char _InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value);\n"
22033"char _InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value);\n"
22034"char _InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value);\n"
22035"short _InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value);\n"
22036"short _InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value);\n"
22037"short _InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value);\n"
22038"long _InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value);\n"
22039"long _InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value);\n"
22040"long _InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value);\n"
22041"__int64 _InterlockedExchangeAdd64_acq(__int64 volatile *_Addend, __int64 _Value);\n"
22042"__int64 _InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value);\n"
22043"__int64 _InterlockedExchangeAdd64_rel(__int64 volatile *_Addend, __int64 _Value);\n"
22044"#endif\n"
22045"/*----------------------------------------------------------------------------*\\\n"
22046"|* Interlocked Increment\n"
22047"\\*----------------------------------------------------------------------------*/\n"
22048"#if defined(__arm__) || defined(__aarch64__)\n"
22049"short _InterlockedIncrement16_acq(short volatile *_Value);\n"
22050"short _InterlockedIncrement16_nf(short volatile *_Value);\n"
22051"short _InterlockedIncrement16_rel(short volatile *_Value);\n"
22052"long _InterlockedIncrement_acq(long volatile *_Value);\n"
22053"long _InterlockedIncrement_nf(long volatile *_Value);\n"
22054"long _InterlockedIncrement_rel(long volatile *_Value);\n"
22055"__int64 _InterlockedIncrement64_acq(__int64 volatile *_Value);\n"
22056"__int64 _InterlockedIncrement64_nf(__int64 volatile *_Value);\n"
22057"__int64 _InterlockedIncrement64_rel(__int64 volatile *_Value);\n"
22058"#endif\n"
22059"/*----------------------------------------------------------------------------*\\\n"
22060"|* Interlocked Decrement\n"
22061"\\*----------------------------------------------------------------------------*/\n"
22062"#if defined(__arm__) || defined(__aarch64__)\n"
22063"short _InterlockedDecrement16_acq(short volatile *_Value);\n"
22064"short _InterlockedDecrement16_nf(short volatile *_Value);\n"
22065"short _InterlockedDecrement16_rel(short volatile *_Value);\n"
22066"long _InterlockedDecrement_acq(long volatile *_Value);\n"
22067"long _InterlockedDecrement_nf(long volatile *_Value);\n"
22068"long _InterlockedDecrement_rel(long volatile *_Value);\n"
22069"__int64 _InterlockedDecrement64_acq(__int64 volatile *_Value);\n"
22070"__int64 _InterlockedDecrement64_nf(__int64 volatile *_Value);\n"
22071"__int64 _InterlockedDecrement64_rel(__int64 volatile *_Value);\n"
22072"#endif\n"
22073"/*----------------------------------------------------------------------------*\\\n"
22074"|* Interlocked And\n"
22075"\\*----------------------------------------------------------------------------*/\n"
22076"#if defined(__arm__) || defined(__aarch64__)\n"
22077"char _InterlockedAnd8_acq(char volatile *_Value, char _Mask);\n"
22078"char _InterlockedAnd8_nf(char volatile *_Value, char _Mask);\n"
22079"char _InterlockedAnd8_rel(char volatile *_Value, char _Mask);\n"
22080"short _InterlockedAnd16_acq(short volatile *_Value, short _Mask);\n"
22081"short _InterlockedAnd16_nf(short volatile *_Value, short _Mask);\n"
22082"short _InterlockedAnd16_rel(short volatile *_Value, short _Mask);\n"
22083"long _InterlockedAnd_acq(long volatile *_Value, long _Mask);\n"
22084"long _InterlockedAnd_nf(long volatile *_Value, long _Mask);\n"
22085"long _InterlockedAnd_rel(long volatile *_Value, long _Mask);\n"
22086"__int64 _InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask);\n"
22087"__int64 _InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask);\n"
22088"__int64 _InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask);\n"
22089"#endif\n"
22090"/*----------------------------------------------------------------------------*\\\n"
22091"|* Bit Counting and Testing\n"
22092"\\*----------------------------------------------------------------------------*/\n"
22093"#if defined(__arm__) || defined(__aarch64__)\n"
22094"unsigned char _interlockedbittestandset_acq(long volatile *_BitBase,\n"
22095" long _BitPos);\n"
22096"unsigned char _interlockedbittestandset_nf(long volatile *_BitBase,\n"
22097" long _BitPos);\n"
22098"unsigned char _interlockedbittestandset_rel(long volatile *_BitBase,\n"
22099" long _BitPos);\n"
22100"unsigned char _interlockedbittestandreset_acq(long volatile *_BitBase,\n"
22101" long _BitPos);\n"
22102"unsigned char _interlockedbittestandreset_nf(long volatile *_BitBase,\n"
22103" long _BitPos);\n"
22104"unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase,\n"
22105" long _BitPos);\n"
22106"#endif\n"
22107"/*----------------------------------------------------------------------------*\\\n"
22108"|* Interlocked Or\n"
22109"\\*----------------------------------------------------------------------------*/\n"
22110"#if defined(__arm__) || defined(__aarch64__)\n"
22111"char _InterlockedOr8_acq(char volatile *_Value, char _Mask);\n"
22112"char _InterlockedOr8_nf(char volatile *_Value, char _Mask);\n"
22113"char _InterlockedOr8_rel(char volatile *_Value, char _Mask);\n"
22114"short _InterlockedOr16_acq(short volatile *_Value, short _Mask);\n"
22115"short _InterlockedOr16_nf(short volatile *_Value, short _Mask);\n"
22116"short _InterlockedOr16_rel(short volatile *_Value, short _Mask);\n"
22117"long _InterlockedOr_acq(long volatile *_Value, long _Mask);\n"
22118"long _InterlockedOr_nf(long volatile *_Value, long _Mask);\n"
22119"long _InterlockedOr_rel(long volatile *_Value, long _Mask);\n"
22120"__int64 _InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask);\n"
22121"__int64 _InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask);\n"
22122"__int64 _InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask);\n"
22123"#endif\n"
22124"/*----------------------------------------------------------------------------*\\\n"
22125"|* Interlocked Xor\n"
22126"\\*----------------------------------------------------------------------------*/\n"
22127"#if defined(__arm__) || defined(__aarch64__)\n"
22128"char _InterlockedXor8_acq(char volatile *_Value, char _Mask);\n"
22129"char _InterlockedXor8_nf(char volatile *_Value, char _Mask);\n"
22130"char _InterlockedXor8_rel(char volatile *_Value, char _Mask);\n"
22131"short _InterlockedXor16_acq(short volatile *_Value, short _Mask);\n"
22132"short _InterlockedXor16_nf(short volatile *_Value, short _Mask);\n"
22133"short _InterlockedXor16_rel(short volatile *_Value, short _Mask);\n"
22134"long _InterlockedXor_acq(long volatile *_Value, long _Mask);\n"
22135"long _InterlockedXor_nf(long volatile *_Value, long _Mask);\n"
22136"long _InterlockedXor_rel(long volatile *_Value, long _Mask);\n"
22137"__int64 _InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask);\n"
22138"__int64 _InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask);\n"
22139"__int64 _InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask);\n"
22140"#endif\n"
22141"/*----------------------------------------------------------------------------*\\\n"
22142"|* Interlocked Exchange\n"
22143"\\*----------------------------------------------------------------------------*/\n"
22144"#if defined(__arm__) || defined(__aarch64__)\n"
22145"char _InterlockedExchange8_acq(char volatile *_Target, char _Value);\n"
22146"char _InterlockedExchange8_nf(char volatile *_Target, char _Value);\n"
22147"char _InterlockedExchange8_rel(char volatile *_Target, char _Value);\n"
22148"short _InterlockedExchange16_acq(short volatile *_Target, short _Value);\n"
22149"short _InterlockedExchange16_nf(short volatile *_Target, short _Value);\n"
22150"short _InterlockedExchange16_rel(short volatile *_Target, short _Value);\n"
22151"long _InterlockedExchange_acq(long volatile *_Target, long _Value);\n"
22152"long _InterlockedExchange_nf(long volatile *_Target, long _Value);\n"
22153"long _InterlockedExchange_rel(long volatile *_Target, long _Value);\n"
22154"__int64 _InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value);\n"
22155"__int64 _InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value);\n"
22156"__int64 _InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value);\n"
22157"#endif\n"
22158"/*----------------------------------------------------------------------------*\\\n"
22159"|* Interlocked Compare Exchange\n"
22160"\\*----------------------------------------------------------------------------*/\n"
22161"#if defined(__arm__) || defined(__aarch64__)\n"
22162"char _InterlockedCompareExchange8_acq(char volatile *_Destination,\n"
22163" char _Exchange, char _Comparand);\n"
22164"char _InterlockedCompareExchange8_nf(char volatile *_Destination,\n"
22165" char _Exchange, char _Comparand);\n"
22166"char _InterlockedCompareExchange8_rel(char volatile *_Destination,\n"
22167" char _Exchange, char _Comparand);\n"
22168"short _InterlockedCompareExchange16_acq(short volatile *_Destination,\n"
22169" short _Exchange, short _Comparand);\n"
22170"short _InterlockedCompareExchange16_nf(short volatile *_Destination,\n"
22171" short _Exchange, short _Comparand);\n"
22172"short _InterlockedCompareExchange16_rel(short volatile *_Destination,\n"
22173" short _Exchange, short _Comparand);\n"
22174"long _InterlockedCompareExchange_acq(long volatile *_Destination,\n"
22175" long _Exchange, long _Comparand);\n"
22176"long _InterlockedCompareExchange_nf(long volatile *_Destination,\n"
22177" long _Exchange, long _Comparand);\n"
22178"long _InterlockedCompareExchange_rel(long volatile *_Destination,\n"
22179" long _Exchange, long _Comparand);\n"
22180"__int64 _InterlockedCompareExchange64_acq(__int64 volatile *_Destination,\n"
22181" __int64 _Exchange, __int64 _Comparand);\n"
22182"__int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination,\n"
22183" __int64 _Exchange, __int64 _Comparand);\n"
22184"__int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination,\n"
22185" __int64 _Exchange, __int64 _Comparand);\n"
22186"#endif\n"
22187"\n"
22188"/*----------------------------------------------------------------------------*\\\n"
22189"|* movs, stos\n"
22190"\\*----------------------------------------------------------------------------*/\n"
22191"#if defined(__i386__) || defined(__x86_64__)\n"
22192"static __inline__ void __DEFAULT_FN_ATTRS\n"
22193"__movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) {\n"
22194" __asm__ __volatile__(\"rep movsb\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n"
22195" : : \"memory\");\n"
22196"}\n"
22197"static __inline__ void __DEFAULT_FN_ATTRS\n"
22198"__movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) {\n"
22199" __asm__ __volatile__(\"rep movsl\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n"
22200" : : \"memory\");\n"
22201"}\n"
22202"static __inline__ void __DEFAULT_FN_ATTRS\n"
22203"__movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) {\n"
22204" __asm__ __volatile__(\"rep movsw\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n"
22205" : : \"memory\");\n"
22206"}\n"
22207"static __inline__ void __DEFAULT_FN_ATTRS\n"
22208"__stosd(unsigned long *__dst, unsigned long __x, size_t __n) {\n"
22209" __asm__ __volatile__(\"rep stosl\" : \"+D\"(__dst), \"+c\"(__n) : \"a\"(__x)\n"
22210" : \"memory\");\n"
22211"}\n"
22212"static __inline__ void __DEFAULT_FN_ATTRS\n"
22213"__stosw(unsigned short *__dst, unsigned short __x, size_t __n) {\n"
22214" __asm__ __volatile__(\"rep stosw\" : \"+D\"(__dst), \"+c\"(__n) : \"a\"(__x)\n"
22215" : \"memory\");\n"
22216"}\n"
22217"#endif\n"
22218"#ifdef __x86_64__\n"
22219"static __inline__ void __DEFAULT_FN_ATTRS\n"
22220"__movsq(unsigned long long *__dst, unsigned long long const *__src, size_t __n) {\n"
22221" __asm__ __volatile__(\"rep movsq\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n"
22222" : : \"memory\");\n"
22223"}\n"
22224"static __inline__ void __DEFAULT_FN_ATTRS\n"
22225"__stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) {\n"
22226" __asm__ __volatile__(\"rep stosq\" : \"+D\"(__dst), \"+c\"(__n) : \"a\"(__x)\n"
22227" : \"memory\");\n"
22228"}\n"
22229"#endif\n"
22230"\n"
22231"/*----------------------------------------------------------------------------*\\\n"
22232"|* Misc\n"
22233"\\*----------------------------------------------------------------------------*/\n"
22234"#if defined(__i386__) || defined(__x86_64__)\n"
22235"static __inline__ void __DEFAULT_FN_ATTRS\n"
22236"__cpuid(int __info[4], int __level) {\n"
22237" __asm__ (\"cpuid\" : \"=a\"(__info[0]), \"=b\" (__info[1]), \"=c\"(__info[2]), \"=d\"(__info[3])\n"
22238" : \"a\"(__level), \"c\"(0));\n"
22239"}\n"
22240"static __inline__ void __DEFAULT_FN_ATTRS\n"
22241"__cpuidex(int __info[4], int __level, int __ecx) {\n"
22242" __asm__ (\"cpuid\" : \"=a\"(__info[0]), \"=b\" (__info[1]), \"=c\"(__info[2]), \"=d\"(__info[3])\n"
22243" : \"a\"(__level), \"c\"(__ecx));\n"
22244"}\n"
22245"static __inline__ unsigned __int64 __cdecl __DEFAULT_FN_ATTRS\n"
22246"_xgetbv(unsigned int __xcr_no) {\n"
22247" unsigned int __eax, __edx;\n"
22248" __asm__ (\"xgetbv\" : \"=a\" (__eax), \"=d\" (__edx) : \"c\" (__xcr_no));\n"
22249" return ((unsigned __int64)__edx << 32) | __eax;\n"
22250"}\n"
22251"static __inline__ void __DEFAULT_FN_ATTRS\n"
22252"__halt(void) {\n"
22253" __asm__ volatile (\"hlt\");\n"
22254"}\n"
22255"#endif\n"
22256"\n"
22257"#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)\n"
22258"static __inline__ void __DEFAULT_FN_ATTRS\n"
22259"__nop(void) {\n"
22260" __asm__ volatile (\"nop\");\n"
22261"}\n"
22262"#endif\n"
22263"\n"
22264"/*----------------------------------------------------------------------------*\\\n"
22265"|* MS AArch64 specific\n"
22266"\\*----------------------------------------------------------------------------*/\n"
22267"#if defined(__aarch64__)\n"
22268"unsigned __int64 __getReg(int);\n"
22269"long _InterlockedAdd(long volatile *Addend, long Value);\n"
22270"__int64 _ReadStatusReg(int);\n"
22271"void _WriteStatusReg(int, __int64);\n"
22272"\n"
22273"static inline unsigned short _byteswap_ushort (unsigned short val) {\n"
22274" return __builtin_bswap16(val);\n"
22275"}\n"
22276"static inline unsigned long _byteswap_ulong (unsigned long val) {\n"
22277" return __builtin_bswap32(val);\n"
22278"}\n"
22279"static inline unsigned __int64 _byteswap_uint64 (unsigned __int64 val) {\n"
22280" return __builtin_bswap64(val);\n"
22281"}\n"
22282"#endif\n"
22283"\n"
22284"/*----------------------------------------------------------------------------*\\\n"
22285"|* Privileged intrinsics\n"
22286"\\*----------------------------------------------------------------------------*/\n"
22287"#if defined(__i386__) || defined(__x86_64__)\n"
22288"static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS\n"
22289"__readmsr(unsigned long __register) {\n"
22290" // Loads the contents of a 64-bit model specific register (MSR) specified in\n"
22291" // the ECX register into registers EDX:EAX. The EDX register is loaded with\n"
22292" // the high-order 32 bits of the MSR and the EAX register is loaded with the\n"
22293" // low-order 32 bits. If less than 64 bits are implemented in the MSR being\n"
22294" // read, the values returned to EDX:EAX in unimplemented bit locations are\n"
22295" // undefined.\n"
22296" unsigned long __edx;\n"
22297" unsigned long __eax;\n"
22298" __asm__ (\"rdmsr\" : \"=d\"(__edx), \"=a\"(__eax) : \"c\"(__register));\n"
22299" return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax;\n"
22300"}\n"
22301"\n"
22302"static __inline__ unsigned long __DEFAULT_FN_ATTRS\n"
22303"__readcr3(void) {\n"
22304" unsigned long __cr3_val;\n"
22305" __asm__ __volatile__ (\"mov %%cr3, %0\" : \"=q\"(__cr3_val) : : \"memory\");\n"
22306" return __cr3_val;\n"
22307"}\n"
22308"\n"
22309"static __inline__ void __DEFAULT_FN_ATTRS\n"
22310"__writecr3(unsigned int __cr3_val) {\n"
22311" __asm__ (\"mov %0, %%cr3\" : : \"q\"(__cr3_val) : \"memory\");\n"
22312"}\n"
22313"#endif\n"
22314"\n"
22315"#ifdef __cplusplus\n"
22316"}\n"
22317"#endif\n"
22318"\n"
22319"#undef __DEFAULT_FN_ATTRS\n"
22320"\n"
22321"#endif /* __INTRIN_H */\n"
22322"#endif /* _MSC_VER */\n"
22323"" } ,
22324 { "/builtins/inttypes.h" , "/*===---- inttypes.h - Standard header for integer printf macros ----------===*\\\n"
22325" *\n"
22326" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22327" * of this software and associated documentation files (the \"Software\"), to deal\n"
22328" * in the Software without restriction, including without limitation the rights\n"
22329" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22330" * copies of the Software, and to permit persons to whom the Software is\n"
22331" * furnished to do so, subject to the following conditions:\n"
22332" *\n"
22333" * The above copyright notice and this permission notice shall be included in\n"
22334" * all copies or substantial portions of the Software.\n"
22335" *\n"
22336" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22337" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22338" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22339" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22340" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22341" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22342" * THE SOFTWARE.\n"
22343" *\n"
22344"\\*===----------------------------------------------------------------------===*/\n"
22345"\n"
22346"#ifndef __CLANG_INTTYPES_H\n"
22347"#define __CLANG_INTTYPES_H\n"
22348"\n"
22349"#if defined(_MSC_VER) && _MSC_VER < 1800\n"
22350"#error MSVC does not have inttypes.h prior to Visual Studio 2013\n"
22351"#endif\n"
22352"\n"
22353"#include_next <inttypes.h>\n"
22354"\n"
22355"#if defined(_MSC_VER) && _MSC_VER < 1900\n"
22356"/* MSVC headers define int32_t as int, but PRIx32 as \"lx\" instead of \"x\".\n"
22357" * This triggers format warnings, so fix it up here. */\n"
22358"#undef PRId32\n"
22359"#undef PRIdLEAST32\n"
22360"#undef PRIdFAST32\n"
22361"#undef PRIi32\n"
22362"#undef PRIiLEAST32\n"
22363"#undef PRIiFAST32\n"
22364"#undef PRIo32\n"
22365"#undef PRIoLEAST32\n"
22366"#undef PRIoFAST32\n"
22367"#undef PRIu32\n"
22368"#undef PRIuLEAST32\n"
22369"#undef PRIuFAST32\n"
22370"#undef PRIx32\n"
22371"#undef PRIxLEAST32\n"
22372"#undef PRIxFAST32\n"
22373"#undef PRIX32\n"
22374"#undef PRIXLEAST32\n"
22375"#undef PRIXFAST32\n"
22376"\n"
22377"#undef SCNd32\n"
22378"#undef SCNdLEAST32\n"
22379"#undef SCNdFAST32\n"
22380"#undef SCNi32\n"
22381"#undef SCNiLEAST32\n"
22382"#undef SCNiFAST32\n"
22383"#undef SCNo32\n"
22384"#undef SCNoLEAST32\n"
22385"#undef SCNoFAST32\n"
22386"#undef SCNu32\n"
22387"#undef SCNuLEAST32\n"
22388"#undef SCNuFAST32\n"
22389"#undef SCNx32\n"
22390"#undef SCNxLEAST32\n"
22391"#undef SCNxFAST32\n"
22392"\n"
22393"#define PRId32 \"d\"\n"
22394"#define PRIdLEAST32 \"d\"\n"
22395"#define PRIdFAST32 \"d\"\n"
22396"#define PRIi32 \"i\"\n"
22397"#define PRIiLEAST32 \"i\"\n"
22398"#define PRIiFAST32 \"i\"\n"
22399"#define PRIo32 \"o\"\n"
22400"#define PRIoLEAST32 \"o\"\n"
22401"#define PRIoFAST32 \"o\"\n"
22402"#define PRIu32 \"u\"\n"
22403"#define PRIuLEAST32 \"u\"\n"
22404"#define PRIuFAST32 \"u\"\n"
22405"#define PRIx32 \"x\"\n"
22406"#define PRIxLEAST32 \"x\"\n"
22407"#define PRIxFAST32 \"x\"\n"
22408"#define PRIX32 \"X\"\n"
22409"#define PRIXLEAST32 \"X\"\n"
22410"#define PRIXFAST32 \"X\"\n"
22411"\n"
22412"#define SCNd32 \"d\"\n"
22413"#define SCNdLEAST32 \"d\"\n"
22414"#define SCNdFAST32 \"d\"\n"
22415"#define SCNi32 \"i\"\n"
22416"#define SCNiLEAST32 \"i\"\n"
22417"#define SCNiFAST32 \"i\"\n"
22418"#define SCNo32 \"o\"\n"
22419"#define SCNoLEAST32 \"o\"\n"
22420"#define SCNoFAST32 \"o\"\n"
22421"#define SCNu32 \"u\"\n"
22422"#define SCNuLEAST32 \"u\"\n"
22423"#define SCNuFAST32 \"u\"\n"
22424"#define SCNx32 \"x\"\n"
22425"#define SCNxLEAST32 \"x\"\n"
22426"#define SCNxFAST32 \"x\"\n"
22427"#endif\n"
22428"\n"
22429"#endif /* __CLANG_INTTYPES_H */\n"
22430"" } ,
22431 { "/builtins/invpcidintrin.h" , "/*===------------- invpcidintrin.h - INVPCID intrinsic ---------------------===\n"
22432" *\n"
22433" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22434" * of this software and associated documentation files (the \"Software\"), to deal\n"
22435" * in the Software without restriction, including without limitation the rights\n"
22436" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22437" * copies of the Software, and to permit persons to whom the Software is\n"
22438" * furnished to do so, subject to the following conditions:\n"
22439" *\n"
22440" * The above copyright notice and this permission notice shall be included in\n"
22441" * all copies or substantial portions of the Software.\n"
22442" *\n"
22443" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22444" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22445" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22446" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22447" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22448" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22449" * THE SOFTWARE.\n"
22450" *\n"
22451" *===-----------------------------------------------------------------------===\n"
22452" */\n"
22453"\n"
22454"#ifndef __IMMINTRIN_H\n"
22455"#error \"Never use <invpcidintrin.h> directly; include <immintrin.h> instead.\"\n"
22456"#endif\n"
22457"\n"
22458"#ifndef __INVPCIDINTRIN_H\n"
22459"#define __INVPCIDINTRIN_H\n"
22460"\n"
22461"static __inline__ void\n"
22462" __attribute__((__always_inline__, __nodebug__, __target__(\"invpcid\")))\n"
22463"_invpcid(unsigned int __type, void *__descriptor) {\n"
22464" __builtin_ia32_invpcid(__type, __descriptor);\n"
22465"}\n"
22466"\n"
22467"#endif /* __INVPCIDINTRIN_H */\n"
22468"" } ,
22469 { "/builtins/iso646.h" , "/*===---- iso646.h - Standard header for alternate spellings of operators---===\n"
22470" *\n"
22471" * Copyright (c) 2008 Eli Friedman\n"
22472" *\n"
22473" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22474" * of this software and associated documentation files (the \"Software\"), to deal\n"
22475" * in the Software without restriction, including without limitation the rights\n"
22476" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22477" * copies of the Software, and to permit persons to whom the Software is\n"
22478" * furnished to do so, subject to the following conditions:\n"
22479" *\n"
22480" * The above copyright notice and this permission notice shall be included in\n"
22481" * all copies or substantial portions of the Software.\n"
22482" *\n"
22483" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22484" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22485" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22486" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22487" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22488" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22489" * THE SOFTWARE.\n"
22490" *\n"
22491" *===-----------------------------------------------------------------------===\n"
22492" */\n"
22493"\n"
22494"#ifndef __ISO646_H\n"
22495"#define __ISO646_H\n"
22496"\n"
22497"#ifndef __cplusplus\n"
22498"#define and &&\n"
22499"#define and_eq &=\n"
22500"#define bitand &\n"
22501"#define bitor |\n"
22502"#define compl ~\n"
22503"#define not !\n"
22504"#define not_eq !=\n"
22505"#define or ||\n"
22506"#define or_eq |=\n"
22507"#define xor ^\n"
22508"#define xor_eq ^=\n"
22509"#endif\n"
22510"\n"
22511"#endif /* __ISO646_H */\n"
22512"" } ,
22513 { "/builtins/limits.h" , "/*===---- limits.h - Standard header for integer sizes --------------------===*\\\n"
22514" *\n"
22515" * Copyright (c) 2009 Chris Lattner\n"
22516" *\n"
22517" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22518" * of this software and associated documentation files (the \"Software\"), to deal\n"
22519" * in the Software without restriction, including without limitation the rights\n"
22520" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22521" * copies of the Software, and to permit persons to whom the Software is\n"
22522" * furnished to do so, subject to the following conditions:\n"
22523" *\n"
22524" * The above copyright notice and this permission notice shall be included in\n"
22525" * all copies or substantial portions of the Software.\n"
22526" *\n"
22527" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22528" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22529" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22530" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22531" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22532" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22533" * THE SOFTWARE.\n"
22534" *\n"
22535"\\*===----------------------------------------------------------------------===*/\n"
22536"\n"
22537"#ifndef __CLANG_LIMITS_H\n"
22538"#define __CLANG_LIMITS_H\n"
22539"\n"
22540"/* The system's limits.h may, in turn, try to #include_next GCC's limits.h.\n"
22541" Avert this #include_next madness. */\n"
22542"#if defined __GNUC__ && !defined _GCC_LIMITS_H_\n"
22543"#define _GCC_LIMITS_H_\n"
22544"#endif\n"
22545"\n"
22546"/* System headers include a number of constants from POSIX in <limits.h>.\n"
22547" Include it if we're hosted. */\n"
22548"#if __STDC_HOSTED__ && __has_include_next(<limits.h>)\n"
22549"#include_next <limits.h>\n"
22550"#endif\n"
22551"\n"
22552"/* Many system headers try to \"help us out\" by defining these. No really, we\n"
22553" know how big each datatype is. */\n"
22554"#undef SCHAR_MIN\n"
22555"#undef SCHAR_MAX\n"
22556"#undef UCHAR_MAX\n"
22557"#undef SHRT_MIN\n"
22558"#undef SHRT_MAX\n"
22559"#undef USHRT_MAX\n"
22560"#undef INT_MIN\n"
22561"#undef INT_MAX\n"
22562"#undef UINT_MAX\n"
22563"#undef LONG_MIN\n"
22564"#undef LONG_MAX\n"
22565"#undef ULONG_MAX\n"
22566"\n"
22567"#undef CHAR_BIT\n"
22568"#undef CHAR_MIN\n"
22569"#undef CHAR_MAX\n"
22570"\n"
22571"/* C90/99 5.2.4.2.1 */\n"
22572"#define SCHAR_MAX __SCHAR_MAX__\n"
22573"#define SHRT_MAX __SHRT_MAX__\n"
22574"#define INT_MAX __INT_MAX__\n"
22575"#define LONG_MAX __LONG_MAX__\n"
22576"\n"
22577"#define SCHAR_MIN (-__SCHAR_MAX__-1)\n"
22578"#define SHRT_MIN (-__SHRT_MAX__ -1)\n"
22579"#define INT_MIN (-__INT_MAX__ -1)\n"
22580"#define LONG_MIN (-__LONG_MAX__ -1L)\n"
22581"\n"
22582"#define UCHAR_MAX (__SCHAR_MAX__*2 +1)\n"
22583"#define USHRT_MAX (__SHRT_MAX__ *2 +1)\n"
22584"#define UINT_MAX (__INT_MAX__ *2U +1U)\n"
22585"#define ULONG_MAX (__LONG_MAX__ *2UL+1UL)\n"
22586"\n"
22587"#ifndef MB_LEN_MAX\n"
22588"#define MB_LEN_MAX 1\n"
22589"#endif\n"
22590"\n"
22591"#define CHAR_BIT __CHAR_BIT__\n"
22592"\n"
22593"#ifdef __CHAR_UNSIGNED__ /* -funsigned-char */\n"
22594"#define CHAR_MIN 0\n"
22595"#define CHAR_MAX UCHAR_MAX\n"
22596"#else\n"
22597"#define CHAR_MIN SCHAR_MIN\n"
22598"#define CHAR_MAX __SCHAR_MAX__\n"
22599"#endif\n"
22600"\n"
22601"/* C99 5.2.4.2.1: Added long long.\n"
22602" C++11 18.3.3.2: same contents as the Standard C Library header <limits.h>.\n"
22603" */\n"
22604"#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L\n"
22605"\n"
22606"#undef LLONG_MIN\n"
22607"#undef LLONG_MAX\n"
22608"#undef ULLONG_MAX\n"
22609"\n"
22610"#define LLONG_MAX __LONG_LONG_MAX__\n"
22611"#define LLONG_MIN (-__LONG_LONG_MAX__-1LL)\n"
22612"#define ULLONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL)\n"
22613"#endif\n"
22614"\n"
22615"/* LONG_LONG_MIN/LONG_LONG_MAX/ULONG_LONG_MAX are a GNU extension. It's too bad\n"
22616" that we don't have something like #pragma poison that could be used to\n"
22617" deprecate a macro - the code should just use LLONG_MAX and friends.\n"
22618" */\n"
22619"#if defined(__GNU_LIBRARY__) ? defined(__USE_GNU) : !defined(__STRICT_ANSI__)\n"
22620"\n"
22621"#undef LONG_LONG_MIN\n"
22622"#undef LONG_LONG_MAX\n"
22623"#undef ULONG_LONG_MAX\n"
22624"\n"
22625"#define LONG_LONG_MAX __LONG_LONG_MAX__\n"
22626"#define LONG_LONG_MIN (-__LONG_LONG_MAX__-1LL)\n"
22627"#define ULONG_LONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL)\n"
22628"#endif\n"
22629"\n"
22630"#endif /* __CLANG_LIMITS_H */\n"
22631"" } ,
22632 { "/builtins/lwpintrin.h" , "/*===---- lwpintrin.h - LWP intrinsics -------------------------------------===\n"
22633" *\n"
22634" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22635" * of this software and associated documentation files (the \"Software\"), to deal\n"
22636" * in the Software without restriction, including without limitation the rights\n"
22637" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22638" * copies of the Software, and to permit persons to whom the Software is\n"
22639" * furnished to do so, subject to the following conditions:\n"
22640" *\n"
22641" * The above copyright notice and this permission notice shall be included in\n"
22642" * all copies or substantial portions of the Software.\n"
22643" *\n"
22644" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22645" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22646" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22647" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22648" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22649" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22650" * THE SOFTWARE.\n"
22651" *\n"
22652" *===-----------------------------------------------------------------------===\n"
22653" */\n"
22654"\n"
22655"#ifndef __X86INTRIN_H\n"
22656"#error \"Never use <lwpintrin.h> directly; include <x86intrin.h> instead.\"\n"
22657"#endif\n"
22658"\n"
22659"#ifndef __LWPINTRIN_H\n"
22660"#define __LWPINTRIN_H\n"
22661"\n"
22662"/* Define the default attributes for the functions in this file. */\n"
22663"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"lwp\")))\n"
22664"\n"
22665"/// Parses the LWPCB at the specified address and enables\n"
22666"/// profiling if valid.\n"
22667"///\n"
22668"/// \\headerfile <x86intrin.h>\n"
22669"///\n"
22670"/// This intrinsic corresponds to the <c> LLWPCB </c> instruction.\n"
22671"///\n"
22672"/// \\param __addr\n"
22673"/// Address to the new Lightweight Profiling Control Block (LWPCB). If the\n"
22674"/// LWPCB is valid, writes the address into the LWP_CBADDR MSR and enables\n"
22675"/// Lightweight Profiling.\n"
22676"static __inline__ void __DEFAULT_FN_ATTRS\n"
22677"__llwpcb (void *__addr)\n"
22678"{\n"
22679" __builtin_ia32_llwpcb(__addr);\n"
22680"}\n"
22681"\n"
22682"/// Flushes the LWP state to memory and returns the address of the LWPCB.\n"
22683"///\n"
22684"/// \\headerfile <x86intrin.h>\n"
22685"///\n"
22686"/// This intrinsic corresponds to the <c> SLWPCB </c> instruction.\n"
22687"///\n"
22688"/// \\return\n"
22689"/// Address to the current Lightweight Profiling Control Block (LWPCB).\n"
22690"/// If LWP is not currently enabled, returns NULL.\n"
22691"static __inline__ void* __DEFAULT_FN_ATTRS\n"
22692"__slwpcb (void)\n"
22693"{\n"
22694" return __builtin_ia32_slwpcb();\n"
22695"}\n"
22696"\n"
22697"/// Inserts programmed event record into the LWP event ring buffer\n"
22698"/// and advances the ring buffer pointer.\n"
22699"///\n"
22700"/// \\headerfile <x86intrin.h>\n"
22701"///\n"
22702"/// This intrinsic corresponds to the <c> LWPINS </c> instruction.\n"
22703"///\n"
22704"/// \\param DATA2\n"
22705"/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field.\n"
22706"/// \\param DATA1\n"
22707"/// A 32-bit value is inserted into the 32-bit Data1 field.\n"
22708"/// \\param FLAGS\n"
22709"/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n"
22710"/// \\returns If the ring buffer is full and LWP is running in Synchronized Mode,\n"
22711"/// the event record overwrites the last record in the buffer, the MissedEvents\n"
22712"/// counter in the LWPCB is incremented, the head pointer is not advanced, and\n"
22713"/// 1 is returned. Otherwise 0 is returned.\n"
22714"#define __lwpins32(DATA2, DATA1, FLAGS) \\\n"
22715" (__builtin_ia32_lwpins32((unsigned int) (DATA2), (unsigned int) (DATA1), \\\n"
22716" (unsigned int) (FLAGS)))\n"
22717"\n"
22718"/// Decrements the LWP programmed value sample event counter. If the result is\n"
22719"/// negative, inserts an event record into the LWP event ring buffer in memory\n"
22720"/// and advances the ring buffer pointer.\n"
22721"///\n"
22722"/// \\headerfile <x86intrin.h>\n"
22723"///\n"
22724"/// This intrinsic corresponds to the <c> LWPVAL </c> instruction.\n"
22725"///\n"
22726"/// \\param DATA2\n"
22727"/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field.\n"
22728"/// \\param DATA1\n"
22729"/// A 32-bit value is inserted into the 32-bit Data1 field.\n"
22730"/// \\param FLAGS\n"
22731"/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n"
22732"#define __lwpval32(DATA2, DATA1, FLAGS) \\\n"
22733" (__builtin_ia32_lwpval32((unsigned int) (DATA2), (unsigned int) (DATA1), \\\n"
22734" (unsigned int) (FLAGS)))\n"
22735"\n"
22736"#ifdef __x86_64__\n"
22737"\n"
22738"/// Inserts programmed event record into the LWP event ring buffer\n"
22739"/// and advances the ring buffer pointer.\n"
22740"///\n"
22741"/// \\headerfile <x86intrin.h>\n"
22742"///\n"
22743"/// This intrinsic corresponds to the <c> LWPINS </c> instruction.\n"
22744"///\n"
22745"/// \\param DATA2\n"
22746"/// A 64-bit value is inserted into the 64-bit Data2 field.\n"
22747"/// \\param DATA1\n"
22748"/// A 32-bit value is inserted into the 32-bit Data1 field.\n"
22749"/// \\param FLAGS\n"
22750"/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n"
22751"/// \\returns If the ring buffer is full and LWP is running in Synchronized Mode,\n"
22752"/// the event record overwrites the last record in the buffer, the MissedEvents\n"
22753"/// counter in the LWPCB is incremented, the head pointer is not advanced, and\n"
22754"/// 1 is returned. Otherwise 0 is returned.\n"
22755"#define __lwpins64(DATA2, DATA1, FLAGS) \\\n"
22756" (__builtin_ia32_lwpins64((unsigned long long) (DATA2), (unsigned int) (DATA1), \\\n"
22757" (unsigned int) (FLAGS)))\n"
22758"\n"
22759"/// Decrements the LWP programmed value sample event counter. If the result is\n"
22760"/// negative, inserts an event record into the LWP event ring buffer in memory\n"
22761"/// and advances the ring buffer pointer.\n"
22762"///\n"
22763"/// \\headerfile <x86intrin.h>\n"
22764"///\n"
22765"/// This intrinsic corresponds to the <c> LWPVAL </c> instruction.\n"
22766"///\n"
22767"/// \\param DATA2\n"
22768"/// A 64-bit value is and inserted into the 64-bit Data2 field.\n"
22769"/// \\param DATA1\n"
22770"/// A 32-bit value is inserted into the 32-bit Data1 field.\n"
22771"/// \\param FLAGS\n"
22772"/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n"
22773"#define __lwpval64(DATA2, DATA1, FLAGS) \\\n"
22774" (__builtin_ia32_lwpval64((unsigned long long) (DATA2), (unsigned int) (DATA1), \\\n"
22775" (unsigned int) (FLAGS)))\n"
22776"\n"
22777"#endif\n"
22778"\n"
22779"#undef __DEFAULT_FN_ATTRS\n"
22780"\n"
22781"#endif /* __LWPINTRIN_H */\n"
22782"" } ,
22783 { "/builtins/lzcntintrin.h" , "/*===---- lzcntintrin.h - LZCNT intrinsics ---------------------------------===\n"
22784" *\n"
22785" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22786" * of this software and associated documentation files (the \"Software\"), to deal\n"
22787" * in the Software without restriction, including without limitation the rights\n"
22788" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22789" * copies of the Software, and to permit persons to whom the Software is\n"
22790" * furnished to do so, subject to the following conditions:\n"
22791" *\n"
22792" * The above copyright notice and this permission notice shall be included in\n"
22793" * all copies or substantial portions of the Software.\n"
22794" *\n"
22795" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22796" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22797" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22798" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22799" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22800" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22801" * THE SOFTWARE.\n"
22802" *\n"
22803" *===-----------------------------------------------------------------------===\n"
22804" */\n"
22805"\n"
22806"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
22807"#error \"Never use <lzcntintrin.h> directly; include <x86intrin.h> instead.\"\n"
22808"#endif\n"
22809"\n"
22810"#ifndef __LZCNTINTRIN_H\n"
22811"#define __LZCNTINTRIN_H\n"
22812"\n"
22813"/* Define the default attributes for the functions in this file. */\n"
22814"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"lzcnt\")))\n"
22815"\n"
22816"#ifndef _MSC_VER\n"
22817"/// Counts the number of leading zero bits in the operand.\n"
22818"///\n"
22819"/// \\headerfile <x86intrin.h>\n"
22820"///\n"
22821"/// This intrinsic corresponds to the \\c LZCNT instruction.\n"
22822"///\n"
22823"/// \\param __X\n"
22824"/// An unsigned 16-bit integer whose leading zeros are to be counted.\n"
22825"/// \\returns An unsigned 16-bit integer containing the number of leading zero\n"
22826"/// bits in the operand.\n"
22827"#define __lzcnt16(X) __builtin_ia32_lzcnt_u16((unsigned short)(X))\n"
22828"#endif // _MSC_VER\n"
22829"\n"
22830"/// Counts the number of leading zero bits in the operand.\n"
22831"///\n"
22832"/// \\headerfile <x86intrin.h>\n"
22833"///\n"
22834"/// This intrinsic corresponds to the \\c LZCNT instruction.\n"
22835"///\n"
22836"/// \\param __X\n"
22837"/// An unsigned 32-bit integer whose leading zeros are to be counted.\n"
22838"/// \\returns An unsigned 32-bit integer containing the number of leading zero\n"
22839"/// bits in the operand.\n"
22840"/// \\see _lzcnt_u32\n"
22841"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
22842"__lzcnt32(unsigned int __X)\n"
22843"{\n"
22844" return __builtin_ia32_lzcnt_u32(__X);\n"
22845"}\n"
22846"\n"
22847"/// Counts the number of leading zero bits in the operand.\n"
22848"///\n"
22849"/// \\headerfile <x86intrin.h>\n"
22850"///\n"
22851"/// This intrinsic corresponds to the \\c LZCNT instruction.\n"
22852"///\n"
22853"/// \\param __X\n"
22854"/// An unsigned 32-bit integer whose leading zeros are to be counted.\n"
22855"/// \\returns An unsigned 32-bit integer containing the number of leading zero\n"
22856"/// bits in the operand.\n"
22857"/// \\see __lzcnt32\n"
22858"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
22859"_lzcnt_u32(unsigned int __X)\n"
22860"{\n"
22861" return __builtin_ia32_lzcnt_u32(__X);\n"
22862"}\n"
22863"\n"
22864"#ifdef __x86_64__\n"
22865"#ifndef _MSC_VER\n"
22866"/// Counts the number of leading zero bits in the operand.\n"
22867"///\n"
22868"/// \\headerfile <x86intrin.h>\n"
22869"///\n"
22870"/// This intrinsic corresponds to the \\c LZCNT instruction.\n"
22871"///\n"
22872"/// \\param __X\n"
22873"/// An unsigned 64-bit integer whose leading zeros are to be counted.\n"
22874"/// \\returns An unsigned 64-bit integer containing the number of leading zero\n"
22875"/// bits in the operand.\n"
22876"/// \\see _lzcnt_u64\n"
22877"#define __lzcnt64(X) __builtin_ia32_lzcnt_u64((unsigned long long)(X))\n"
22878"#endif // _MSC_VER\n"
22879"\n"
22880"/// Counts the number of leading zero bits in the operand.\n"
22881"///\n"
22882"/// \\headerfile <x86intrin.h>\n"
22883"///\n"
22884"/// This intrinsic corresponds to the \\c LZCNT instruction.\n"
22885"///\n"
22886"/// \\param __X\n"
22887"/// An unsigned 64-bit integer whose leading zeros are to be counted.\n"
22888"/// \\returns An unsigned 64-bit integer containing the number of leading zero\n"
22889"/// bits in the operand.\n"
22890"/// \\see __lzcnt64\n"
22891"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
22892"_lzcnt_u64(unsigned long long __X)\n"
22893"{\n"
22894" return __builtin_ia32_lzcnt_u64(__X);\n"
22895"}\n"
22896"#endif\n"
22897"\n"
22898"#undef __DEFAULT_FN_ATTRS\n"
22899"\n"
22900"#endif /* __LZCNTINTRIN_H */\n"
22901"" } ,
22902 { "/builtins/mm3dnow.h" , "/*===---- mm3dnow.h - 3DNow! intrinsics ------------------------------------===\n"
22903" *\n"
22904" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22905" * of this software and associated documentation files (the \"Software\"), to deal\n"
22906" * in the Software without restriction, including without limitation the rights\n"
22907" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22908" * copies of the Software, and to permit persons to whom the Software is\n"
22909" * furnished to do so, subject to the following conditions:\n"
22910" *\n"
22911" * The above copyright notice and this permission notice shall be included in\n"
22912" * all copies or substantial portions of the Software.\n"
22913" *\n"
22914" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22915" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22916" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22917" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22918" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22919" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22920" * THE SOFTWARE.\n"
22921" *\n"
22922" *===-----------------------------------------------------------------------===\n"
22923" */\n"
22924"\n"
22925"#ifndef _MM3DNOW_H_INCLUDED\n"
22926"#define _MM3DNOW_H_INCLUDED\n"
22927"\n"
22928"#include <mmintrin.h>\n"
22929"#include <prfchwintrin.h>\n"
22930"\n"
22931"typedef float __v2sf __attribute__((__vector_size__(8)));\n"
22932"\n"
22933"/* Define the default attributes for the functions in this file. */\n"
22934"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"3dnow\"), __min_vector_width__(64)))\n"
22935"\n"
22936"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"3dnow\")))\n"
22937"_m_femms(void) {\n"
22938" __builtin_ia32_femms();\n"
22939"}\n"
22940"\n"
22941"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22942"_m_pavgusb(__m64 __m1, __m64 __m2) {\n"
22943" return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2);\n"
22944"}\n"
22945"\n"
22946"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22947"_m_pf2id(__m64 __m) {\n"
22948" return (__m64)__builtin_ia32_pf2id((__v2sf)__m);\n"
22949"}\n"
22950"\n"
22951"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22952"_m_pfacc(__m64 __m1, __m64 __m2) {\n"
22953" return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2);\n"
22954"}\n"
22955"\n"
22956"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22957"_m_pfadd(__m64 __m1, __m64 __m2) {\n"
22958" return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2);\n"
22959"}\n"
22960"\n"
22961"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22962"_m_pfcmpeq(__m64 __m1, __m64 __m2) {\n"
22963" return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2);\n"
22964"}\n"
22965"\n"
22966"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22967"_m_pfcmpge(__m64 __m1, __m64 __m2) {\n"
22968" return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2);\n"
22969"}\n"
22970"\n"
22971"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22972"_m_pfcmpgt(__m64 __m1, __m64 __m2) {\n"
22973" return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2);\n"
22974"}\n"
22975"\n"
22976"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22977"_m_pfmax(__m64 __m1, __m64 __m2) {\n"
22978" return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2);\n"
22979"}\n"
22980"\n"
22981"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22982"_m_pfmin(__m64 __m1, __m64 __m2) {\n"
22983" return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2);\n"
22984"}\n"
22985"\n"
22986"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22987"_m_pfmul(__m64 __m1, __m64 __m2) {\n"
22988" return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2);\n"
22989"}\n"
22990"\n"
22991"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22992"_m_pfrcp(__m64 __m) {\n"
22993" return (__m64)__builtin_ia32_pfrcp((__v2sf)__m);\n"
22994"}\n"
22995"\n"
22996"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22997"_m_pfrcpit1(__m64 __m1, __m64 __m2) {\n"
22998" return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2);\n"
22999"}\n"
23000"\n"
23001"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23002"_m_pfrcpit2(__m64 __m1, __m64 __m2) {\n"
23003" return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2);\n"
23004"}\n"
23005"\n"
23006"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23007"_m_pfrsqrt(__m64 __m) {\n"
23008" return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m);\n"
23009"}\n"
23010"\n"
23011"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23012"_m_pfrsqrtit1(__m64 __m1, __m64 __m2) {\n"
23013" return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2);\n"
23014"}\n"
23015"\n"
23016"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23017"_m_pfsub(__m64 __m1, __m64 __m2) {\n"
23018" return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2);\n"
23019"}\n"
23020"\n"
23021"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23022"_m_pfsubr(__m64 __m1, __m64 __m2) {\n"
23023" return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2);\n"
23024"}\n"
23025"\n"
23026"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23027"_m_pi2fd(__m64 __m) {\n"
23028" return (__m64)__builtin_ia32_pi2fd((__v2si)__m);\n"
23029"}\n"
23030"\n"
23031"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23032"_m_pmulhrw(__m64 __m1, __m64 __m2) {\n"
23033" return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2);\n"
23034"}\n"
23035"\n"
23036"/* Handle the 3dnowa instructions here. */\n"
23037"#undef __DEFAULT_FN_ATTRS\n"
23038"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"3dnowa\"), __min_vector_width__(64)))\n"
23039"\n"
23040"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23041"_m_pf2iw(__m64 __m) {\n"
23042" return (__m64)__builtin_ia32_pf2iw((__v2sf)__m);\n"
23043"}\n"
23044"\n"
23045"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23046"_m_pfnacc(__m64 __m1, __m64 __m2) {\n"
23047" return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2);\n"
23048"}\n"
23049"\n"
23050"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23051"_m_pfpnacc(__m64 __m1, __m64 __m2) {\n"
23052" return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2);\n"
23053"}\n"
23054"\n"
23055"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23056"_m_pi2fw(__m64 __m) {\n"
23057" return (__m64)__builtin_ia32_pi2fw((__v2si)__m);\n"
23058"}\n"
23059"\n"
23060"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23061"_m_pswapdsf(__m64 __m) {\n"
23062" return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m);\n"
23063"}\n"
23064"\n"
23065"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23066"_m_pswapdsi(__m64 __m) {\n"
23067" return (__m64)__builtin_ia32_pswapdsi((__v2si)__m);\n"
23068"}\n"
23069"\n"
23070"#undef __DEFAULT_FN_ATTRS\n"
23071"\n"
23072"#endif\n"
23073"" } ,
23074 { "/builtins/mm_malloc.h" , "/*===---- mm_malloc.h - Allocating and Freeing Aligned Memory Blocks -------===\n"
23075" *\n"
23076" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
23077" * of this software and associated documentation files (the \"Software\"), to deal\n"
23078" * in the Software without restriction, including without limitation the rights\n"
23079" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
23080" * copies of the Software, and to permit persons to whom the Software is\n"
23081" * furnished to do so, subject to the following conditions:\n"
23082" *\n"
23083" * The above copyright notice and this permission notice shall be included in\n"
23084" * all copies or substantial portions of the Software.\n"
23085" *\n"
23086" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
23087" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
23088" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
23089" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
23090" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
23091" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
23092" * THE SOFTWARE.\n"
23093" *\n"
23094" *===-----------------------------------------------------------------------===\n"
23095" */\n"
23096"\n"
23097"#ifndef __MM_MALLOC_H\n"
23098"#define __MM_MALLOC_H\n"
23099"\n"
23100"#include <stdlib.h>\n"
23101"\n"
23102"#ifdef _WIN32\n"
23103"#include <malloc.h>\n"
23104"#else\n"
23105"#ifndef __cplusplus\n"
23106"extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size);\n"
23107"#else\n"
23108"// Some systems (e.g. those with GNU libc) declare posix_memalign with an\n"
23109"// exception specifier. Via an \"egregious workaround\" in\n"
23110"// Sema::CheckEquivalentExceptionSpec, Clang accepts the following as a valid\n"
23111"// redeclaration of glibc's declaration.\n"
23112"extern \"C\" int posix_memalign(void **__memptr, size_t __alignment, size_t __size);\n"
23113"#endif\n"
23114"#endif\n"
23115"\n"
23116"#if !(defined(_WIN32) && defined(_mm_malloc))\n"
23117"static __inline__ void *__attribute__((__always_inline__, __nodebug__,\n"
23118" __malloc__))\n"
23119"_mm_malloc(size_t __size, size_t __align)\n"
23120"{\n"
23121" if (__align == 1) {\n"
23122" return malloc(__size);\n"
23123" }\n"
23124"\n"
23125" if (!(__align & (__align - 1)) && __align < sizeof(void *))\n"
23126" __align = sizeof(void *);\n"
23127"\n"
23128" void *__mallocedMemory;\n"
23129"#if defined(__MINGW32__)\n"
23130" __mallocedMemory = __mingw_aligned_malloc(__size, __align);\n"
23131"#elif defined(_WIN32)\n"
23132" __mallocedMemory = _aligned_malloc(__size, __align);\n"
23133"#else\n"
23134" if (posix_memalign(&__mallocedMemory, __align, __size))\n"
23135" return 0;\n"
23136"#endif\n"
23137"\n"
23138" return __mallocedMemory;\n"
23139"}\n"
23140"\n"
23141"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
23142"_mm_free(void *__p)\n"
23143"{\n"
23144" free(__p);\n"
23145"}\n"
23146"#endif\n"
23147"\n"
23148"#endif /* __MM_MALLOC_H */\n"
23149"" } ,
23150 { "/builtins/mmintrin.h" , "/*===---- mmintrin.h - MMX intrinsics --------------------------------------===\n"
23151" *\n"
23152" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
23153" * of this software and associated documentation files (the \"Software\"), to deal\n"
23154" * in the Software without restriction, including without limitation the rights\n"
23155" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
23156" * copies of the Software, and to permit persons to whom the Software is\n"
23157" * furnished to do so, subject to the following conditions:\n"
23158" *\n"
23159" * The above copyright notice and this permission notice shall be included in\n"
23160" * all copies or substantial portions of the Software.\n"
23161" *\n"
23162" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
23163" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
23164" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
23165" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
23166" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
23167" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
23168" * THE SOFTWARE.\n"
23169" *\n"
23170" *===-----------------------------------------------------------------------===\n"
23171" */\n"
23172"\n"
23173"#ifndef __MMINTRIN_H\n"
23174"#define __MMINTRIN_H\n"
23175"\n"
23176"typedef long long __m64 __attribute__((__vector_size__(8)));\n"
23177"\n"
23178"typedef long long __v1di __attribute__((__vector_size__(8)));\n"
23179"typedef int __v2si __attribute__((__vector_size__(8)));\n"
23180"typedef short __v4hi __attribute__((__vector_size__(8)));\n"
23181"typedef char __v8qi __attribute__((__vector_size__(8)));\n"
23182"\n"
23183"/* Define the default attributes for the functions in this file. */\n"
23184"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"mmx\"), __min_vector_width__(64)))\n"
23185"\n"
23186"/// Clears the MMX state by setting the state of the x87 stack registers\n"
23187"/// to empty.\n"
23188"///\n"
23189"/// \\headerfile <x86intrin.h>\n"
23190"///\n"
23191"/// This intrinsic corresponds to the <c> EMMS </c> instruction.\n"
23192"///\n"
23193"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"mmx\")))\n"
23194"_mm_empty(void)\n"
23195"{\n"
23196" __builtin_ia32_emms();\n"
23197"}\n"
23198"\n"
23199"/// Constructs a 64-bit integer vector, setting the lower 32 bits to the\n"
23200"/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.\n"
23201"///\n"
23202"/// \\headerfile <x86intrin.h>\n"
23203"///\n"
23204"/// This intrinsic corresponds to the <c> MOVD </c> instruction.\n"
23205"///\n"
23206"/// \\param __i\n"
23207"/// A 32-bit integer value.\n"
23208"/// \\returns A 64-bit integer vector. The lower 32 bits contain the value of the\n"
23209"/// parameter. The upper 32 bits are set to 0.\n"
23210"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23211"_mm_cvtsi32_si64(int __i)\n"
23212"{\n"
23213" return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);\n"
23214"}\n"
23215"\n"
23216"/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit\n"
23217"/// signed integer.\n"
23218"///\n"
23219"/// \\headerfile <x86intrin.h>\n"
23220"///\n"
23221"/// This intrinsic corresponds to the <c> MOVD </c> instruction.\n"
23222"///\n"
23223"/// \\param __m\n"
23224"/// A 64-bit integer vector.\n"
23225"/// \\returns A 32-bit signed integer value containing the lower 32 bits of the\n"
23226"/// parameter.\n"
23227"static __inline__ int __DEFAULT_FN_ATTRS\n"
23228"_mm_cvtsi64_si32(__m64 __m)\n"
23229"{\n"
23230" return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);\n"
23231"}\n"
23232"\n"
23233"/// Casts a 64-bit signed integer value into a 64-bit integer vector.\n"
23234"///\n"
23235"/// \\headerfile <x86intrin.h>\n"
23236"///\n"
23237"/// This intrinsic corresponds to the <c> MOVQ </c> instruction.\n"
23238"///\n"
23239"/// \\param __i\n"
23240"/// A 64-bit signed integer.\n"
23241"/// \\returns A 64-bit integer vector containing the same bitwise pattern as the\n"
23242"/// parameter.\n"
23243"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23244"_mm_cvtsi64_m64(long long __i)\n"
23245"{\n"
23246" return (__m64)__i;\n"
23247"}\n"
23248"\n"
23249"/// Casts a 64-bit integer vector into a 64-bit signed integer value.\n"
23250"///\n"
23251"/// \\headerfile <x86intrin.h>\n"
23252"///\n"
23253"/// This intrinsic corresponds to the <c> MOVQ </c> instruction.\n"
23254"///\n"
23255"/// \\param __m\n"
23256"/// A 64-bit integer vector.\n"
23257"/// \\returns A 64-bit signed integer containing the same bitwise pattern as the\n"
23258"/// parameter.\n"
23259"static __inline__ long long __DEFAULT_FN_ATTRS\n"
23260"_mm_cvtm64_si64(__m64 __m)\n"
23261"{\n"
23262" return (long long)__m;\n"
23263"}\n"
23264"\n"
23265"/// Converts 16-bit signed integers from both 64-bit integer vector\n"
23266"/// parameters of [4 x i16] into 8-bit signed integer values, and constructs\n"
23267"/// a 64-bit integer vector of [8 x i8] as the result. Positive values\n"
23268"/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80\n"
23269"/// are saturated to 0x80.\n"
23270"///\n"
23271"/// \\headerfile <x86intrin.h>\n"
23272"///\n"
23273"/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.\n"
23274"///\n"
23275"/// \\param __m1\n"
23276"/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n"
23277"/// 16-bit signed integer and is converted to an 8-bit signed integer with\n"
23278"/// saturation. Positive values greater than 0x7F are saturated to 0x7F.\n"
23279"/// Negative values less than 0x80 are saturated to 0x80. The converted\n"
23280"/// [4 x i8] values are written to the lower 32 bits of the result.\n"
23281"/// \\param __m2\n"
23282"/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n"
23283"/// 16-bit signed integer and is converted to an 8-bit signed integer with\n"
23284"/// saturation. Positive values greater than 0x7F are saturated to 0x7F.\n"
23285"/// Negative values less than 0x80 are saturated to 0x80. The converted\n"
23286"/// [4 x i8] values are written to the upper 32 bits of the result.\n"
23287"/// \\returns A 64-bit integer vector of [8 x i8] containing the converted\n"
23288"/// values.\n"
23289"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23290"_mm_packs_pi16(__m64 __m1, __m64 __m2)\n"
23291"{\n"
23292" return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);\n"
23293"}\n"
23294"\n"
23295"/// Converts 32-bit signed integers from both 64-bit integer vector\n"
23296"/// parameters of [2 x i32] into 16-bit signed integer values, and constructs\n"
23297"/// a 64-bit integer vector of [4 x i16] as the result. Positive values\n"
23298"/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than\n"
23299"/// 0x8000 are saturated to 0x8000.\n"
23300"///\n"
23301"/// \\headerfile <x86intrin.h>\n"
23302"///\n"
23303"/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.\n"
23304"///\n"
23305"/// \\param __m1\n"
23306"/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a\n"
23307"/// 32-bit signed integer and is converted to a 16-bit signed integer with\n"
23308"/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.\n"
23309"/// Negative values less than 0x8000 are saturated to 0x8000. The converted\n"
23310"/// [2 x i16] values are written to the lower 32 bits of the result.\n"
23311"/// \\param __m2\n"
23312"/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a\n"
23313"/// 32-bit signed integer and is converted to a 16-bit signed integer with\n"
23314"/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.\n"
23315"/// Negative values less than 0x8000 are saturated to 0x8000. The converted\n"
23316"/// [2 x i16] values are written to the upper 32 bits of the result.\n"
23317"/// \\returns A 64-bit integer vector of [4 x i16] containing the converted\n"
23318"/// values.\n"
23319"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23320"_mm_packs_pi32(__m64 __m1, __m64 __m2)\n"
23321"{\n"
23322" return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);\n"
23323"}\n"
23324"\n"
23325"/// Converts 16-bit signed integers from both 64-bit integer vector\n"
23326"/// parameters of [4 x i16] into 8-bit unsigned integer values, and\n"
23327"/// constructs a 64-bit integer vector of [8 x i8] as the result. Values\n"
23328"/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated\n"
23329"/// to 0.\n"
23330"///\n"
23331"/// \\headerfile <x86intrin.h>\n"
23332"///\n"
23333"/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.\n"
23334"///\n"
23335"/// \\param __m1\n"
23336"/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n"
23337"/// 16-bit signed integer and is converted to an 8-bit unsigned integer with\n"
23338"/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n"
23339"/// than 0 are saturated to 0. The converted [4 x i8] values are written to\n"
23340"/// the lower 32 bits of the result.\n"
23341"/// \\param __m2\n"
23342"/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n"
23343"/// 16-bit signed integer and is converted to an 8-bit unsigned integer with\n"
23344"/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n"
23345"/// than 0 are saturated to 0. The converted [4 x i8] values are written to\n"
23346"/// the upper 32 bits of the result.\n"
23347"/// \\returns A 64-bit integer vector of [8 x i8] containing the converted\n"
23348"/// values.\n"
23349"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23350"_mm_packs_pu16(__m64 __m1, __m64 __m2)\n"
23351"{\n"
23352" return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);\n"
23353"}\n"
23354"\n"
23355"/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]\n"
23356"/// and interleaves them into a 64-bit integer vector of [8 x i8].\n"
23357"///\n"
23358"/// \\headerfile <x86intrin.h>\n"
23359"///\n"
23360"/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.\n"
23361"///\n"
23362"/// \\param __m1\n"
23363"/// A 64-bit integer vector of [8 x i8]. \\n\n"
23364"/// Bits [39:32] are written to bits [7:0] of the result. \\n\n"
23365"/// Bits [47:40] are written to bits [23:16] of the result. \\n\n"
23366"/// Bits [55:48] are written to bits [39:32] of the result. \\n\n"
23367"/// Bits [63:56] are written to bits [55:48] of the result.\n"
23368"/// \\param __m2\n"
23369"/// A 64-bit integer vector of [8 x i8].\n"
23370"/// Bits [39:32] are written to bits [15:8] of the result. \\n\n"
23371"/// Bits [47:40] are written to bits [31:24] of the result. \\n\n"
23372"/// Bits [55:48] are written to bits [47:40] of the result. \\n\n"
23373"/// Bits [63:56] are written to bits [63:56] of the result.\n"
23374"/// \\returns A 64-bit integer vector of [8 x i8] containing the interleaved\n"
23375"/// values.\n"
23376"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23377"_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)\n"
23378"{\n"
23379" return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);\n"
23380"}\n"
23381"\n"
23382"/// Unpacks the upper 32 bits from two 64-bit integer vectors of\n"
23383"/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].\n"
23384"///\n"
23385"/// \\headerfile <x86intrin.h>\n"
23386"///\n"
23387"/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.\n"
23388"///\n"
23389"/// \\param __m1\n"
23390"/// A 64-bit integer vector of [4 x i16].\n"
23391"/// Bits [47:32] are written to bits [15:0] of the result. \\n\n"
23392"/// Bits [63:48] are written to bits [47:32] of the result.\n"
23393"/// \\param __m2\n"
23394"/// A 64-bit integer vector of [4 x i16].\n"
23395"/// Bits [47:32] are written to bits [31:16] of the result. \\n\n"
23396"/// Bits [63:48] are written to bits [63:48] of the result.\n"
23397"/// \\returns A 64-bit integer vector of [4 x i16] containing the interleaved\n"
23398"/// values.\n"
23399"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23400"_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)\n"
23401"{\n"
23402" return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);\n"
23403"}\n"
23404"\n"
23405"/// Unpacks the upper 32 bits from two 64-bit integer vectors of\n"
23406"/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].\n"
23407"///\n"
23408"/// \\headerfile <x86intrin.h>\n"
23409"///\n"
23410"/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.\n"
23411"///\n"
23412"/// \\param __m1\n"
23413"/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to\n"
23414"/// the lower 32 bits of the result.\n"
23415"/// \\param __m2\n"
23416"/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to\n"
23417"/// the upper 32 bits of the result.\n"
23418"/// \\returns A 64-bit integer vector of [2 x i32] containing the interleaved\n"
23419"/// values.\n"
23420"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23421"_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)\n"
23422"{\n"
23423" return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);\n"
23424"}\n"
23425"\n"
23426"/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]\n"
23427"/// and interleaves them into a 64-bit integer vector of [8 x i8].\n"
23428"///\n"
23429"/// \\headerfile <x86intrin.h>\n"
23430"///\n"
23431"/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.\n"
23432"///\n"
23433"/// \\param __m1\n"
23434"/// A 64-bit integer vector of [8 x i8].\n"
23435"/// Bits [7:0] are written to bits [7:0] of the result. \\n\n"
23436"/// Bits [15:8] are written to bits [23:16] of the result. \\n\n"
23437"/// Bits [23:16] are written to bits [39:32] of the result. \\n\n"
23438"/// Bits [31:24] are written to bits [55:48] of the result.\n"
23439"/// \\param __m2\n"
23440"/// A 64-bit integer vector of [8 x i8].\n"
23441"/// Bits [7:0] are written to bits [15:8] of the result. \\n\n"
23442"/// Bits [15:8] are written to bits [31:24] of the result. \\n\n"
23443"/// Bits [23:16] are written to bits [47:40] of the result. \\n\n"
23444"/// Bits [31:24] are written to bits [63:56] of the result.\n"
23445"/// \\returns A 64-bit integer vector of [8 x i8] containing the interleaved\n"
23446"/// values.\n"
23447"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23448"_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)\n"
23449"{\n"
23450" return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);\n"
23451"}\n"
23452"\n"
23453"/// Unpacks the lower 32 bits from two 64-bit integer vectors of\n"
23454"/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].\n"
23455"///\n"
23456"/// \\headerfile <x86intrin.h>\n"
23457"///\n"
23458"/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.\n"
23459"///\n"
23460"/// \\param __m1\n"
23461"/// A 64-bit integer vector of [4 x i16].\n"
23462"/// Bits [15:0] are written to bits [15:0] of the result. \\n\n"
23463"/// Bits [31:16] are written to bits [47:32] of the result.\n"
23464"/// \\param __m2\n"
23465"/// A 64-bit integer vector of [4 x i16].\n"
23466"/// Bits [15:0] are written to bits [31:16] of the result. \\n\n"
23467"/// Bits [31:16] are written to bits [63:48] of the result.\n"
23468"/// \\returns A 64-bit integer vector of [4 x i16] containing the interleaved\n"
23469"/// values.\n"
23470"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23471"_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)\n"
23472"{\n"
23473" return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);\n"
23474"}\n"
23475"\n"
23476"/// Unpacks the lower 32 bits from two 64-bit integer vectors of\n"
23477"/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].\n"
23478"///\n"
23479"/// \\headerfile <x86intrin.h>\n"
23480"///\n"
23481"/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.\n"
23482"///\n"
23483"/// \\param __m1\n"
23484"/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to\n"
23485"/// the lower 32 bits of the result.\n"
23486"/// \\param __m2\n"
23487"/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to\n"
23488"/// the upper 32 bits of the result.\n"
23489"/// \\returns A 64-bit integer vector of [2 x i32] containing the interleaved\n"
23490"/// values.\n"
23491"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23492"_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)\n"
23493"{\n"
23494" return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);\n"
23495"}\n"
23496"\n"
23497"/// Adds each 8-bit integer element of the first 64-bit integer vector\n"
23498"/// of [8 x i8] to the corresponding 8-bit integer element of the second\n"
23499"/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are\n"
23500"/// packed into a 64-bit integer vector of [8 x i8].\n"
23501"///\n"
23502"/// \\headerfile <x86intrin.h>\n"
23503"///\n"
23504"/// This intrinsic corresponds to the <c> PADDB </c> instruction.\n"
23505"///\n"
23506"/// \\param __m1\n"
23507"/// A 64-bit integer vector of [8 x i8].\n"
23508"/// \\param __m2\n"
23509"/// A 64-bit integer vector of [8 x i8].\n"
23510"/// \\returns A 64-bit integer vector of [8 x i8] containing the sums of both\n"
23511"/// parameters.\n"
23512"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23513"_mm_add_pi8(__m64 __m1, __m64 __m2)\n"
23514"{\n"
23515" return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);\n"
23516"}\n"
23517"\n"
23518"/// Adds each 16-bit integer element of the first 64-bit integer vector\n"
23519"/// of [4 x i16] to the corresponding 16-bit integer element of the second\n"
23520"/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are\n"
23521"/// packed into a 64-bit integer vector of [4 x i16].\n"
23522"///\n"
23523"/// \\headerfile <x86intrin.h>\n"
23524"///\n"
23525"/// This intrinsic corresponds to the <c> PADDW </c> instruction.\n"
23526"///\n"
23527"/// \\param __m1\n"
23528"/// A 64-bit integer vector of [4 x i16].\n"
23529"/// \\param __m2\n"
23530"/// A 64-bit integer vector of [4 x i16].\n"
23531"/// \\returns A 64-bit integer vector of [4 x i16] containing the sums of both\n"
23532"/// parameters.\n"
23533"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23534"_mm_add_pi16(__m64 __m1, __m64 __m2)\n"
23535"{\n"
23536" return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);\n"
23537"}\n"
23538"\n"
23539"/// Adds each 32-bit integer element of the first 64-bit integer vector\n"
23540"/// of [2 x i32] to the corresponding 32-bit integer element of the second\n"
23541"/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are\n"
23542"/// packed into a 64-bit integer vector of [2 x i32].\n"
23543"///\n"
23544"/// \\headerfile <x86intrin.h>\n"
23545"///\n"
23546"/// This intrinsic corresponds to the <c> PADDD </c> instruction.\n"
23547"///\n"
23548"/// \\param __m1\n"
23549"/// A 64-bit integer vector of [2 x i32].\n"
23550"/// \\param __m2\n"
23551"/// A 64-bit integer vector of [2 x i32].\n"
23552"/// \\returns A 64-bit integer vector of [2 x i32] containing the sums of both\n"
23553"/// parameters.\n"
23554"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23555"_mm_add_pi32(__m64 __m1, __m64 __m2)\n"
23556"{\n"
23557" return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);\n"
23558"}\n"
23559"\n"
23560"/// Adds each 8-bit signed integer element of the first 64-bit integer\n"
23561"/// vector of [8 x i8] to the corresponding 8-bit signed integer element of\n"
23562"/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than\n"
23563"/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to\n"
23564"/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].\n"
23565"///\n"
23566"/// \\headerfile <x86intrin.h>\n"
23567"///\n"
23568"/// This intrinsic corresponds to the <c> PADDSB </c> instruction.\n"
23569"///\n"
23570"/// \\param __m1\n"
23571"/// A 64-bit integer vector of [8 x i8].\n"
23572"/// \\param __m2\n"
23573"/// A 64-bit integer vector of [8 x i8].\n"
23574"/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated sums\n"
23575"/// of both parameters.\n"
23576"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23577"_mm_adds_pi8(__m64 __m1, __m64 __m2)\n"
23578"{\n"
23579" return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);\n"
23580"}\n"
23581"\n"
23582"/// Adds each 16-bit signed integer element of the first 64-bit integer\n"
23583"/// vector of [4 x i16] to the corresponding 16-bit signed integer element of\n"
23584"/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than\n"
23585"/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are\n"
23586"/// saturated to 0x8000. The results are packed into a 64-bit integer vector\n"
23587"/// of [4 x i16].\n"
23588"///\n"
23589"/// \\headerfile <x86intrin.h>\n"
23590"///\n"
23591"/// This intrinsic corresponds to the <c> PADDSW </c> instruction.\n"
23592"///\n"
23593"/// \\param __m1\n"
23594"/// A 64-bit integer vector of [4 x i16].\n"
23595"/// \\param __m2\n"
23596"/// A 64-bit integer vector of [4 x i16].\n"
23597"/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated sums\n"
23598"/// of both parameters.\n"
23599"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23600"_mm_adds_pi16(__m64 __m1, __m64 __m2)\n"
23601"{\n"
23602" return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);\n"
23603"}\n"
23604"\n"
23605"/// Adds each 8-bit unsigned integer element of the first 64-bit integer\n"
23606"/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of\n"
23607"/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are\n"
23608"/// saturated to 0xFF. The results are packed into a 64-bit integer vector of\n"
23609"/// [8 x i8].\n"
23610"///\n"
23611"/// \\headerfile <x86intrin.h>\n"
23612"///\n"
23613"/// This intrinsic corresponds to the <c> PADDUSB </c> instruction.\n"
23614"///\n"
23615"/// \\param __m1\n"
23616"/// A 64-bit integer vector of [8 x i8].\n"
23617"/// \\param __m2\n"
23618"/// A 64-bit integer vector of [8 x i8].\n"
23619"/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated\n"
23620"/// unsigned sums of both parameters.\n"
23621"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23622"_mm_adds_pu8(__m64 __m1, __m64 __m2)\n"
23623"{\n"
23624" return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);\n"
23625"}\n"
23626"\n"
23627"/// Adds each 16-bit unsigned integer element of the first 64-bit integer\n"
23628"/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element\n"
23629"/// of the second 64-bit integer vector of [4 x i16]. Sums greater than\n"
23630"/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit\n"
23631"/// integer vector of [4 x i16].\n"
23632"///\n"
23633"/// \\headerfile <x86intrin.h>\n"
23634"///\n"
23635"/// This intrinsic corresponds to the <c> PADDUSW </c> instruction.\n"
23636"///\n"
23637"/// \\param __m1\n"
23638"/// A 64-bit integer vector of [4 x i16].\n"
23639"/// \\param __m2\n"
23640"/// A 64-bit integer vector of [4 x i16].\n"
23641"/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated\n"
23642"/// unsigned sums of both parameters.\n"
23643"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23644"_mm_adds_pu16(__m64 __m1, __m64 __m2)\n"
23645"{\n"
23646" return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);\n"
23647"}\n"
23648"\n"
23649"/// Subtracts each 8-bit integer element of the second 64-bit integer\n"
23650"/// vector of [8 x i8] from the corresponding 8-bit integer element of the\n"
23651"/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results\n"
23652"/// are packed into a 64-bit integer vector of [8 x i8].\n"
23653"///\n"
23654"/// \\headerfile <x86intrin.h>\n"
23655"///\n"
23656"/// This intrinsic corresponds to the <c> PSUBB </c> instruction.\n"
23657"///\n"
23658"/// \\param __m1\n"
23659"/// A 64-bit integer vector of [8 x i8] containing the minuends.\n"
23660"/// \\param __m2\n"
23661"/// A 64-bit integer vector of [8 x i8] containing the subtrahends.\n"
23662"/// \\returns A 64-bit integer vector of [8 x i8] containing the differences of\n"
23663"/// both parameters.\n"
23664"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23665"_mm_sub_pi8(__m64 __m1, __m64 __m2)\n"
23666"{\n"
23667" return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);\n"
23668"}\n"
23669"\n"
23670"/// Subtracts each 16-bit integer element of the second 64-bit integer\n"
23671"/// vector of [4 x i16] from the corresponding 16-bit integer element of the\n"
23672"/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the\n"
23673"/// results are packed into a 64-bit integer vector of [4 x i16].\n"
23674"///\n"
23675"/// \\headerfile <x86intrin.h>\n"
23676"///\n"
23677"/// This intrinsic corresponds to the <c> PSUBW </c> instruction.\n"
23678"///\n"
23679"/// \\param __m1\n"
23680"/// A 64-bit integer vector of [4 x i16] containing the minuends.\n"
23681"/// \\param __m2\n"
23682"/// A 64-bit integer vector of [4 x i16] containing the subtrahends.\n"
23683"/// \\returns A 64-bit integer vector of [4 x i16] containing the differences of\n"
23684"/// both parameters.\n"
23685"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23686"_mm_sub_pi16(__m64 __m1, __m64 __m2)\n"
23687"{\n"
23688" return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);\n"
23689"}\n"
23690"\n"
23691"/// Subtracts each 32-bit integer element of the second 64-bit integer\n"
23692"/// vector of [2 x i32] from the corresponding 32-bit integer element of the\n"
23693"/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the\n"
23694"/// results are packed into a 64-bit integer vector of [2 x i32].\n"
23695"///\n"
23696"/// \\headerfile <x86intrin.h>\n"
23697"///\n"
23698"/// This intrinsic corresponds to the <c> PSUBD </c> instruction.\n"
23699"///\n"
23700"/// \\param __m1\n"
23701"/// A 64-bit integer vector of [2 x i32] containing the minuends.\n"
23702"/// \\param __m2\n"
23703"/// A 64-bit integer vector of [2 x i32] containing the subtrahends.\n"
23704"/// \\returns A 64-bit integer vector of [2 x i32] containing the differences of\n"
23705"/// both parameters.\n"
23706"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23707"_mm_sub_pi32(__m64 __m1, __m64 __m2)\n"
23708"{\n"
23709" return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);\n"
23710"}\n"
23711"\n"
23712"/// Subtracts each 8-bit signed integer element of the second 64-bit\n"
23713"/// integer vector of [8 x i8] from the corresponding 8-bit signed integer\n"
23714"/// element of the first 64-bit integer vector of [8 x i8]. Positive results\n"
23715"/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80\n"
23716"/// are saturated to 0x80. The results are packed into a 64-bit integer\n"
23717"/// vector of [8 x i8].\n"
23718"///\n"
23719"/// \\headerfile <x86intrin.h>\n"
23720"///\n"
23721"/// This intrinsic corresponds to the <c> PSUBSB </c> instruction.\n"
23722"///\n"
23723"/// \\param __m1\n"
23724"/// A 64-bit integer vector of [8 x i8] containing the minuends.\n"
23725"/// \\param __m2\n"
23726"/// A 64-bit integer vector of [8 x i8] containing the subtrahends.\n"
23727"/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated\n"
23728"/// differences of both parameters.\n"
23729"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23730"_mm_subs_pi8(__m64 __m1, __m64 __m2)\n"
23731"{\n"
23732" return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);\n"
23733"}\n"
23734"\n"
23735"/// Subtracts each 16-bit signed integer element of the second 64-bit\n"
23736"/// integer vector of [4 x i16] from the corresponding 16-bit signed integer\n"
23737"/// element of the first 64-bit integer vector of [4 x i16]. Positive results\n"
23738"/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than\n"
23739"/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit\n"
23740"/// integer vector of [4 x i16].\n"
23741"///\n"
23742"/// \\headerfile <x86intrin.h>\n"
23743"///\n"
23744"/// This intrinsic corresponds to the <c> PSUBSW </c> instruction.\n"
23745"///\n"
23746"/// \\param __m1\n"
23747"/// A 64-bit integer vector of [4 x i16] containing the minuends.\n"
23748"/// \\param __m2\n"
23749"/// A 64-bit integer vector of [4 x i16] containing the subtrahends.\n"
23750"/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated\n"
23751"/// differences of both parameters.\n"
23752"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23753"_mm_subs_pi16(__m64 __m1, __m64 __m2)\n"
23754"{\n"
23755" return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);\n"
23756"}\n"
23757"\n"
23758"/// Subtracts each 8-bit unsigned integer element of the second 64-bit\n"
23759"/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer\n"
23760"/// element of the first 64-bit integer vector of [8 x i8].\n"
23761"///\n"
23762"/// If an element of the first vector is less than the corresponding element\n"
23763"/// of the second vector, the result is saturated to 0. The results are\n"
23764"/// packed into a 64-bit integer vector of [8 x i8].\n"
23765"///\n"
23766"/// \\headerfile <x86intrin.h>\n"
23767"///\n"
23768"/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.\n"
23769"///\n"
23770"/// \\param __m1\n"
23771"/// A 64-bit integer vector of [8 x i8] containing the minuends.\n"
23772"/// \\param __m2\n"
23773"/// A 64-bit integer vector of [8 x i8] containing the subtrahends.\n"
23774"/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated\n"
23775"/// differences of both parameters.\n"
23776"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23777"_mm_subs_pu8(__m64 __m1, __m64 __m2)\n"
23778"{\n"
23779" return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);\n"
23780"}\n"
23781"\n"
23782"/// Subtracts each 16-bit unsigned integer element of the second 64-bit\n"
23783"/// integer vector of [4 x i16] from the corresponding 16-bit unsigned\n"
23784"/// integer element of the first 64-bit integer vector of [4 x i16].\n"
23785"///\n"
23786"/// If an element of the first vector is less than the corresponding element\n"
23787"/// of the second vector, the result is saturated to 0. The results are\n"
23788"/// packed into a 64-bit integer vector of [4 x i16].\n"
23789"///\n"
23790"/// \\headerfile <x86intrin.h>\n"
23791"///\n"
23792"/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.\n"
23793"///\n"
23794"/// \\param __m1\n"
23795"/// A 64-bit integer vector of [4 x i16] containing the minuends.\n"
23796"/// \\param __m2\n"
23797"/// A 64-bit integer vector of [4 x i16] containing the subtrahends.\n"
23798"/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated\n"
23799"/// differences of both parameters.\n"
23800"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23801"_mm_subs_pu16(__m64 __m1, __m64 __m2)\n"
23802"{\n"
23803" return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);\n"
23804"}\n"
23805"\n"
23806"/// Multiplies each 16-bit signed integer element of the first 64-bit\n"
23807"/// integer vector of [4 x i16] by the corresponding 16-bit signed integer\n"
23808"/// element of the second 64-bit integer vector of [4 x i16] and get four\n"
23809"/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.\n"
23810"/// The lower 32 bits of these two sums are packed into a 64-bit integer\n"
23811"/// vector of [2 x i32].\n"
23812"///\n"
23813"/// For example, bits [15:0] of both parameters are multiplied, bits [31:16]\n"
23814"/// of both parameters are multiplied, and the sum of both results is written\n"
23815"/// to bits [31:0] of the result.\n"
23816"///\n"
23817"/// \\headerfile <x86intrin.h>\n"
23818"///\n"
23819"/// This intrinsic corresponds to the <c> PMADDWD </c> instruction.\n"
23820"///\n"
23821"/// \\param __m1\n"
23822"/// A 64-bit integer vector of [4 x i16].\n"
23823"/// \\param __m2\n"
23824"/// A 64-bit integer vector of [4 x i16].\n"
23825"/// \\returns A 64-bit integer vector of [2 x i32] containing the sums of\n"
23826"/// products of both parameters.\n"
23827"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23828"_mm_madd_pi16(__m64 __m1, __m64 __m2)\n"
23829"{\n"
23830" return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);\n"
23831"}\n"
23832"\n"
23833"/// Multiplies each 16-bit signed integer element of the first 64-bit\n"
23834"/// integer vector of [4 x i16] by the corresponding 16-bit signed integer\n"
23835"/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper\n"
23836"/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].\n"
23837"///\n"
23838"/// \\headerfile <x86intrin.h>\n"
23839"///\n"
23840"/// This intrinsic corresponds to the <c> PMULHW </c> instruction.\n"
23841"///\n"
23842"/// \\param __m1\n"
23843"/// A 64-bit integer vector of [4 x i16].\n"
23844"/// \\param __m2\n"
23845"/// A 64-bit integer vector of [4 x i16].\n"
23846"/// \\returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits\n"
23847"/// of the products of both parameters.\n"
23848"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23849"_mm_mulhi_pi16(__m64 __m1, __m64 __m2)\n"
23850"{\n"
23851" return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);\n"
23852"}\n"
23853"\n"
23854"/// Multiplies each 16-bit signed integer element of the first 64-bit\n"
23855"/// integer vector of [4 x i16] by the corresponding 16-bit signed integer\n"
23856"/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower\n"
23857"/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].\n"
23858"///\n"
23859"/// \\headerfile <x86intrin.h>\n"
23860"///\n"
23861"/// This intrinsic corresponds to the <c> PMULLW </c> instruction.\n"
23862"///\n"
23863"/// \\param __m1\n"
23864"/// A 64-bit integer vector of [4 x i16].\n"
23865"/// \\param __m2\n"
23866"/// A 64-bit integer vector of [4 x i16].\n"
23867"/// \\returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits\n"
23868"/// of the products of both parameters.\n"
23869"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23870"_mm_mullo_pi16(__m64 __m1, __m64 __m2)\n"
23871"{\n"
23872" return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);\n"
23873"}\n"
23874"\n"
23875"/// Left-shifts each 16-bit signed integer element of the first\n"
23876"/// parameter, which is a 64-bit integer vector of [4 x i16], by the number\n"
23877"/// of bits specified by the second parameter, which is a 64-bit integer. The\n"
23878"/// lower 16 bits of the results are packed into a 64-bit integer vector of\n"
23879"/// [4 x i16].\n"
23880"///\n"
23881"/// \\headerfile <x86intrin.h>\n"
23882"///\n"
23883"/// This intrinsic corresponds to the <c> PSLLW </c> instruction.\n"
23884"///\n"
23885"/// \\param __m\n"
23886"/// A 64-bit integer vector of [4 x i16].\n"
23887"/// \\param __count\n"
23888"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
23889"/// \\returns A 64-bit integer vector of [4 x i16] containing the left-shifted\n"
23890"/// values. If \\a __count is greater or equal to 16, the result is set to all\n"
23891"/// 0.\n"
23892"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23893"_mm_sll_pi16(__m64 __m, __m64 __count)\n"
23894"{\n"
23895" return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);\n"
23896"}\n"
23897"\n"
23898"/// Left-shifts each 16-bit signed integer element of a 64-bit integer\n"
23899"/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.\n"
23900"/// The lower 16 bits of the results are packed into a 64-bit integer vector\n"
23901"/// of [4 x i16].\n"
23902"///\n"
23903"/// \\headerfile <x86intrin.h>\n"
23904"///\n"
23905"/// This intrinsic corresponds to the <c> PSLLW </c> instruction.\n"
23906"///\n"
23907"/// \\param __m\n"
23908"/// A 64-bit integer vector of [4 x i16].\n"
23909"/// \\param __count\n"
23910"/// A 32-bit integer value.\n"
23911"/// \\returns A 64-bit integer vector of [4 x i16] containing the left-shifted\n"
23912"/// values. If \\a __count is greater or equal to 16, the result is set to all\n"
23913"/// 0.\n"
23914"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23915"_mm_slli_pi16(__m64 __m, int __count)\n"
23916"{\n"
23917" return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);\n"
23918"}\n"
23919"\n"
23920"/// Left-shifts each 32-bit signed integer element of the first\n"
23921"/// parameter, which is a 64-bit integer vector of [2 x i32], by the number\n"
23922"/// of bits specified by the second parameter, which is a 64-bit integer. The\n"
23923"/// lower 32 bits of the results are packed into a 64-bit integer vector of\n"
23924"/// [2 x i32].\n"
23925"///\n"
23926"/// \\headerfile <x86intrin.h>\n"
23927"///\n"
23928"/// This intrinsic corresponds to the <c> PSLLD </c> instruction.\n"
23929"///\n"
23930"/// \\param __m\n"
23931"/// A 64-bit integer vector of [2 x i32].\n"
23932"/// \\param __count\n"
23933"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
23934"/// \\returns A 64-bit integer vector of [2 x i32] containing the left-shifted\n"
23935"/// values. If \\a __count is greater or equal to 32, the result is set to all\n"
23936"/// 0.\n"
23937"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23938"_mm_sll_pi32(__m64 __m, __m64 __count)\n"
23939"{\n"
23940" return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);\n"
23941"}\n"
23942"\n"
23943"/// Left-shifts each 32-bit signed integer element of a 64-bit integer\n"
23944"/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.\n"
23945"/// The lower 32 bits of the results are packed into a 64-bit integer vector\n"
23946"/// of [2 x i32].\n"
23947"///\n"
23948"/// \\headerfile <x86intrin.h>\n"
23949"///\n"
23950"/// This intrinsic corresponds to the <c> PSLLD </c> instruction.\n"
23951"///\n"
23952"/// \\param __m\n"
23953"/// A 64-bit integer vector of [2 x i32].\n"
23954"/// \\param __count\n"
23955"/// A 32-bit integer value.\n"
23956"/// \\returns A 64-bit integer vector of [2 x i32] containing the left-shifted\n"
23957"/// values. If \\a __count is greater or equal to 32, the result is set to all\n"
23958"/// 0.\n"
23959"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23960"_mm_slli_pi32(__m64 __m, int __count)\n"
23961"{\n"
23962" return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);\n"
23963"}\n"
23964"\n"
23965"/// Left-shifts the first 64-bit integer parameter by the number of bits\n"
23966"/// specified by the second 64-bit integer parameter. The lower 64 bits of\n"
23967"/// result are returned.\n"
23968"///\n"
23969"/// \\headerfile <x86intrin.h>\n"
23970"///\n"
23971"/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.\n"
23972"///\n"
23973"/// \\param __m\n"
23974"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
23975"/// \\param __count\n"
23976"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
23977"/// \\returns A 64-bit integer vector containing the left-shifted value. If\n"
23978"/// \\a __count is greater or equal to 64, the result is set to 0.\n"
23979"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23980"_mm_sll_si64(__m64 __m, __m64 __count)\n"
23981"{\n"
23982" return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);\n"
23983"}\n"
23984"\n"
23985"/// Left-shifts the first parameter, which is a 64-bit integer, by the\n"
23986"/// number of bits specified by the second parameter, which is a 32-bit\n"
23987"/// integer. The lower 64 bits of result are returned.\n"
23988"///\n"
23989"/// \\headerfile <x86intrin.h>\n"
23990"///\n"
23991"/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.\n"
23992"///\n"
23993"/// \\param __m\n"
23994"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
23995"/// \\param __count\n"
23996"/// A 32-bit integer value.\n"
23997"/// \\returns A 64-bit integer vector containing the left-shifted value. If\n"
23998"/// \\a __count is greater or equal to 64, the result is set to 0.\n"
23999"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24000"_mm_slli_si64(__m64 __m, int __count)\n"
24001"{\n"
24002" return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);\n"
24003"}\n"
24004"\n"
24005"/// Right-shifts each 16-bit integer element of the first parameter,\n"
24006"/// which is a 64-bit integer vector of [4 x i16], by the number of bits\n"
24007"/// specified by the second parameter, which is a 64-bit integer.\n"
24008"///\n"
24009"/// High-order bits are filled with the sign bit of the initial value of each\n"
24010"/// 16-bit element. The 16-bit results are packed into a 64-bit integer\n"
24011"/// vector of [4 x i16].\n"
24012"///\n"
24013"/// \\headerfile <x86intrin.h>\n"
24014"///\n"
24015"/// This intrinsic corresponds to the <c> PSRAW </c> instruction.\n"
24016"///\n"
24017"/// \\param __m\n"
24018"/// A 64-bit integer vector of [4 x i16].\n"
24019"/// \\param __count\n"
24020"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24021"/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n"
24022"/// values.\n"
24023"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24024"_mm_sra_pi16(__m64 __m, __m64 __count)\n"
24025"{\n"
24026" return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);\n"
24027"}\n"
24028"\n"
24029"/// Right-shifts each 16-bit integer element of a 64-bit integer vector\n"
24030"/// of [4 x i16] by the number of bits specified by a 32-bit integer.\n"
24031"///\n"
24032"/// High-order bits are filled with the sign bit of the initial value of each\n"
24033"/// 16-bit element. The 16-bit results are packed into a 64-bit integer\n"
24034"/// vector of [4 x i16].\n"
24035"///\n"
24036"/// \\headerfile <x86intrin.h>\n"
24037"///\n"
24038"/// This intrinsic corresponds to the <c> PSRAW </c> instruction.\n"
24039"///\n"
24040"/// \\param __m\n"
24041"/// A 64-bit integer vector of [4 x i16].\n"
24042"/// \\param __count\n"
24043"/// A 32-bit integer value.\n"
24044"/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n"
24045"/// values.\n"
24046"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24047"_mm_srai_pi16(__m64 __m, int __count)\n"
24048"{\n"
24049" return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);\n"
24050"}\n"
24051"\n"
24052"/// Right-shifts each 32-bit integer element of the first parameter,\n"
24053"/// which is a 64-bit integer vector of [2 x i32], by the number of bits\n"
24054"/// specified by the second parameter, which is a 64-bit integer.\n"
24055"///\n"
24056"/// High-order bits are filled with the sign bit of the initial value of each\n"
24057"/// 32-bit element. The 32-bit results are packed into a 64-bit integer\n"
24058"/// vector of [2 x i32].\n"
24059"///\n"
24060"/// \\headerfile <x86intrin.h>\n"
24061"///\n"
24062"/// This intrinsic corresponds to the <c> PSRAD </c> instruction.\n"
24063"///\n"
24064"/// \\param __m\n"
24065"/// A 64-bit integer vector of [2 x i32].\n"
24066"/// \\param __count\n"
24067"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24068"/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n"
24069"/// values.\n"
24070"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24071"_mm_sra_pi32(__m64 __m, __m64 __count)\n"
24072"{\n"
24073" return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);\n"
24074"}\n"
24075"\n"
24076"/// Right-shifts each 32-bit integer element of a 64-bit integer vector\n"
24077"/// of [2 x i32] by the number of bits specified by a 32-bit integer.\n"
24078"///\n"
24079"/// High-order bits are filled with the sign bit of the initial value of each\n"
24080"/// 32-bit element. The 32-bit results are packed into a 64-bit integer\n"
24081"/// vector of [2 x i32].\n"
24082"///\n"
24083"/// \\headerfile <x86intrin.h>\n"
24084"///\n"
24085"/// This intrinsic corresponds to the <c> PSRAD </c> instruction.\n"
24086"///\n"
24087"/// \\param __m\n"
24088"/// A 64-bit integer vector of [2 x i32].\n"
24089"/// \\param __count\n"
24090"/// A 32-bit integer value.\n"
24091"/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n"
24092"/// values.\n"
24093"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24094"_mm_srai_pi32(__m64 __m, int __count)\n"
24095"{\n"
24096" return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);\n"
24097"}\n"
24098"\n"
24099"/// Right-shifts each 16-bit integer element of the first parameter,\n"
24100"/// which is a 64-bit integer vector of [4 x i16], by the number of bits\n"
24101"/// specified by the second parameter, which is a 64-bit integer.\n"
24102"///\n"
24103"/// High-order bits are cleared. The 16-bit results are packed into a 64-bit\n"
24104"/// integer vector of [4 x i16].\n"
24105"///\n"
24106"/// \\headerfile <x86intrin.h>\n"
24107"///\n"
24108"/// This intrinsic corresponds to the <c> PSRLW </c> instruction.\n"
24109"///\n"
24110"/// \\param __m\n"
24111"/// A 64-bit integer vector of [4 x i16].\n"
24112"/// \\param __count\n"
24113"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24114"/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n"
24115"/// values.\n"
24116"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24117"_mm_srl_pi16(__m64 __m, __m64 __count)\n"
24118"{\n"
24119" return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);\n"
24120"}\n"
24121"\n"
24122"/// Right-shifts each 16-bit integer element of a 64-bit integer vector\n"
24123"/// of [4 x i16] by the number of bits specified by a 32-bit integer.\n"
24124"///\n"
24125"/// High-order bits are cleared. The 16-bit results are packed into a 64-bit\n"
24126"/// integer vector of [4 x i16].\n"
24127"///\n"
24128"/// \\headerfile <x86intrin.h>\n"
24129"///\n"
24130"/// This intrinsic corresponds to the <c> PSRLW </c> instruction.\n"
24131"///\n"
24132"/// \\param __m\n"
24133"/// A 64-bit integer vector of [4 x i16].\n"
24134"/// \\param __count\n"
24135"/// A 32-bit integer value.\n"
24136"/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n"
24137"/// values.\n"
24138"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24139"_mm_srli_pi16(__m64 __m, int __count)\n"
24140"{\n"
24141" return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);\n"
24142"}\n"
24143"\n"
24144"/// Right-shifts each 32-bit integer element of the first parameter,\n"
24145"/// which is a 64-bit integer vector of [2 x i32], by the number of bits\n"
24146"/// specified by the second parameter, which is a 64-bit integer.\n"
24147"///\n"
24148"/// High-order bits are cleared. The 32-bit results are packed into a 64-bit\n"
24149"/// integer vector of [2 x i32].\n"
24150"///\n"
24151"/// \\headerfile <x86intrin.h>\n"
24152"///\n"
24153"/// This intrinsic corresponds to the <c> PSRLD </c> instruction.\n"
24154"///\n"
24155"/// \\param __m\n"
24156"/// A 64-bit integer vector of [2 x i32].\n"
24157"/// \\param __count\n"
24158"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24159"/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n"
24160"/// values.\n"
24161"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24162"_mm_srl_pi32(__m64 __m, __m64 __count)\n"
24163"{\n"
24164" return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);\n"
24165"}\n"
24166"\n"
24167"/// Right-shifts each 32-bit integer element of a 64-bit integer vector\n"
24168"/// of [2 x i32] by the number of bits specified by a 32-bit integer.\n"
24169"///\n"
24170"/// High-order bits are cleared. The 32-bit results are packed into a 64-bit\n"
24171"/// integer vector of [2 x i32].\n"
24172"///\n"
24173"/// \\headerfile <x86intrin.h>\n"
24174"///\n"
24175"/// This intrinsic corresponds to the <c> PSRLD </c> instruction.\n"
24176"///\n"
24177"/// \\param __m\n"
24178"/// A 64-bit integer vector of [2 x i32].\n"
24179"/// \\param __count\n"
24180"/// A 32-bit integer value.\n"
24181"/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n"
24182"/// values.\n"
24183"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24184"_mm_srli_pi32(__m64 __m, int __count)\n"
24185"{\n"
24186" return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);\n"
24187"}\n"
24188"\n"
24189"/// Right-shifts the first 64-bit integer parameter by the number of bits\n"
24190"/// specified by the second 64-bit integer parameter.\n"
24191"///\n"
24192"/// High-order bits are cleared.\n"
24193"///\n"
24194"/// \\headerfile <x86intrin.h>\n"
24195"///\n"
24196"/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.\n"
24197"///\n"
24198"/// \\param __m\n"
24199"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24200"/// \\param __count\n"
24201"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24202"/// \\returns A 64-bit integer vector containing the right-shifted value.\n"
24203"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24204"_mm_srl_si64(__m64 __m, __m64 __count)\n"
24205"{\n"
24206" return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);\n"
24207"}\n"
24208"\n"
24209"/// Right-shifts the first parameter, which is a 64-bit integer, by the\n"
24210"/// number of bits specified by the second parameter, which is a 32-bit\n"
24211"/// integer.\n"
24212"///\n"
24213"/// High-order bits are cleared.\n"
24214"///\n"
24215"/// \\headerfile <x86intrin.h>\n"
24216"///\n"
24217"/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.\n"
24218"///\n"
24219"/// \\param __m\n"
24220"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24221"/// \\param __count\n"
24222"/// A 32-bit integer value.\n"
24223"/// \\returns A 64-bit integer vector containing the right-shifted value.\n"
24224"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24225"_mm_srli_si64(__m64 __m, int __count)\n"
24226"{\n"
24227" return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);\n"
24228"}\n"
24229"\n"
24230"/// Performs a bitwise AND of two 64-bit integer vectors.\n"
24231"///\n"
24232"/// \\headerfile <x86intrin.h>\n"
24233"///\n"
24234"/// This intrinsic corresponds to the <c> PAND </c> instruction.\n"
24235"///\n"
24236"/// \\param __m1\n"
24237"/// A 64-bit integer vector.\n"
24238"/// \\param __m2\n"
24239"/// A 64-bit integer vector.\n"
24240"/// \\returns A 64-bit integer vector containing the bitwise AND of both\n"
24241"/// parameters.\n"
24242"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24243"_mm_and_si64(__m64 __m1, __m64 __m2)\n"
24244"{\n"
24245" return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);\n"
24246"}\n"
24247"\n"
24248"/// Performs a bitwise NOT of the first 64-bit integer vector, and then\n"
24249"/// performs a bitwise AND of the intermediate result and the second 64-bit\n"
24250"/// integer vector.\n"
24251"///\n"
24252"/// \\headerfile <x86intrin.h>\n"
24253"///\n"
24254"/// This intrinsic corresponds to the <c> PANDN </c> instruction.\n"
24255"///\n"
24256"/// \\param __m1\n"
24257"/// A 64-bit integer vector. The one's complement of this parameter is used\n"
24258"/// in the bitwise AND.\n"
24259"/// \\param __m2\n"
24260"/// A 64-bit integer vector.\n"
24261"/// \\returns A 64-bit integer vector containing the bitwise AND of the second\n"
24262"/// parameter and the one's complement of the first parameter.\n"
24263"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24264"_mm_andnot_si64(__m64 __m1, __m64 __m2)\n"
24265"{\n"
24266" return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);\n"
24267"}\n"
24268"\n"
24269"/// Performs a bitwise OR of two 64-bit integer vectors.\n"
24270"///\n"
24271"/// \\headerfile <x86intrin.h>\n"
24272"///\n"
24273"/// This intrinsic corresponds to the <c> POR </c> instruction.\n"
24274"///\n"
24275"/// \\param __m1\n"
24276"/// A 64-bit integer vector.\n"
24277"/// \\param __m2\n"
24278"/// A 64-bit integer vector.\n"
24279"/// \\returns A 64-bit integer vector containing the bitwise OR of both\n"
24280"/// parameters.\n"
24281"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24282"_mm_or_si64(__m64 __m1, __m64 __m2)\n"
24283"{\n"
24284" return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);\n"
24285"}\n"
24286"\n"
24287"/// Performs a bitwise exclusive OR of two 64-bit integer vectors.\n"
24288"///\n"
24289"/// \\headerfile <x86intrin.h>\n"
24290"///\n"
24291"/// This intrinsic corresponds to the <c> PXOR </c> instruction.\n"
24292"///\n"
24293"/// \\param __m1\n"
24294"/// A 64-bit integer vector.\n"
24295"/// \\param __m2\n"
24296"/// A 64-bit integer vector.\n"
24297"/// \\returns A 64-bit integer vector containing the bitwise exclusive OR of both\n"
24298"/// parameters.\n"
24299"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24300"_mm_xor_si64(__m64 __m1, __m64 __m2)\n"
24301"{\n"
24302" return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);\n"
24303"}\n"
24304"\n"
24305"/// Compares the 8-bit integer elements of two 64-bit integer vectors of\n"
24306"/// [8 x i8] to determine if the element of the first vector is equal to the\n"
24307"/// corresponding element of the second vector.\n"
24308"///\n"
24309"/// The comparison yields 0 for false, 0xFF for true.\n"
24310"///\n"
24311"/// \\headerfile <x86intrin.h>\n"
24312"///\n"
24313"/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.\n"
24314"///\n"
24315"/// \\param __m1\n"
24316"/// A 64-bit integer vector of [8 x i8].\n"
24317"/// \\param __m2\n"
24318"/// A 64-bit integer vector of [8 x i8].\n"
24319"/// \\returns A 64-bit integer vector of [8 x i8] containing the comparison\n"
24320"/// results.\n"
24321"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24322"_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)\n"
24323"{\n"
24324" return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);\n"
24325"}\n"
24326"\n"
24327"/// Compares the 16-bit integer elements of two 64-bit integer vectors of\n"
24328"/// [4 x i16] to determine if the element of the first vector is equal to the\n"
24329"/// corresponding element of the second vector.\n"
24330"///\n"
24331"/// The comparison yields 0 for false, 0xFFFF for true.\n"
24332"///\n"
24333"/// \\headerfile <x86intrin.h>\n"
24334"///\n"
24335"/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.\n"
24336"///\n"
24337"/// \\param __m1\n"
24338"/// A 64-bit integer vector of [4 x i16].\n"
24339"/// \\param __m2\n"
24340"/// A 64-bit integer vector of [4 x i16].\n"
24341"/// \\returns A 64-bit integer vector of [4 x i16] containing the comparison\n"
24342"/// results.\n"
24343"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24344"_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)\n"
24345"{\n"
24346" return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);\n"
24347"}\n"
24348"\n"
24349"/// Compares the 32-bit integer elements of two 64-bit integer vectors of\n"
24350"/// [2 x i32] to determine if the element of the first vector is equal to the\n"
24351"/// corresponding element of the second vector.\n"
24352"///\n"
24353"/// The comparison yields 0 for false, 0xFFFFFFFF for true.\n"
24354"///\n"
24355"/// \\headerfile <x86intrin.h>\n"
24356"///\n"
24357"/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.\n"
24358"///\n"
24359"/// \\param __m1\n"
24360"/// A 64-bit integer vector of [2 x i32].\n"
24361"/// \\param __m2\n"
24362"/// A 64-bit integer vector of [2 x i32].\n"
24363"/// \\returns A 64-bit integer vector of [2 x i32] containing the comparison\n"
24364"/// results.\n"
24365"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24366"_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)\n"
24367"{\n"
24368" return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);\n"
24369"}\n"
24370"\n"
24371"/// Compares the 8-bit integer elements of two 64-bit integer vectors of\n"
24372"/// [8 x i8] to determine if the element of the first vector is greater than\n"
24373"/// the corresponding element of the second vector.\n"
24374"///\n"
24375"/// The comparison yields 0 for false, 0xFF for true.\n"
24376"///\n"
24377"/// \\headerfile <x86intrin.h>\n"
24378"///\n"
24379"/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.\n"
24380"///\n"
24381"/// \\param __m1\n"
24382"/// A 64-bit integer vector of [8 x i8].\n"
24383"/// \\param __m2\n"
24384"/// A 64-bit integer vector of [8 x i8].\n"
24385"/// \\returns A 64-bit integer vector of [8 x i8] containing the comparison\n"
24386"/// results.\n"
24387"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24388"_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)\n"
24389"{\n"
24390" return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);\n"
24391"}\n"
24392"\n"
24393"/// Compares the 16-bit integer elements of two 64-bit integer vectors of\n"
24394"/// [4 x i16] to determine if the element of the first vector is greater than\n"
24395"/// the corresponding element of the second vector.\n"
24396"///\n"
24397"/// The comparison yields 0 for false, 0xFFFF for true.\n"
24398"///\n"
24399"/// \\headerfile <x86intrin.h>\n"
24400"///\n"
24401"/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.\n"
24402"///\n"
24403"/// \\param __m1\n"
24404"/// A 64-bit integer vector of [4 x i16].\n"
24405"/// \\param __m2\n"
24406"/// A 64-bit integer vector of [4 x i16].\n"
24407"/// \\returns A 64-bit integer vector of [4 x i16] containing the comparison\n"
24408"/// results.\n"
24409"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24410"_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)\n"
24411"{\n"
24412" return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);\n"
24413"}\n"
24414"\n"
24415"/// Compares the 32-bit integer elements of two 64-bit integer vectors of\n"
24416"/// [2 x i32] to determine if the element of the first vector is greater than\n"
24417"/// the corresponding element of the second vector.\n"
24418"///\n"
24419"/// The comparison yields 0 for false, 0xFFFFFFFF for true.\n"
24420"///\n"
24421"/// \\headerfile <x86intrin.h>\n"
24422"///\n"
24423"/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.\n"
24424"///\n"
24425"/// \\param __m1\n"
24426"/// A 64-bit integer vector of [2 x i32].\n"
24427"/// \\param __m2\n"
24428"/// A 64-bit integer vector of [2 x i32].\n"
24429"/// \\returns A 64-bit integer vector of [2 x i32] containing the comparison\n"
24430"/// results.\n"
24431"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24432"_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)\n"
24433"{\n"
24434" return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);\n"
24435"}\n"
24436"\n"
24437"/// Constructs a 64-bit integer vector initialized to zero.\n"
24438"///\n"
24439"/// \\headerfile <x86intrin.h>\n"
24440"///\n"
24441"/// This intrinsic corresponds to the <c> PXOR </c> instruction.\n"
24442"///\n"
24443"/// \\returns An initialized 64-bit integer vector with all elements set to zero.\n"
24444"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24445"_mm_setzero_si64(void)\n"
24446"{\n"
24447" return __extension__ (__m64){ 0LL };\n"
24448"}\n"
24449"\n"
24450"/// Constructs a 64-bit integer vector initialized with the specified\n"
24451"/// 32-bit integer values.\n"
24452"///\n"
24453"/// \\headerfile <x86intrin.h>\n"
24454"///\n"
24455"/// This intrinsic is a utility function and does not correspond to a specific\n"
24456"/// instruction.\n"
24457"///\n"
24458"/// \\param __i1\n"
24459"/// A 32-bit integer value used to initialize the upper 32 bits of the\n"
24460"/// result.\n"
24461"/// \\param __i0\n"
24462"/// A 32-bit integer value used to initialize the lower 32 bits of the\n"
24463"/// result.\n"
24464"/// \\returns An initialized 64-bit integer vector.\n"
24465"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24466"_mm_set_pi32(int __i1, int __i0)\n"
24467"{\n"
24468" return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);\n"
24469"}\n"
24470"\n"
24471"/// Constructs a 64-bit integer vector initialized with the specified\n"
24472"/// 16-bit integer values.\n"
24473"///\n"
24474"/// \\headerfile <x86intrin.h>\n"
24475"///\n"
24476"/// This intrinsic is a utility function and does not correspond to a specific\n"
24477"/// instruction.\n"
24478"///\n"
24479"/// \\param __s3\n"
24480"/// A 16-bit integer value used to initialize bits [63:48] of the result.\n"
24481"/// \\param __s2\n"
24482"/// A 16-bit integer value used to initialize bits [47:32] of the result.\n"
24483"/// \\param __s1\n"
24484"/// A 16-bit integer value used to initialize bits [31:16] of the result.\n"
24485"/// \\param __s0\n"
24486"/// A 16-bit integer value used to initialize bits [15:0] of the result.\n"
24487"/// \\returns An initialized 64-bit integer vector.\n"
24488"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24489"_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)\n"
24490"{\n"
24491" return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);\n"
24492"}\n"
24493"\n"
24494"/// Constructs a 64-bit integer vector initialized with the specified\n"
24495"/// 8-bit integer values.\n"
24496"///\n"
24497"/// \\headerfile <x86intrin.h>\n"
24498"///\n"
24499"/// This intrinsic is a utility function and does not correspond to a specific\n"
24500"/// instruction.\n"
24501"///\n"
24502"/// \\param __b7\n"
24503"/// An 8-bit integer value used to initialize bits [63:56] of the result.\n"
24504"/// \\param __b6\n"
24505"/// An 8-bit integer value used to initialize bits [55:48] of the result.\n"
24506"/// \\param __b5\n"
24507"/// An 8-bit integer value used to initialize bits [47:40] of the result.\n"
24508"/// \\param __b4\n"
24509"/// An 8-bit integer value used to initialize bits [39:32] of the result.\n"
24510"/// \\param __b3\n"
24511"/// An 8-bit integer value used to initialize bits [31:24] of the result.\n"
24512"/// \\param __b2\n"
24513"/// An 8-bit integer value used to initialize bits [23:16] of the result.\n"
24514"/// \\param __b1\n"
24515"/// An 8-bit integer value used to initialize bits [15:8] of the result.\n"
24516"/// \\param __b0\n"
24517"/// An 8-bit integer value used to initialize bits [7:0] of the result.\n"
24518"/// \\returns An initialized 64-bit integer vector.\n"
24519"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24520"_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,\n"
24521" char __b1, char __b0)\n"
24522"{\n"
24523" return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,\n"
24524" __b4, __b5, __b6, __b7);\n"
24525"}\n"
24526"\n"
24527"/// Constructs a 64-bit integer vector of [2 x i32], with each of the\n"
24528"/// 32-bit integer vector elements set to the specified 32-bit integer\n"
24529"/// value.\n"
24530"///\n"
24531"/// \\headerfile <x86intrin.h>\n"
24532"///\n"
24533"/// This intrinsic is a utility function and does not correspond to a specific\n"
24534"/// instruction.\n"
24535"///\n"
24536"/// \\param __i\n"
24537"/// A 32-bit integer value used to initialize each vector element of the\n"
24538"/// result.\n"
24539"/// \\returns An initialized 64-bit integer vector of [2 x i32].\n"
24540"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24541"_mm_set1_pi32(int __i)\n"
24542"{\n"
24543" return _mm_set_pi32(__i, __i);\n"
24544"}\n"
24545"\n"
24546"/// Constructs a 64-bit integer vector of [4 x i16], with each of the\n"
24547"/// 16-bit integer vector elements set to the specified 16-bit integer\n"
24548"/// value.\n"
24549"///\n"
24550"/// \\headerfile <x86intrin.h>\n"
24551"///\n"
24552"/// This intrinsic is a utility function and does not correspond to a specific\n"
24553"/// instruction.\n"
24554"///\n"
24555"/// \\param __w\n"
24556"/// A 16-bit integer value used to initialize each vector element of the\n"
24557"/// result.\n"
24558"/// \\returns An initialized 64-bit integer vector of [4 x i16].\n"
24559"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24560"_mm_set1_pi16(short __w)\n"
24561"{\n"
24562" return _mm_set_pi16(__w, __w, __w, __w);\n"
24563"}\n"
24564"\n"
24565"/// Constructs a 64-bit integer vector of [8 x i8], with each of the\n"
24566"/// 8-bit integer vector elements set to the specified 8-bit integer value.\n"
24567"///\n"
24568"/// \\headerfile <x86intrin.h>\n"
24569"///\n"
24570"/// This intrinsic is a utility function and does not correspond to a specific\n"
24571"/// instruction.\n"
24572"///\n"
24573"/// \\param __b\n"
24574"/// An 8-bit integer value used to initialize each vector element of the\n"
24575"/// result.\n"
24576"/// \\returns An initialized 64-bit integer vector of [8 x i8].\n"
24577"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24578"_mm_set1_pi8(char __b)\n"
24579"{\n"
24580" return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);\n"
24581"}\n"
24582"\n"
24583"/// Constructs a 64-bit integer vector, initialized in reverse order with\n"
24584"/// the specified 32-bit integer values.\n"
24585"///\n"
24586"/// \\headerfile <x86intrin.h>\n"
24587"///\n"
24588"/// This intrinsic is a utility function and does not correspond to a specific\n"
24589"/// instruction.\n"
24590"///\n"
24591"/// \\param __i0\n"
24592"/// A 32-bit integer value used to initialize the lower 32 bits of the\n"
24593"/// result.\n"
24594"/// \\param __i1\n"
24595"/// A 32-bit integer value used to initialize the upper 32 bits of the\n"
24596"/// result.\n"
24597"/// \\returns An initialized 64-bit integer vector.\n"
24598"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24599"_mm_setr_pi32(int __i0, int __i1)\n"
24600"{\n"
24601" return _mm_set_pi32(__i1, __i0);\n"
24602"}\n"
24603"\n"
24604"/// Constructs a 64-bit integer vector, initialized in reverse order with\n"
24605"/// the specified 16-bit integer values.\n"
24606"///\n"
24607"/// \\headerfile <x86intrin.h>\n"
24608"///\n"
24609"/// This intrinsic is a utility function and does not correspond to a specific\n"
24610"/// instruction.\n"
24611"///\n"
24612"/// \\param __w0\n"
24613"/// A 16-bit integer value used to initialize bits [15:0] of the result.\n"
24614"/// \\param __w1\n"
24615"/// A 16-bit integer value used to initialize bits [31:16] of the result.\n"
24616"/// \\param __w2\n"
24617"/// A 16-bit integer value used to initialize bits [47:32] of the result.\n"
24618"/// \\param __w3\n"
24619"/// A 16-bit integer value used to initialize bits [63:48] of the result.\n"
24620"/// \\returns An initialized 64-bit integer vector.\n"
24621"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24622"_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)\n"
24623"{\n"
24624" return _mm_set_pi16(__w3, __w2, __w1, __w0);\n"
24625"}\n"
24626"\n"
24627"/// Constructs a 64-bit integer vector, initialized in reverse order with\n"
24628"/// the specified 8-bit integer values.\n"
24629"///\n"
24630"/// \\headerfile <x86intrin.h>\n"
24631"///\n"
24632"/// This intrinsic is a utility function and does not correspond to a specific\n"
24633"/// instruction.\n"
24634"///\n"
24635"/// \\param __b0\n"
24636"/// An 8-bit integer value used to initialize bits [7:0] of the result.\n"
24637"/// \\param __b1\n"
24638"/// An 8-bit integer value used to initialize bits [15:8] of the result.\n"
24639"/// \\param __b2\n"
24640"/// An 8-bit integer value used to initialize bits [23:16] of the result.\n"
24641"/// \\param __b3\n"
24642"/// An 8-bit integer value used to initialize bits [31:24] of the result.\n"
24643"/// \\param __b4\n"
24644"/// An 8-bit integer value used to initialize bits [39:32] of the result.\n"
24645"/// \\param __b5\n"
24646"/// An 8-bit integer value used to initialize bits [47:40] of the result.\n"
24647"/// \\param __b6\n"
24648"/// An 8-bit integer value used to initialize bits [55:48] of the result.\n"
24649"/// \\param __b7\n"
24650"/// An 8-bit integer value used to initialize bits [63:56] of the result.\n"
24651"/// \\returns An initialized 64-bit integer vector.\n"
24652"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24653"_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,\n"
24654" char __b6, char __b7)\n"
24655"{\n"
24656" return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);\n"
24657"}\n"
24658"\n"
24659"#undef __DEFAULT_FN_ATTRS\n"
24660"\n"
24661"/* Aliases for compatibility. */\n"
24662"#define _m_empty _mm_empty\n"
24663"#define _m_from_int _mm_cvtsi32_si64\n"
24664"#define _m_from_int64 _mm_cvtsi64_m64\n"
24665"#define _m_to_int _mm_cvtsi64_si32\n"
24666"#define _m_to_int64 _mm_cvtm64_si64\n"
24667"#define _m_packsswb _mm_packs_pi16\n"
24668"#define _m_packssdw _mm_packs_pi32\n"
24669"#define _m_packuswb _mm_packs_pu16\n"
24670"#define _m_punpckhbw _mm_unpackhi_pi8\n"
24671"#define _m_punpckhwd _mm_unpackhi_pi16\n"
24672"#define _m_punpckhdq _mm_unpackhi_pi32\n"
24673"#define _m_punpcklbw _mm_unpacklo_pi8\n"
24674"#define _m_punpcklwd _mm_unpacklo_pi16\n"
24675"#define _m_punpckldq _mm_unpacklo_pi32\n"
24676"#define _m_paddb _mm_add_pi8\n"
24677"#define _m_paddw _mm_add_pi16\n"
24678"#define _m_paddd _mm_add_pi32\n"
24679"#define _m_paddsb _mm_adds_pi8\n"
24680"#define _m_paddsw _mm_adds_pi16\n"
24681"#define _m_paddusb _mm_adds_pu8\n"
24682"#define _m_paddusw _mm_adds_pu16\n"
24683"#define _m_psubb _mm_sub_pi8\n"
24684"#define _m_psubw _mm_sub_pi16\n"
24685"#define _m_psubd _mm_sub_pi32\n"
24686"#define _m_psubsb _mm_subs_pi8\n"
24687"#define _m_psubsw _mm_subs_pi16\n"
24688"#define _m_psubusb _mm_subs_pu8\n"
24689"#define _m_psubusw _mm_subs_pu16\n"
24690"#define _m_pmaddwd _mm_madd_pi16\n"
24691"#define _m_pmulhw _mm_mulhi_pi16\n"
24692"#define _m_pmullw _mm_mullo_pi16\n"
24693"#define _m_psllw _mm_sll_pi16\n"
24694"#define _m_psllwi _mm_slli_pi16\n"
24695"#define _m_pslld _mm_sll_pi32\n"
24696"#define _m_pslldi _mm_slli_pi32\n"
24697"#define _m_psllq _mm_sll_si64\n"
24698"#define _m_psllqi _mm_slli_si64\n"
24699"#define _m_psraw _mm_sra_pi16\n"
24700"#define _m_psrawi _mm_srai_pi16\n"
24701"#define _m_psrad _mm_sra_pi32\n"
24702"#define _m_psradi _mm_srai_pi32\n"
24703"#define _m_psrlw _mm_srl_pi16\n"
24704"#define _m_psrlwi _mm_srli_pi16\n"
24705"#define _m_psrld _mm_srl_pi32\n"
24706"#define _m_psrldi _mm_srli_pi32\n"
24707"#define _m_psrlq _mm_srl_si64\n"
24708"#define _m_psrlqi _mm_srli_si64\n"
24709"#define _m_pand _mm_and_si64\n"
24710"#define _m_pandn _mm_andnot_si64\n"
24711"#define _m_por _mm_or_si64\n"
24712"#define _m_pxor _mm_xor_si64\n"
24713"#define _m_pcmpeqb _mm_cmpeq_pi8\n"
24714"#define _m_pcmpeqw _mm_cmpeq_pi16\n"
24715"#define _m_pcmpeqd _mm_cmpeq_pi32\n"
24716"#define _m_pcmpgtb _mm_cmpgt_pi8\n"
24717"#define _m_pcmpgtw _mm_cmpgt_pi16\n"
24718"#define _m_pcmpgtd _mm_cmpgt_pi32\n"
24719"\n"
24720"#endif /* __MMINTRIN_H */\n"
24721"\n"
24722"" } ,
24723 { "/builtins/movdirintrin.h" , "/*===------------------------- movdirintrin.h ------------------------------===\n"
24724" *\n"
24725" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
24726" * of this software and associated documentation files (the \"Software\"), to deal\n"
24727" * in the Software without restriction, including without limitation the rights\n"
24728" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
24729" * copies of the Software, and to permit persons to whom the Software is\n"
24730" * furnished to do so, subject to the following conditions:\n"
24731" *\n"
24732" * The above copyright notice and this permission notice shall be included in\n"
24733" * all copies or substantial portions of the Software.\n"
24734" *\n"
24735" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
24736" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
24737" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
24738" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
24739" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
24740" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
24741" * THE SOFTWARE.\n"
24742" *\n"
24743" *===-----------------------------------------------------------------------===\n"
24744" */\n"
24745"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
24746"#error \"Never use <movdirintrin.h> directly; include <x86intrin.h> instead.\"\n"
24747"#endif\n"
24748"\n"
24749"#ifndef _MOVDIRINTRIN_H\n"
24750"#define _MOVDIRINTRIN_H\n"
24751"\n"
24752"/* Move doubleword as direct store */\n"
24753"static __inline__ void\n"
24754"__attribute__((__always_inline__, __nodebug__, __target__(\"movdiri\")))\n"
24755"_directstoreu_u32 (void *__dst, unsigned int __value)\n"
24756"{\n"
24757" __builtin_ia32_directstore_u32((unsigned int *)__dst, (unsigned int)__value);\n"
24758"}\n"
24759"\n"
24760"#ifdef __x86_64__\n"
24761"\n"
24762"/* Move quadword as direct store */\n"
24763"static __inline__ void\n"
24764"__attribute__((__always_inline__, __nodebug__, __target__(\"movdiri\")))\n"
24765"_directstoreu_u64 (void *__dst, unsigned long __value)\n"
24766"{\n"
24767" __builtin_ia32_directstore_u64((unsigned long *)__dst, __value);\n"
24768"}\n"
24769"\n"
24770"#endif /* __x86_64__ */\n"
24771"\n"
24772"/*\n"
24773" * movdir64b - Move 64 bytes as direct store.\n"
24774" * The destination must be 64 byte aligned, and the store is atomic.\n"
24775" * The source address has no alignment requirement, and the load from\n"
24776" * the source address is not atomic.\n"
24777" */\n"
24778"static __inline__ void\n"
24779"__attribute__((__always_inline__, __nodebug__, __target__(\"movdir64b\")))\n"
24780"_movdir64b (void *__dst __attribute__((align_value(64))), const void *__src)\n"
24781"{\n"
24782" __builtin_ia32_movdir64b(__dst, __src);\n"
24783"}\n"
24784"\n"
24785"#endif /* _MOVDIRINTRIN_H */\n"
24786"" } ,
24787 { "/builtins/msa.h" , "/*===---- msa.h - MIPS MSA intrinsics --------------------------------------===\n"
24788" *\n"
24789" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
24790" * of this software and associated documentation files (the \"Software\"), to deal\n"
24791" * in the Software without restriction, including without limitation the rights\n"
24792" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
24793" * copies of the Software, and to permit persons to whom the Software is\n"
24794" * furnished to do so, subject to the following conditions:\n"
24795" *\n"
24796" * The above copyright notice and this permission notice shall be included in\n"
24797" * all copies or substantial portions of the Software.\n"
24798" *\n"
24799" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
24800" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
24801" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
24802" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
24803" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
24804" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
24805" * THE SOFTWARE.\n"
24806" *\n"
24807" *===-----------------------------------------------------------------------===\n"
24808" */\n"
24809"\n"
24810"#ifndef _MSA_H\n"
24811"#define _MSA_H 1\n"
24812"\n"
24813"#if defined(__mips_msa)\n"
24814"typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));\n"
24815"typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1)));\n"
24816"typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16)));\n"
24817"typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1)));\n"
24818"typedef short v8i16 __attribute__((vector_size(16), aligned(16)));\n"
24819"typedef short v8i16_h __attribute__((vector_size(16), aligned(2)));\n"
24820"typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16)));\n"
24821"typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2)));\n"
24822"typedef int v4i32 __attribute__((vector_size(16), aligned(16)));\n"
24823"typedef int v4i32_w __attribute__((vector_size(16), aligned(4)));\n"
24824"typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16)));\n"
24825"typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4)));\n"
24826"typedef long long v2i64 __attribute__((vector_size(16), aligned(16)));\n"
24827"typedef long long v2i64_d __attribute__((vector_size(16), aligned(8)));\n"
24828"typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16)));\n"
24829"typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8)));\n"
24830"typedef float v4f32 __attribute__((vector_size(16), aligned(16)));\n"
24831"typedef float v4f32_w __attribute__((vector_size(16), aligned(4)));\n"
24832"typedef double v2f64 __attribute__ ((vector_size(16), aligned(16)));\n"
24833"typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8)));\n"
24834"\n"
24835"#define __msa_sll_b __builtin_msa_sll_b\n"
24836"#define __msa_sll_h __builtin_msa_sll_h\n"
24837"#define __msa_sll_w __builtin_msa_sll_w\n"
24838"#define __msa_sll_d __builtin_msa_sll_d\n"
24839"#define __msa_slli_b __builtin_msa_slli_b\n"
24840"#define __msa_slli_h __builtin_msa_slli_h\n"
24841"#define __msa_slli_w __builtin_msa_slli_w\n"
24842"#define __msa_slli_d __builtin_msa_slli_d\n"
24843"#define __msa_sra_b __builtin_msa_sra_b\n"
24844"#define __msa_sra_h __builtin_msa_sra_h\n"
24845"#define __msa_sra_w __builtin_msa_sra_w\n"
24846"#define __msa_sra_d __builtin_msa_sra_d\n"
24847"#define __msa_srai_b __builtin_msa_srai_b\n"
24848"#define __msa_srai_h __builtin_msa_srai_h\n"
24849"#define __msa_srai_w __builtin_msa_srai_w\n"
24850"#define __msa_srai_d __builtin_msa_srai_d\n"
24851"#define __msa_srar_b __builtin_msa_srar_b\n"
24852"#define __msa_srar_h __builtin_msa_srar_h\n"
24853"#define __msa_srar_w __builtin_msa_srar_w\n"
24854"#define __msa_srar_d __builtin_msa_srar_d\n"
24855"#define __msa_srari_b __builtin_msa_srari_b\n"
24856"#define __msa_srari_h __builtin_msa_srari_h\n"
24857"#define __msa_srari_w __builtin_msa_srari_w\n"
24858"#define __msa_srari_d __builtin_msa_srari_d\n"
24859"#define __msa_srl_b __builtin_msa_srl_b\n"
24860"#define __msa_srl_h __builtin_msa_srl_h\n"
24861"#define __msa_srl_w __builtin_msa_srl_w\n"
24862"#define __msa_srl_d __builtin_msa_srl_d\n"
24863"#define __msa_srli_b __builtin_msa_srli_b\n"
24864"#define __msa_srli_h __builtin_msa_srli_h\n"
24865"#define __msa_srli_w __builtin_msa_srli_w\n"
24866"#define __msa_srli_d __builtin_msa_srli_d\n"
24867"#define __msa_srlr_b __builtin_msa_srlr_b\n"
24868"#define __msa_srlr_h __builtin_msa_srlr_h\n"
24869"#define __msa_srlr_w __builtin_msa_srlr_w\n"
24870"#define __msa_srlr_d __builtin_msa_srlr_d\n"
24871"#define __msa_srlri_b __builtin_msa_srlri_b\n"
24872"#define __msa_srlri_h __builtin_msa_srlri_h\n"
24873"#define __msa_srlri_w __builtin_msa_srlri_w\n"
24874"#define __msa_srlri_d __builtin_msa_srlri_d\n"
24875"#define __msa_bclr_b __builtin_msa_bclr_b\n"
24876"#define __msa_bclr_h __builtin_msa_bclr_h\n"
24877"#define __msa_bclr_w __builtin_msa_bclr_w\n"
24878"#define __msa_bclr_d __builtin_msa_bclr_d\n"
24879"#define __msa_bclri_b __builtin_msa_bclri_b\n"
24880"#define __msa_bclri_h __builtin_msa_bclri_h\n"
24881"#define __msa_bclri_w __builtin_msa_bclri_w\n"
24882"#define __msa_bclri_d __builtin_msa_bclri_d\n"
24883"#define __msa_bset_b __builtin_msa_bset_b\n"
24884"#define __msa_bset_h __builtin_msa_bset_h\n"
24885"#define __msa_bset_w __builtin_msa_bset_w\n"
24886"#define __msa_bset_d __builtin_msa_bset_d\n"
24887"#define __msa_bseti_b __builtin_msa_bseti_b\n"
24888"#define __msa_bseti_h __builtin_msa_bseti_h\n"
24889"#define __msa_bseti_w __builtin_msa_bseti_w\n"
24890"#define __msa_bseti_d __builtin_msa_bseti_d\n"
24891"#define __msa_bneg_b __builtin_msa_bneg_b\n"
24892"#define __msa_bneg_h __builtin_msa_bneg_h\n"
24893"#define __msa_bneg_w __builtin_msa_bneg_w\n"
24894"#define __msa_bneg_d __builtin_msa_bneg_d\n"
24895"#define __msa_bnegi_b __builtin_msa_bnegi_b\n"
24896"#define __msa_bnegi_h __builtin_msa_bnegi_h\n"
24897"#define __msa_bnegi_w __builtin_msa_bnegi_w\n"
24898"#define __msa_bnegi_d __builtin_msa_bnegi_d\n"
24899"#define __msa_binsl_b __builtin_msa_binsl_b\n"
24900"#define __msa_binsl_h __builtin_msa_binsl_h\n"
24901"#define __msa_binsl_w __builtin_msa_binsl_w\n"
24902"#define __msa_binsl_d __builtin_msa_binsl_d\n"
24903"#define __msa_binsli_b __builtin_msa_binsli_b\n"
24904"#define __msa_binsli_h __builtin_msa_binsli_h\n"
24905"#define __msa_binsli_w __builtin_msa_binsli_w\n"
24906"#define __msa_binsli_d __builtin_msa_binsli_d\n"
24907"#define __msa_binsr_b __builtin_msa_binsr_b\n"
24908"#define __msa_binsr_h __builtin_msa_binsr_h\n"
24909"#define __msa_binsr_w __builtin_msa_binsr_w\n"
24910"#define __msa_binsr_d __builtin_msa_binsr_d\n"
24911"#define __msa_binsri_b __builtin_msa_binsri_b\n"
24912"#define __msa_binsri_h __builtin_msa_binsri_h\n"
24913"#define __msa_binsri_w __builtin_msa_binsri_w\n"
24914"#define __msa_binsri_d __builtin_msa_binsri_d\n"
24915"#define __msa_addv_b __builtin_msa_addv_b\n"
24916"#define __msa_addv_h __builtin_msa_addv_h\n"
24917"#define __msa_addv_w __builtin_msa_addv_w\n"
24918"#define __msa_addv_d __builtin_msa_addv_d\n"
24919"#define __msa_addvi_b __builtin_msa_addvi_b\n"
24920"#define __msa_addvi_h __builtin_msa_addvi_h\n"
24921"#define __msa_addvi_w __builtin_msa_addvi_w\n"
24922"#define __msa_addvi_d __builtin_msa_addvi_d\n"
24923"#define __msa_subv_b __builtin_msa_subv_b\n"
24924"#define __msa_subv_h __builtin_msa_subv_h\n"
24925"#define __msa_subv_w __builtin_msa_subv_w\n"
24926"#define __msa_subv_d __builtin_msa_subv_d\n"
24927"#define __msa_subvi_b __builtin_msa_subvi_b\n"
24928"#define __msa_subvi_h __builtin_msa_subvi_h\n"
24929"#define __msa_subvi_w __builtin_msa_subvi_w\n"
24930"#define __msa_subvi_d __builtin_msa_subvi_d\n"
24931"#define __msa_max_s_b __builtin_msa_max_s_b\n"
24932"#define __msa_max_s_h __builtin_msa_max_s_h\n"
24933"#define __msa_max_s_w __builtin_msa_max_s_w\n"
24934"#define __msa_max_s_d __builtin_msa_max_s_d\n"
24935"#define __msa_maxi_s_b __builtin_msa_maxi_s_b\n"
24936"#define __msa_maxi_s_h __builtin_msa_maxi_s_h\n"
24937"#define __msa_maxi_s_w __builtin_msa_maxi_s_w\n"
24938"#define __msa_maxi_s_d __builtin_msa_maxi_s_d\n"
24939"#define __msa_max_u_b __builtin_msa_max_u_b\n"
24940"#define __msa_max_u_h __builtin_msa_max_u_h\n"
24941"#define __msa_max_u_w __builtin_msa_max_u_w\n"
24942"#define __msa_max_u_d __builtin_msa_max_u_d\n"
24943"#define __msa_maxi_u_b __builtin_msa_maxi_u_b\n"
24944"#define __msa_maxi_u_h __builtin_msa_maxi_u_h\n"
24945"#define __msa_maxi_u_w __builtin_msa_maxi_u_w\n"
24946"#define __msa_maxi_u_d __builtin_msa_maxi_u_d\n"
24947"#define __msa_min_s_b __builtin_msa_min_s_b\n"
24948"#define __msa_min_s_h __builtin_msa_min_s_h\n"
24949"#define __msa_min_s_w __builtin_msa_min_s_w\n"
24950"#define __msa_min_s_d __builtin_msa_min_s_d\n"
24951"#define __msa_mini_s_b __builtin_msa_mini_s_b\n"
24952"#define __msa_mini_s_h __builtin_msa_mini_s_h\n"
24953"#define __msa_mini_s_w __builtin_msa_mini_s_w\n"
24954"#define __msa_mini_s_d __builtin_msa_mini_s_d\n"
24955"#define __msa_min_u_b __builtin_msa_min_u_b\n"
24956"#define __msa_min_u_h __builtin_msa_min_u_h\n"
24957"#define __msa_min_u_w __builtin_msa_min_u_w\n"
24958"#define __msa_min_u_d __builtin_msa_min_u_d\n"
24959"#define __msa_mini_u_b __builtin_msa_mini_u_b\n"
24960"#define __msa_mini_u_h __builtin_msa_mini_u_h\n"
24961"#define __msa_mini_u_w __builtin_msa_mini_u_w\n"
24962"#define __msa_mini_u_d __builtin_msa_mini_u_d\n"
24963"#define __msa_max_a_b __builtin_msa_max_a_b\n"
24964"#define __msa_max_a_h __builtin_msa_max_a_h\n"
24965"#define __msa_max_a_w __builtin_msa_max_a_w\n"
24966"#define __msa_max_a_d __builtin_msa_max_a_d\n"
24967"#define __msa_min_a_b __builtin_msa_min_a_b\n"
24968"#define __msa_min_a_h __builtin_msa_min_a_h\n"
24969"#define __msa_min_a_w __builtin_msa_min_a_w\n"
24970"#define __msa_min_a_d __builtin_msa_min_a_d\n"
24971"#define __msa_ceq_b __builtin_msa_ceq_b\n"
24972"#define __msa_ceq_h __builtin_msa_ceq_h\n"
24973"#define __msa_ceq_w __builtin_msa_ceq_w\n"
24974"#define __msa_ceq_d __builtin_msa_ceq_d\n"
24975"#define __msa_ceqi_b __builtin_msa_ceqi_b\n"
24976"#define __msa_ceqi_h __builtin_msa_ceqi_h\n"
24977"#define __msa_ceqi_w __builtin_msa_ceqi_w\n"
24978"#define __msa_ceqi_d __builtin_msa_ceqi_d\n"
24979"#define __msa_clt_s_b __builtin_msa_clt_s_b\n"
24980"#define __msa_clt_s_h __builtin_msa_clt_s_h\n"
24981"#define __msa_clt_s_w __builtin_msa_clt_s_w\n"
24982"#define __msa_clt_s_d __builtin_msa_clt_s_d\n"
24983"#define __msa_clti_s_b __builtin_msa_clti_s_b\n"
24984"#define __msa_clti_s_h __builtin_msa_clti_s_h\n"
24985"#define __msa_clti_s_w __builtin_msa_clti_s_w\n"
24986"#define __msa_clti_s_d __builtin_msa_clti_s_d\n"
24987"#define __msa_clt_u_b __builtin_msa_clt_u_b\n"
24988"#define __msa_clt_u_h __builtin_msa_clt_u_h\n"
24989"#define __msa_clt_u_w __builtin_msa_clt_u_w\n"
24990"#define __msa_clt_u_d __builtin_msa_clt_u_d\n"
24991"#define __msa_clti_u_b __builtin_msa_clti_u_b\n"
24992"#define __msa_clti_u_h __builtin_msa_clti_u_h\n"
24993"#define __msa_clti_u_w __builtin_msa_clti_u_w\n"
24994"#define __msa_clti_u_d __builtin_msa_clti_u_d\n"
24995"#define __msa_cle_s_b __builtin_msa_cle_s_b\n"
24996"#define __msa_cle_s_h __builtin_msa_cle_s_h\n"
24997"#define __msa_cle_s_w __builtin_msa_cle_s_w\n"
24998"#define __msa_cle_s_d __builtin_msa_cle_s_d\n"
24999"#define __msa_clei_s_b __builtin_msa_clei_s_b\n"
25000"#define __msa_clei_s_h __builtin_msa_clei_s_h\n"
25001"#define __msa_clei_s_w __builtin_msa_clei_s_w\n"
25002"#define __msa_clei_s_d __builtin_msa_clei_s_d\n"
25003"#define __msa_cle_u_b __builtin_msa_cle_u_b\n"
25004"#define __msa_cle_u_h __builtin_msa_cle_u_h\n"
25005"#define __msa_cle_u_w __builtin_msa_cle_u_w\n"
25006"#define __msa_cle_u_d __builtin_msa_cle_u_d\n"
25007"#define __msa_clei_u_b __builtin_msa_clei_u_b\n"
25008"#define __msa_clei_u_h __builtin_msa_clei_u_h\n"
25009"#define __msa_clei_u_w __builtin_msa_clei_u_w\n"
25010"#define __msa_clei_u_d __builtin_msa_clei_u_d\n"
25011"#define __msa_ld_b __builtin_msa_ld_b\n"
25012"#define __msa_ld_h __builtin_msa_ld_h\n"
25013"#define __msa_ld_w __builtin_msa_ld_w\n"
25014"#define __msa_ld_d __builtin_msa_ld_d\n"
25015"#define __msa_st_b __builtin_msa_st_b\n"
25016"#define __msa_st_h __builtin_msa_st_h\n"
25017"#define __msa_st_w __builtin_msa_st_w\n"
25018"#define __msa_st_d __builtin_msa_st_d\n"
25019"#define __msa_sat_s_b __builtin_msa_sat_s_b\n"
25020"#define __msa_sat_s_h __builtin_msa_sat_s_h\n"
25021"#define __msa_sat_s_w __builtin_msa_sat_s_w\n"
25022"#define __msa_sat_s_d __builtin_msa_sat_s_d\n"
25023"#define __msa_sat_u_b __builtin_msa_sat_u_b\n"
25024"#define __msa_sat_u_h __builtin_msa_sat_u_h\n"
25025"#define __msa_sat_u_w __builtin_msa_sat_u_w\n"
25026"#define __msa_sat_u_d __builtin_msa_sat_u_d\n"
25027"#define __msa_add_a_b __builtin_msa_add_a_b\n"
25028"#define __msa_add_a_h __builtin_msa_add_a_h\n"
25029"#define __msa_add_a_w __builtin_msa_add_a_w\n"
25030"#define __msa_add_a_d __builtin_msa_add_a_d\n"
25031"#define __msa_adds_a_b __builtin_msa_adds_a_b\n"
25032"#define __msa_adds_a_h __builtin_msa_adds_a_h\n"
25033"#define __msa_adds_a_w __builtin_msa_adds_a_w\n"
25034"#define __msa_adds_a_d __builtin_msa_adds_a_d\n"
25035"#define __msa_adds_s_b __builtin_msa_adds_s_b\n"
25036"#define __msa_adds_s_h __builtin_msa_adds_s_h\n"
25037"#define __msa_adds_s_w __builtin_msa_adds_s_w\n"
25038"#define __msa_adds_s_d __builtin_msa_adds_s_d\n"
25039"#define __msa_adds_u_b __builtin_msa_adds_u_b\n"
25040"#define __msa_adds_u_h __builtin_msa_adds_u_h\n"
25041"#define __msa_adds_u_w __builtin_msa_adds_u_w\n"
25042"#define __msa_adds_u_d __builtin_msa_adds_u_d\n"
25043"#define __msa_ave_s_b __builtin_msa_ave_s_b\n"
25044"#define __msa_ave_s_h __builtin_msa_ave_s_h\n"
25045"#define __msa_ave_s_w __builtin_msa_ave_s_w\n"
25046"#define __msa_ave_s_d __builtin_msa_ave_s_d\n"
25047"#define __msa_ave_u_b __builtin_msa_ave_u_b\n"
25048"#define __msa_ave_u_h __builtin_msa_ave_u_h\n"
25049"#define __msa_ave_u_w __builtin_msa_ave_u_w\n"
25050"#define __msa_ave_u_d __builtin_msa_ave_u_d\n"
25051"#define __msa_aver_s_b __builtin_msa_aver_s_b\n"
25052"#define __msa_aver_s_h __builtin_msa_aver_s_h\n"
25053"#define __msa_aver_s_w __builtin_msa_aver_s_w\n"
25054"#define __msa_aver_s_d __builtin_msa_aver_s_d\n"
25055"#define __msa_aver_u_b __builtin_msa_aver_u_b\n"
25056"#define __msa_aver_u_h __builtin_msa_aver_u_h\n"
25057"#define __msa_aver_u_w __builtin_msa_aver_u_w\n"
25058"#define __msa_aver_u_d __builtin_msa_aver_u_d\n"
25059"#define __msa_subs_s_b __builtin_msa_subs_s_b\n"
25060"#define __msa_subs_s_h __builtin_msa_subs_s_h\n"
25061"#define __msa_subs_s_w __builtin_msa_subs_s_w\n"
25062"#define __msa_subs_s_d __builtin_msa_subs_s_d\n"
25063"#define __msa_subs_u_b __builtin_msa_subs_u_b\n"
25064"#define __msa_subs_u_h __builtin_msa_subs_u_h\n"
25065"#define __msa_subs_u_w __builtin_msa_subs_u_w\n"
25066"#define __msa_subs_u_d __builtin_msa_subs_u_d\n"
25067"#define __msa_subsuu_s_b __builtin_msa_subsuu_s_b\n"
25068"#define __msa_subsuu_s_h __builtin_msa_subsuu_s_h\n"
25069"#define __msa_subsuu_s_w __builtin_msa_subsuu_s_w\n"
25070"#define __msa_subsuu_s_d __builtin_msa_subsuu_s_d\n"
25071"#define __msa_subsus_u_b __builtin_msa_subsus_u_b\n"
25072"#define __msa_subsus_u_h __builtin_msa_subsus_u_h\n"
25073"#define __msa_subsus_u_w __builtin_msa_subsus_u_w\n"
25074"#define __msa_subsus_u_d __builtin_msa_subsus_u_d\n"
25075"#define __msa_asub_s_b __builtin_msa_asub_s_b\n"
25076"#define __msa_asub_s_h __builtin_msa_asub_s_h\n"
25077"#define __msa_asub_s_w __builtin_msa_asub_s_w\n"
25078"#define __msa_asub_s_d __builtin_msa_asub_s_d\n"
25079"#define __msa_asub_u_b __builtin_msa_asub_u_b\n"
25080"#define __msa_asub_u_h __builtin_msa_asub_u_h\n"
25081"#define __msa_asub_u_w __builtin_msa_asub_u_w\n"
25082"#define __msa_asub_u_d __builtin_msa_asub_u_d\n"
25083"#define __msa_mulv_b __builtin_msa_mulv_b\n"
25084"#define __msa_mulv_h __builtin_msa_mulv_h\n"
25085"#define __msa_mulv_w __builtin_msa_mulv_w\n"
25086"#define __msa_mulv_d __builtin_msa_mulv_d\n"
25087"#define __msa_maddv_b __builtin_msa_maddv_b\n"
25088"#define __msa_maddv_h __builtin_msa_maddv_h\n"
25089"#define __msa_maddv_w __builtin_msa_maddv_w\n"
25090"#define __msa_maddv_d __builtin_msa_maddv_d\n"
25091"#define __msa_msubv_b __builtin_msa_msubv_b\n"
25092"#define __msa_msubv_h __builtin_msa_msubv_h\n"
25093"#define __msa_msubv_w __builtin_msa_msubv_w\n"
25094"#define __msa_msubv_d __builtin_msa_msubv_d\n"
25095"#define __msa_div_s_b __builtin_msa_div_s_b\n"
25096"#define __msa_div_s_h __builtin_msa_div_s_h\n"
25097"#define __msa_div_s_w __builtin_msa_div_s_w\n"
25098"#define __msa_div_s_d __builtin_msa_div_s_d\n"
25099"#define __msa_div_u_b __builtin_msa_div_u_b\n"
25100"#define __msa_div_u_h __builtin_msa_div_u_h\n"
25101"#define __msa_div_u_w __builtin_msa_div_u_w\n"
25102"#define __msa_div_u_d __builtin_msa_div_u_d\n"
25103"#define __msa_hadd_s_h __builtin_msa_hadd_s_h\n"
25104"#define __msa_hadd_s_w __builtin_msa_hadd_s_w\n"
25105"#define __msa_hadd_s_d __builtin_msa_hadd_s_d\n"
25106"#define __msa_hadd_u_h __builtin_msa_hadd_u_h\n"
25107"#define __msa_hadd_u_w __builtin_msa_hadd_u_w\n"
25108"#define __msa_hadd_u_d __builtin_msa_hadd_u_d\n"
25109"#define __msa_hsub_s_h __builtin_msa_hsub_s_h\n"
25110"#define __msa_hsub_s_w __builtin_msa_hsub_s_w\n"
25111"#define __msa_hsub_s_d __builtin_msa_hsub_s_d\n"
25112"#define __msa_hsub_u_h __builtin_msa_hsub_u_h\n"
25113"#define __msa_hsub_u_w __builtin_msa_hsub_u_w\n"
25114"#define __msa_hsub_u_d __builtin_msa_hsub_u_d\n"
25115"#define __msa_mod_s_b __builtin_msa_mod_s_b\n"
25116"#define __msa_mod_s_h __builtin_msa_mod_s_h\n"
25117"#define __msa_mod_s_w __builtin_msa_mod_s_w\n"
25118"#define __msa_mod_s_d __builtin_msa_mod_s_d\n"
25119"#define __msa_mod_u_b __builtin_msa_mod_u_b\n"
25120"#define __msa_mod_u_h __builtin_msa_mod_u_h\n"
25121"#define __msa_mod_u_w __builtin_msa_mod_u_w\n"
25122"#define __msa_mod_u_d __builtin_msa_mod_u_d\n"
25123"#define __msa_dotp_s_h __builtin_msa_dotp_s_h\n"
25124"#define __msa_dotp_s_w __builtin_msa_dotp_s_w\n"
25125"#define __msa_dotp_s_d __builtin_msa_dotp_s_d\n"
25126"#define __msa_dotp_u_h __builtin_msa_dotp_u_h\n"
25127"#define __msa_dotp_u_w __builtin_msa_dotp_u_w\n"
25128"#define __msa_dotp_u_d __builtin_msa_dotp_u_d\n"
25129"#define __msa_dpadd_s_h __builtin_msa_dpadd_s_h\n"
25130"#define __msa_dpadd_s_w __builtin_msa_dpadd_s_w\n"
25131"#define __msa_dpadd_s_d __builtin_msa_dpadd_s_d\n"
25132"#define __msa_dpadd_u_h __builtin_msa_dpadd_u_h\n"
25133"#define __msa_dpadd_u_w __builtin_msa_dpadd_u_w\n"
25134"#define __msa_dpadd_u_d __builtin_msa_dpadd_u_d\n"
25135"#define __msa_dpsub_s_h __builtin_msa_dpsub_s_h\n"
25136"#define __msa_dpsub_s_w __builtin_msa_dpsub_s_w\n"
25137"#define __msa_dpsub_s_d __builtin_msa_dpsub_s_d\n"
25138"#define __msa_dpsub_u_h __builtin_msa_dpsub_u_h\n"
25139"#define __msa_dpsub_u_w __builtin_msa_dpsub_u_w\n"
25140"#define __msa_dpsub_u_d __builtin_msa_dpsub_u_d\n"
25141"#define __msa_sld_b __builtin_msa_sld_b\n"
25142"#define __msa_sld_h __builtin_msa_sld_h\n"
25143"#define __msa_sld_w __builtin_msa_sld_w\n"
25144"#define __msa_sld_d __builtin_msa_sld_d\n"
25145"#define __msa_sldi_b __builtin_msa_sldi_b\n"
25146"#define __msa_sldi_h __builtin_msa_sldi_h\n"
25147"#define __msa_sldi_w __builtin_msa_sldi_w\n"
25148"#define __msa_sldi_d __builtin_msa_sldi_d\n"
25149"#define __msa_splat_b __builtin_msa_splat_b\n"
25150"#define __msa_splat_h __builtin_msa_splat_h\n"
25151"#define __msa_splat_w __builtin_msa_splat_w\n"
25152"#define __msa_splat_d __builtin_msa_splat_d\n"
25153"#define __msa_splati_b __builtin_msa_splati_b\n"
25154"#define __msa_splati_h __builtin_msa_splati_h\n"
25155"#define __msa_splati_w __builtin_msa_splati_w\n"
25156"#define __msa_splati_d __builtin_msa_splati_d\n"
25157"#define __msa_pckev_b __builtin_msa_pckev_b\n"
25158"#define __msa_pckev_h __builtin_msa_pckev_h\n"
25159"#define __msa_pckev_w __builtin_msa_pckev_w\n"
25160"#define __msa_pckev_d __builtin_msa_pckev_d\n"
25161"#define __msa_pckod_b __builtin_msa_pckod_b\n"
25162"#define __msa_pckod_h __builtin_msa_pckod_h\n"
25163"#define __msa_pckod_w __builtin_msa_pckod_w\n"
25164"#define __msa_pckod_d __builtin_msa_pckod_d\n"
25165"#define __msa_ilvl_b __builtin_msa_ilvl_b\n"
25166"#define __msa_ilvl_h __builtin_msa_ilvl_h\n"
25167"#define __msa_ilvl_w __builtin_msa_ilvl_w\n"
25168"#define __msa_ilvl_d __builtin_msa_ilvl_d\n"
25169"#define __msa_ilvr_b __builtin_msa_ilvr_b\n"
25170"#define __msa_ilvr_h __builtin_msa_ilvr_h\n"
25171"#define __msa_ilvr_w __builtin_msa_ilvr_w\n"
25172"#define __msa_ilvr_d __builtin_msa_ilvr_d\n"
25173"#define __msa_ilvev_b __builtin_msa_ilvev_b\n"
25174"#define __msa_ilvev_h __builtin_msa_ilvev_h\n"
25175"#define __msa_ilvev_w __builtin_msa_ilvev_w\n"
25176"#define __msa_ilvev_d __builtin_msa_ilvev_d\n"
25177"#define __msa_ilvod_b __builtin_msa_ilvod_b\n"
25178"#define __msa_ilvod_h __builtin_msa_ilvod_h\n"
25179"#define __msa_ilvod_w __builtin_msa_ilvod_w\n"
25180"#define __msa_ilvod_d __builtin_msa_ilvod_d\n"
25181"#define __msa_vshf_b __builtin_msa_vshf_b\n"
25182"#define __msa_vshf_h __builtin_msa_vshf_h\n"
25183"#define __msa_vshf_w __builtin_msa_vshf_w\n"
25184"#define __msa_vshf_d __builtin_msa_vshf_d\n"
25185"#define __msa_and_v __builtin_msa_and_v\n"
25186"#define __msa_andi_b __builtin_msa_andi_b\n"
25187"#define __msa_or_v __builtin_msa_or_v\n"
25188"#define __msa_ori_b __builtin_msa_ori_b\n"
25189"#define __msa_nor_v __builtin_msa_nor_v\n"
25190"#define __msa_nori_b __builtin_msa_nori_b\n"
25191"#define __msa_xor_v __builtin_msa_xor_v\n"
25192"#define __msa_xori_b __builtin_msa_xori_b\n"
25193"#define __msa_bmnz_v __builtin_msa_bmnz_v\n"
25194"#define __msa_bmnzi_b __builtin_msa_bmnzi_b\n"
25195"#define __msa_bmz_v __builtin_msa_bmz_v\n"
25196"#define __msa_bmzi_b __builtin_msa_bmzi_b\n"
25197"#define __msa_bsel_v __builtin_msa_bsel_v\n"
25198"#define __msa_bseli_b __builtin_msa_bseli_b\n"
25199"#define __msa_shf_b __builtin_msa_shf_b\n"
25200"#define __msa_shf_h __builtin_msa_shf_h\n"
25201"#define __msa_shf_w __builtin_msa_shf_w\n"
25202"#define __msa_test_bnz_v __builtin_msa_bnz_v\n"
25203"#define __msa_test_bz_v __builtin_msa_bz_v\n"
25204"#define __msa_fill_b __builtin_msa_fill_b\n"
25205"#define __msa_fill_h __builtin_msa_fill_h\n"
25206"#define __msa_fill_w __builtin_msa_fill_w\n"
25207"#define __msa_fill_d __builtin_msa_fill_d\n"
25208"#define __msa_pcnt_b __builtin_msa_pcnt_b\n"
25209"#define __msa_pcnt_h __builtin_msa_pcnt_h\n"
25210"#define __msa_pcnt_w __builtin_msa_pcnt_w\n"
25211"#define __msa_pcnt_d __builtin_msa_pcnt_d\n"
25212"#define __msa_nloc_b __builtin_msa_nloc_b\n"
25213"#define __msa_nloc_h __builtin_msa_nloc_h\n"
25214"#define __msa_nloc_w __builtin_msa_nloc_w\n"
25215"#define __msa_nloc_d __builtin_msa_nloc_d\n"
25216"#define __msa_nlzc_b __builtin_msa_nlzc_b\n"
25217"#define __msa_nlzc_h __builtin_msa_nlzc_h\n"
25218"#define __msa_nlzc_w __builtin_msa_nlzc_w\n"
25219"#define __msa_nlzc_d __builtin_msa_nlzc_d\n"
25220"#define __msa_copy_s_b __builtin_msa_copy_s_b\n"
25221"#define __msa_copy_s_h __builtin_msa_copy_s_h\n"
25222"#define __msa_copy_s_w __builtin_msa_copy_s_w\n"
25223"#define __msa_copy_s_d __builtin_msa_copy_s_d\n"
25224"#define __msa_copy_u_b __builtin_msa_copy_u_b\n"
25225"#define __msa_copy_u_h __builtin_msa_copy_u_h\n"
25226"#define __msa_copy_u_w __builtin_msa_copy_u_w\n"
25227"#define __msa_copy_u_d __builtin_msa_copy_u_d\n"
25228"#define __msa_insert_b __builtin_msa_insert_b\n"
25229"#define __msa_insert_h __builtin_msa_insert_h\n"
25230"#define __msa_insert_w __builtin_msa_insert_w\n"
25231"#define __msa_insert_d __builtin_msa_insert_d\n"
25232"#define __msa_insve_b __builtin_msa_insve_b\n"
25233"#define __msa_insve_h __builtin_msa_insve_h\n"
25234"#define __msa_insve_w __builtin_msa_insve_w\n"
25235"#define __msa_insve_d __builtin_msa_insve_d\n"
25236"#define __msa_test_bnz_b __builtin_msa_bnz_b\n"
25237"#define __msa_test_bnz_h __builtin_msa_bnz_h\n"
25238"#define __msa_test_bnz_w __builtin_msa_bnz_w\n"
25239"#define __msa_test_bnz_d __builtin_msa_bnz_d\n"
25240"#define __msa_test_bz_b __builtin_msa_bz_b\n"
25241"#define __msa_test_bz_h __builtin_msa_bz_h\n"
25242"#define __msa_test_bz_w __builtin_msa_bz_w\n"
25243"#define __msa_test_bz_d __builtin_msa_bz_d\n"
25244"#define __msa_ldi_b __builtin_msa_ldi_b\n"
25245"#define __msa_ldi_h __builtin_msa_ldi_h\n"
25246"#define __msa_ldi_w __builtin_msa_ldi_w\n"
25247"#define __msa_ldi_d __builtin_msa_ldi_d\n"
25248"#define __msa_fcaf_w __builtin_msa_fcaf_w\n"
25249"#define __msa_fcaf_d __builtin_msa_fcaf_d\n"
25250"#define __msa_fcor_w __builtin_msa_fcor_w\n"
25251"#define __msa_fcor_d __builtin_msa_fcor_d\n"
25252"#define __msa_fcun_w __builtin_msa_fcun_w\n"
25253"#define __msa_fcun_d __builtin_msa_fcun_d\n"
25254"#define __msa_fcune_w __builtin_msa_fcune_w\n"
25255"#define __msa_fcune_d __builtin_msa_fcune_d\n"
25256"#define __msa_fcueq_w __builtin_msa_fcueq_w\n"
25257"#define __msa_fcueq_d __builtin_msa_fcueq_d\n"
25258"#define __msa_fceq_w __builtin_msa_fceq_w\n"
25259"#define __msa_fceq_d __builtin_msa_fceq_d\n"
25260"#define __msa_fcne_w __builtin_msa_fcne_w\n"
25261"#define __msa_fcne_d __builtin_msa_fcne_d\n"
25262"#define __msa_fclt_w __builtin_msa_fclt_w\n"
25263"#define __msa_fclt_d __builtin_msa_fclt_d\n"
25264"#define __msa_fcult_w __builtin_msa_fcult_w\n"
25265"#define __msa_fcult_d __builtin_msa_fcult_d\n"
25266"#define __msa_fcle_w __builtin_msa_fcle_w\n"
25267"#define __msa_fcle_d __builtin_msa_fcle_d\n"
25268"#define __msa_fcule_w __builtin_msa_fcule_w\n"
25269"#define __msa_fcule_d __builtin_msa_fcule_d\n"
25270"#define __msa_fsaf_w __builtin_msa_fsaf_w\n"
25271"#define __msa_fsaf_d __builtin_msa_fsaf_d\n"
25272"#define __msa_fsor_w __builtin_msa_fsor_w\n"
25273"#define __msa_fsor_d __builtin_msa_fsor_d\n"
25274"#define __msa_fsun_w __builtin_msa_fsun_w\n"
25275"#define __msa_fsun_d __builtin_msa_fsun_d\n"
25276"#define __msa_fsune_w __builtin_msa_fsune_w\n"
25277"#define __msa_fsune_d __builtin_msa_fsune_d\n"
25278"#define __msa_fsueq_w __builtin_msa_fsueq_w\n"
25279"#define __msa_fsueq_d __builtin_msa_fsueq_d\n"
25280"#define __msa_fseq_w __builtin_msa_fseq_w\n"
25281"#define __msa_fseq_d __builtin_msa_fseq_d\n"
25282"#define __msa_fsne_w __builtin_msa_fsne_w\n"
25283"#define __msa_fsne_d __builtin_msa_fsne_d\n"
25284"#define __msa_fslt_w __builtin_msa_fslt_w\n"
25285"#define __msa_fslt_d __builtin_msa_fslt_d\n"
25286"#define __msa_fsult_w __builtin_msa_fsult_w\n"
25287"#define __msa_fsult_d __builtin_msa_fsult_d\n"
25288"#define __msa_fsle_w __builtin_msa_fsle_w\n"
25289"#define __msa_fsle_d __builtin_msa_fsle_d\n"
25290"#define __msa_fsule_w __builtin_msa_fsule_w\n"
25291"#define __msa_fsule_d __builtin_msa_fsule_d\n"
25292"#define __msa_fadd_w __builtin_msa_fadd_w\n"
25293"#define __msa_fadd_d __builtin_msa_fadd_d\n"
25294"#define __msa_fsub_w __builtin_msa_fsub_w\n"
25295"#define __msa_fsub_d __builtin_msa_fsub_d\n"
25296"#define __msa_fmul_w __builtin_msa_fmul_w\n"
25297"#define __msa_fmul_d __builtin_msa_fmul_d\n"
25298"#define __msa_fdiv_w __builtin_msa_fdiv_w\n"
25299"#define __msa_fdiv_d __builtin_msa_fdiv_d\n"
25300"#define __msa_fmadd_w __builtin_msa_fmadd_w\n"
25301"#define __msa_fmadd_d __builtin_msa_fmadd_d\n"
25302"#define __msa_fmsub_w __builtin_msa_fmsub_w\n"
25303"#define __msa_fmsub_d __builtin_msa_fmsub_d\n"
25304"#define __msa_fexp2_w __builtin_msa_fexp2_w\n"
25305"#define __msa_fexp2_d __builtin_msa_fexp2_d\n"
25306"#define __msa_fexdo_h __builtin_msa_fexdo_h\n"
25307"#define __msa_fexdo_w __builtin_msa_fexdo_w\n"
25308"#define __msa_ftq_h __builtin_msa_ftq_h\n"
25309"#define __msa_ftq_w __builtin_msa_ftq_w\n"
25310"#define __msa_fmin_w __builtin_msa_fmin_w\n"
25311"#define __msa_fmin_d __builtin_msa_fmin_d\n"
25312"#define __msa_fmin_a_w __builtin_msa_fmin_a_w\n"
25313"#define __msa_fmin_a_d __builtin_msa_fmin_a_d\n"
25314"#define __msa_fmax_w __builtin_msa_fmax_w\n"
25315"#define __msa_fmax_d __builtin_msa_fmax_d\n"
25316"#define __msa_fmax_a_w __builtin_msa_fmax_a_w\n"
25317"#define __msa_fmax_a_d __builtin_msa_fmax_a_d\n"
25318"#define __msa_mul_q_h __builtin_msa_mul_q_h\n"
25319"#define __msa_mul_q_w __builtin_msa_mul_q_w\n"
25320"#define __msa_mulr_q_h __builtin_msa_mulr_q_h\n"
25321"#define __msa_mulr_q_w __builtin_msa_mulr_q_w\n"
25322"#define __msa_madd_q_h __builtin_msa_madd_q_h\n"
25323"#define __msa_madd_q_w __builtin_msa_madd_q_w\n"
25324"#define __msa_maddr_q_h __builtin_msa_maddr_q_h\n"
25325"#define __msa_maddr_q_w __builtin_msa_maddr_q_w\n"
25326"#define __msa_msub_q_h __builtin_msa_msub_q_h\n"
25327"#define __msa_msub_q_w __builtin_msa_msub_q_w\n"
25328"#define __msa_msubr_q_h __builtin_msa_msubr_q_h\n"
25329"#define __msa_msubr_q_w __builtin_msa_msubr_q_w\n"
25330"#define __msa_fclass_w __builtin_msa_fclass_w\n"
25331"#define __msa_fclass_d __builtin_msa_fclass_d\n"
25332"#define __msa_fsqrt_w __builtin_msa_fsqrt_w\n"
25333"#define __msa_fsqrt_d __builtin_msa_fsqrt_d\n"
25334"#define __msa_frcp_w __builtin_msa_frcp_w\n"
25335"#define __msa_frcp_d __builtin_msa_frcp_d\n"
25336"#define __msa_frint_w __builtin_msa_frint_w\n"
25337"#define __msa_frint_d __builtin_msa_frint_d\n"
25338"#define __msa_frsqrt_w __builtin_msa_frsqrt_w\n"
25339"#define __msa_frsqrt_d __builtin_msa_frsqrt_d\n"
25340"#define __msa_flog2_w __builtin_msa_flog2_w\n"
25341"#define __msa_flog2_d __builtin_msa_flog2_d\n"
25342"#define __msa_fexupl_w __builtin_msa_fexupl_w\n"
25343"#define __msa_fexupl_d __builtin_msa_fexupl_d\n"
25344"#define __msa_fexupr_w __builtin_msa_fexupr_w\n"
25345"#define __msa_fexupr_d __builtin_msa_fexupr_d\n"
25346"#define __msa_ffql_w __builtin_msa_ffql_w\n"
25347"#define __msa_ffql_d __builtin_msa_ffql_d\n"
25348"#define __msa_ffqr_w __builtin_msa_ffqr_w\n"
25349"#define __msa_ffqr_d __builtin_msa_ffqr_d\n"
25350"#define __msa_ftint_s_w __builtin_msa_ftint_s_w\n"
25351"#define __msa_ftint_s_d __builtin_msa_ftint_s_d\n"
25352"#define __msa_ftint_u_w __builtin_msa_ftint_u_w\n"
25353"#define __msa_ftint_u_d __builtin_msa_ftint_u_d\n"
25354"#define __msa_ftrunc_s_w __builtin_msa_ftrunc_s_w\n"
25355"#define __msa_ftrunc_s_d __builtin_msa_ftrunc_s_d\n"
25356"#define __msa_ftrunc_u_w __builtin_msa_ftrunc_u_w\n"
25357"#define __msa_ftrunc_u_d __builtin_msa_ftrunc_u_d\n"
25358"#define __msa_ffint_s_w __builtin_msa_ffint_s_w\n"
25359"#define __msa_ffint_s_d __builtin_msa_ffint_s_d\n"
25360"#define __msa_ffint_u_w __builtin_msa_ffint_u_w\n"
25361"#define __msa_ffint_u_d __builtin_msa_ffint_u_d\n"
25362"#define __msa_cfcmsa __builtin_msa_cfcmsa\n"
25363"#define __msa_move_v __builtin_msa_move_v\n"
25364"#define __msa_cast_to_vector_float __builtin_msa_cast_to_vector_float\n"
25365"#define __msa_cast_to_vector_double __builtin_msa_cast_to_vector_double\n"
25366"#define __msa_cast_to_scalar_float __builtin_msa_cast_to_scalar_float\n"
25367"#define __msa_cast_to_scalar_double __builtin_msa_cast_to_scalar_double\n"
25368"#endif /* defined(__mips_msa) */\n"
25369"#endif /* _MSA_H */\n"
25370"" } ,
25371 { "/builtins/mwaitxintrin.h" , "/*===---- mwaitxintrin.h - MONITORX/MWAITX intrinsics ----------------------===\n"
25372" *\n"
25373" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
25374" * of this software and associated documentation files (the \"Software\"), to deal\n"
25375" * in the Software without restriction, including without limitation the rights\n"
25376" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
25377" * copies of the Software, and to permit persons to whom the Software is\n"
25378" * furnished to do so, subject to the following conditions:\n"
25379" *\n"
25380" * The above copyright notice and this permission notice shall be included in\n"
25381" * all copies or substantial portions of the Software.\n"
25382" *\n"
25383" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
25384" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
25385" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
25386" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
25387" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
25388" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
25389" * THE SOFTWARE.\n"
25390" *\n"
25391" *===-----------------------------------------------------------------------===\n"
25392" */\n"
25393"\n"
25394"#ifndef __X86INTRIN_H\n"
25395"#error \"Never use <mwaitxintrin.h> directly; include <x86intrin.h> instead.\"\n"
25396"#endif\n"
25397"\n"
25398"#ifndef __MWAITXINTRIN_H\n"
25399"#define __MWAITXINTRIN_H\n"
25400"\n"
25401"/* Define the default attributes for the functions in this file. */\n"
25402"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"mwaitx\")))\n"
25403"static __inline__ void __DEFAULT_FN_ATTRS\n"
25404"_mm_monitorx(void const * __p, unsigned __extensions, unsigned __hints)\n"
25405"{\n"
25406" __builtin_ia32_monitorx((void *)__p, __extensions, __hints);\n"
25407"}\n"
25408"\n"
25409"static __inline__ void __DEFAULT_FN_ATTRS\n"
25410"_mm_mwaitx(unsigned __extensions, unsigned __hints, unsigned __clock)\n"
25411"{\n"
25412" __builtin_ia32_mwaitx(__extensions, __hints, __clock);\n"
25413"}\n"
25414"\n"
25415"#undef __DEFAULT_FN_ATTRS\n"
25416"\n"
25417"#endif /* __MWAITXINTRIN_H */\n"
25418"" } ,
25419 { "/builtins/nmmintrin.h" , "/*===---- nmmintrin.h - SSE4 intrinsics ------------------------------------===\n"
25420" *\n"
25421" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
25422" * of this software and associated documentation files (the \"Software\"), to deal\n"
25423" * in the Software without restriction, including without limitation the rights\n"
25424" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
25425" * copies of the Software, and to permit persons to whom the Software is\n"
25426" * furnished to do so, subject to the following conditions:\n"
25427" *\n"
25428" * The above copyright notice and this permission notice shall be included in\n"
25429" * all copies or substantial portions of the Software.\n"
25430" *\n"
25431" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
25432" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
25433" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
25434" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
25435" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
25436" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
25437" * THE SOFTWARE.\n"
25438" *\n"
25439" *===-----------------------------------------------------------------------===\n"
25440" */\n"
25441"\n"
25442"#ifndef __NMMINTRIN_H\n"
25443"#define __NMMINTRIN_H\n"
25444"\n"
25445"/* To match expectations of gcc we put the sse4.2 definitions into smmintrin.h,\n"
25446" just include it now then. */\n"
25447"#include <smmintrin.h>\n"
25448"#endif /* __NMMINTRIN_H */\n"
25449"" } ,
25450 { "/builtins/omp-tools.h" , "/*\n"
25451" * include/50/omp-tools.h.var\n"
25452" */\n"
25453"\n"
25454"//===----------------------------------------------------------------------===//\n"
25455"//\n"
25456"// The LLVM Compiler Infrastructure\n"
25457"//\n"
25458"// This file is dual licensed under the MIT and the University of Illinois Open\n"
25459"// Source Licenses. See LICENSE.txt for details.\n"
25460"//\n"
25461"//===----------------------------------------------------------------------===//\n"
25462"\n"
25463"#ifndef __OMPT__\n"
25464"#define __OMPT__\n"
25465"\n"
25466"/*****************************************************************************\n"
25467" * system include files\n"
25468" *****************************************************************************/\n"
25469"\n"
25470"#include <stdint.h>\n"
25471"#include <stddef.h>\n"
25472"\n"
25473"/*****************************************************************************\n"
25474" * iteration macros\n"
25475" *****************************************************************************/\n"
25476"\n"
25477"#define FOREACH_OMPT_INQUIRY_FN(macro) \\\n"
25478" macro (ompt_enumerate_states) \\\n"
25479" macro (ompt_enumerate_mutex_impls) \\\n"
25480" \\\n"
25481" macro (ompt_set_callback) \\\n"
25482" macro (ompt_get_callback) \\\n"
25483" \\\n"
25484" macro (ompt_get_state) \\\n"
25485" \\\n"
25486" macro (ompt_get_parallel_info) \\\n"
25487" macro (ompt_get_task_info) \\\n"
25488" macro (ompt_get_task_memory) \\\n"
25489" macro (ompt_get_thread_data) \\\n"
25490" macro (ompt_get_unique_id) \\\n"
25491" macro (ompt_finalize_tool) \\\n"
25492" \\\n"
25493" macro(ompt_get_num_procs) \\\n"
25494" macro(ompt_get_num_places) \\\n"
25495" macro(ompt_get_place_proc_ids) \\\n"
25496" macro(ompt_get_place_num) \\\n"
25497" macro(ompt_get_partition_place_nums) \\\n"
25498" macro(ompt_get_proc_id) \\\n"
25499" \\\n"
25500" macro(ompt_get_target_info) \\\n"
25501" macro(ompt_get_num_devices)\n"
25502"\n"
25503"#define FOREACH_OMPT_STATE(macro) \\\n"
25504" \\\n"
25505" /* first available state */ \\\n"
25506" macro (ompt_state_undefined, 0x102) /* undefined thread state */ \\\n"
25507" \\\n"
25508" /* work states (0..15) */ \\\n"
25509" macro (ompt_state_work_serial, 0x000) /* working outside parallel */ \\\n"
25510" macro (ompt_state_work_parallel, 0x001) /* working within parallel */ \\\n"
25511" macro (ompt_state_work_reduction, 0x002) /* performing a reduction */ \\\n"
25512" \\\n"
25513" /* barrier wait states (16..31) */ \\\n"
25514" macro (ompt_state_wait_barrier, 0x010) /* waiting at a barrier */ \\\n"
25515" macro (ompt_state_wait_barrier_implicit_parallel, 0x011) \\\n"
25516" /* implicit barrier at the end of parallel region */\\\n"
25517" macro (ompt_state_wait_barrier_implicit_workshare, 0x012) \\\n"
25518" /* implicit barrier at the end of worksharing */ \\\n"
25519" macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \\\n"
25520" macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \\\n"
25521" \\\n"
25522" /* task wait states (32..63) */ \\\n"
25523" macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \\\n"
25524" macro (ompt_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \\\n"
25525" \\\n"
25526" /* mutex wait states (64..127) */ \\\n"
25527" macro (ompt_state_wait_mutex, 0x040) \\\n"
25528" macro (ompt_state_wait_lock, 0x041) /* waiting for lock */ \\\n"
25529" macro (ompt_state_wait_critical, 0x042) /* waiting for critical */ \\\n"
25530" macro (ompt_state_wait_atomic, 0x043) /* waiting for atomic */ \\\n"
25531" macro (ompt_state_wait_ordered, 0x044) /* waiting for ordered */ \\\n"
25532" \\\n"
25533" /* target wait states (128..255) */ \\\n"
25534" macro (ompt_state_wait_target, 0x080) /* waiting for target region */ \\\n"
25535" macro (ompt_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \\\n"
25536" macro (ompt_state_wait_target_update, 0x082) /* waiting for target update operation */ \\\n"
25537" \\\n"
25538" /* misc (256..511) */ \\\n"
25539" macro (ompt_state_idle, 0x100) /* waiting for work */ \\\n"
25540" macro (ompt_state_overhead, 0x101) /* overhead excluding wait states */ \\\n"
25541" \\\n"
25542" /* implementation-specific states (512..) */\n"
25543"\n"
25544"\n"
25545"#define FOREACH_KMP_MUTEX_IMPL(macro) \\\n"
25546" macro (kmp_mutex_impl_none, 0) /* unknown implementation */ \\\n"
25547" macro (kmp_mutex_impl_spin, 1) /* based on spin */ \\\n"
25548" macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \\\n"
25549" macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */\n"
25550"\n"
25551"#define FOREACH_OMPT_EVENT(macro) \\\n"
25552" \\\n"
25553" /*--- Mandatory Events ---*/ \\\n"
25554" macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \\\n"
25555" macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \\\n"
25556" \\\n"
25557" macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \\\n"
25558" macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \\\n"
25559" \\\n"
25560" macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \\\n"
25561" macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \\\n"
25562" macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \\\n"
25563" \\\n"
25564" macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \\\n"
25565" macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \\\n"
25566" macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \\\n"
25567" \\\n"
25568" macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \\\n"
25569" \\\n"
25570" macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \\\n"
25571" macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \\\n"
25572" \\\n"
25573" macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \\\n"
25574" macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \\\n"
25575" \\\n"
25576" /* Optional Events */ \\\n"
25577" macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \\\n"
25578" \\\n"
25579" macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 17) /* mutex released */ \\\n"
25580" \\\n"
25581" macro (ompt_callback_dependences, ompt_callback_dependences_t, 18) /* report task dependences */ \\\n"
25582" macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19) /* report task dependence */ \\\n"
25583" \\\n"
25584" macro (ompt_callback_work, ompt_callback_work_t, 20) /* task at work begin or end */ \\\n"
25585" \\\n"
25586" macro (ompt_callback_master, ompt_callback_master_t, 21) /* task at master begin or end */ \\\n"
25587" \\\n"
25588" macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \\\n"
25589" \\\n"
25590" macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \\\n"
25591" \\\n"
25592" macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 24) /* lock init */ \\\n"
25593" macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 25) /* lock destroy */ \\\n"
25594" \\\n"
25595" macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26) /* mutex acquire */ \\\n"
25596" macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 27) /* mutex acquired */ \\\n"
25597" \\\n"
25598" macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 28) /* nest lock */ \\\n"
25599" \\\n"
25600" macro (ompt_callback_flush, ompt_callback_flush_t, 29) /* after executing flush */ \\\n"
25601" \\\n"
25602" macro (ompt_callback_cancel, ompt_callback_cancel_t, 30) /* cancel innermost binding region */ \\\n"
25603" \\\n"
25604" macro (ompt_callback_reduction, ompt_callback_sync_region_t, 31) /* reduction */ \\\n"
25605" \\\n"
25606" macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */\n"
25607"\n"
25608"/*****************************************************************************\n"
25609" * implementation specific types\n"
25610" *****************************************************************************/\n"
25611"\n"
25612"typedef enum kmp_mutex_impl_t {\n"
25613"#define kmp_mutex_impl_macro(impl, code) impl = code,\n"
25614" FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro)\n"
25615"#undef kmp_mutex_impl_macro\n"
25616"} kmp_mutex_impl_t;\n"
25617"\n"
25618"/*****************************************************************************\n"
25619" * definitions generated from spec\n"
25620" *****************************************************************************/\n"
25621"\n"
25622"typedef enum ompt_callbacks_t {\n"
25623" ompt_callback_thread_begin = 1,\n"
25624" ompt_callback_thread_end = 2,\n"
25625" ompt_callback_parallel_begin = 3,\n"
25626" ompt_callback_parallel_end = 4,\n"
25627" ompt_callback_task_create = 5,\n"
25628" ompt_callback_task_schedule = 6,\n"
25629" ompt_callback_implicit_task = 7,\n"
25630" ompt_callback_target = 8,\n"
25631" ompt_callback_target_data_op = 9,\n"
25632" ompt_callback_target_submit = 10,\n"
25633" ompt_callback_control_tool = 11,\n"
25634" ompt_callback_device_initialize = 12,\n"
25635" ompt_callback_device_finalize = 13,\n"
25636" ompt_callback_device_load = 14,\n"
25637" ompt_callback_device_unload = 15,\n"
25638" ompt_callback_sync_region_wait = 16,\n"
25639" ompt_callback_mutex_released = 17,\n"
25640" ompt_callback_dependences = 18,\n"
25641" ompt_callback_task_dependence = 19,\n"
25642" ompt_callback_work = 20,\n"
25643" ompt_callback_master = 21,\n"
25644" ompt_callback_target_map = 22,\n"
25645" ompt_callback_sync_region = 23,\n"
25646" ompt_callback_lock_init = 24,\n"
25647" ompt_callback_lock_destroy = 25,\n"
25648" ompt_callback_mutex_acquire = 26,\n"
25649" ompt_callback_mutex_acquired = 27,\n"
25650" ompt_callback_nest_lock = 28,\n"
25651" ompt_callback_flush = 29,\n"
25652" ompt_callback_cancel = 30,\n"
25653" ompt_callback_reduction = 31,\n"
25654" ompt_callback_dispatch = 32\n"
25655"} ompt_callbacks_t;\n"
25656"\n"
25657"typedef enum ompt_record_t {\n"
25658" ompt_record_ompt = 1,\n"
25659" ompt_record_native = 2,\n"
25660" ompt_record_invalid = 3\n"
25661"} ompt_record_t;\n"
25662"\n"
25663"typedef enum ompt_record_native_t {\n"
25664" ompt_record_native_info = 1,\n"
25665" ompt_record_native_event = 2\n"
25666"} ompt_record_native_t;\n"
25667"\n"
25668"typedef enum ompt_set_result_t {\n"
25669" ompt_set_error = 0,\n"
25670" ompt_set_never = 1,\n"
25671" ompt_set_impossible = 2,\n"
25672" ompt_set_sometimes = 3,\n"
25673" ompt_set_sometimes_paired = 4,\n"
25674" ompt_set_always = 5\n"
25675"} ompt_set_result_t;\n"
25676"\n"
25677"typedef uint64_t ompt_id_t;\n"
25678"\n"
25679"typedef uint64_t ompt_device_time_t;\n"
25680"\n"
25681"typedef uint64_t ompt_buffer_cursor_t;\n"
25682"\n"
25683"typedef enum ompt_thread_t {\n"
25684" ompt_thread_initial = 1,\n"
25685" ompt_thread_worker = 2,\n"
25686" ompt_thread_other = 3,\n"
25687" ompt_thread_unknown = 4\n"
25688"} ompt_thread_t;\n"
25689"\n"
25690"typedef enum ompt_scope_endpoint_t {\n"
25691" ompt_scope_begin = 1,\n"
25692" ompt_scope_end = 2\n"
25693"} ompt_scope_endpoint_t;\n"
25694"\n"
25695"typedef enum ompt_dispatch_t {\n"
25696" ompt_dispatch_iteration = 1,\n"
25697" ompt_dispatch_section = 2\n"
25698"} ompt_dispatch_t;\n"
25699"\n"
25700"typedef enum ompt_sync_region_t {\n"
25701" ompt_sync_region_barrier = 1,\n"
25702" ompt_sync_region_barrier_implicit = 2,\n"
25703" ompt_sync_region_barrier_explicit = 3,\n"
25704" ompt_sync_region_barrier_implementation = 4,\n"
25705" ompt_sync_region_taskwait = 5,\n"
25706" ompt_sync_region_taskgroup = 6,\n"
25707" ompt_sync_region_reduction = 7\n"
25708"} ompt_sync_region_t;\n"
25709"\n"
25710"typedef enum ompt_target_data_op_t {\n"
25711" ompt_target_data_alloc = 1,\n"
25712" ompt_target_data_transfer_to_device = 2,\n"
25713" ompt_target_data_transfer_from_device = 3,\n"
25714" ompt_target_data_delete = 4,\n"
25715" ompt_target_data_associate = 5,\n"
25716" ompt_target_data_disassociate = 6\n"
25717"} ompt_target_data_op_t;\n"
25718"\n"
25719"typedef enum ompt_work_t {\n"
25720" ompt_work_loop = 1,\n"
25721" ompt_work_sections = 2,\n"
25722" ompt_work_single_executor = 3,\n"
25723" ompt_work_single_other = 4,\n"
25724" ompt_work_workshare = 5,\n"
25725" ompt_work_distribute = 6,\n"
25726" ompt_work_taskloop = 7\n"
25727"} ompt_work_t;\n"
25728"\n"
25729"typedef enum ompt_mutex_t {\n"
25730" ompt_mutex_lock = 1,\n"
25731" ompt_mutex_test_lock = 2,\n"
25732" ompt_mutex_nest_lock = 3,\n"
25733" ompt_mutex_test_nest_lock = 4,\n"
25734" ompt_mutex_critical = 5,\n"
25735" ompt_mutex_atomic = 6,\n"
25736" ompt_mutex_ordered = 7\n"
25737"} ompt_mutex_t;\n"
25738"\n"
25739"typedef enum ompt_native_mon_flag_t {\n"
25740" ompt_native_data_motion_explicit = 0x01,\n"
25741" ompt_native_data_motion_implicit = 0x02,\n"
25742" ompt_native_kernel_invocation = 0x04,\n"
25743" ompt_native_kernel_execution = 0x08,\n"
25744" ompt_native_driver = 0x10,\n"
25745" ompt_native_runtime = 0x20,\n"
25746" ompt_native_overhead = 0x40,\n"
25747" ompt_native_idleness = 0x80\n"
25748"} ompt_native_mon_flag_t;\n"
25749"\n"
25750"typedef enum ompt_task_flag_t {\n"
25751" ompt_task_initial = 0x00000001,\n"
25752" ompt_task_implicit = 0x00000002,\n"
25753" ompt_task_explicit = 0x00000004,\n"
25754" ompt_task_target = 0x00000008,\n"
25755" ompt_task_undeferred = 0x08000000,\n"
25756" ompt_task_untied = 0x10000000,\n"
25757" ompt_task_final = 0x20000000,\n"
25758" ompt_task_mergeable = 0x40000000,\n"
25759" ompt_task_merged = 0x80000000\n"
25760"} ompt_task_flag_t;\n"
25761"\n"
25762"typedef enum ompt_task_status_t {\n"
25763" ompt_task_complete = 1,\n"
25764" ompt_task_yield = 2,\n"
25765" ompt_task_cancel = 3,\n"
25766" ompt_task_detach = 4,\n"
25767" ompt_task_early_fulfill = 5,\n"
25768" ompt_task_late_fulfill = 6,\n"
25769" ompt_task_switch = 7\n"
25770"} ompt_task_status_t;\n"
25771"\n"
25772"typedef enum ompt_target_t {\n"
25773" ompt_target = 1,\n"
25774" ompt_target_enter_data = 2,\n"
25775" ompt_target_exit_data = 3,\n"
25776" ompt_target_update = 4\n"
25777"} ompt_target_t;\n"
25778"\n"
25779"typedef enum ompt_parallel_flag_t {\n"
25780" ompt_parallel_invoker_program = 0x00000001,\n"
25781" ompt_parallel_invoker_runtime = 0x00000002,\n"
25782" ompt_parallel_league = 0x40000000,\n"
25783" ompt_parallel_team = 0x80000000\n"
25784"} ompt_parallel_flag_t;\n"
25785"\n"
25786"typedef enum ompt_target_map_flag_t {\n"
25787" ompt_target_map_flag_to = 0x01,\n"
25788" ompt_target_map_flag_from = 0x02,\n"
25789" ompt_target_map_flag_alloc = 0x04,\n"
25790" ompt_target_map_flag_release = 0x08,\n"
25791" ompt_target_map_flag_delete = 0x10,\n"
25792" ompt_target_map_flag_implicit = 0x20\n"
25793"} ompt_target_map_flag_t;\n"
25794"\n"
25795"typedef enum ompt_dependence_type_t {\n"
25796" ompt_dependence_type_in = 1,\n"
25797" ompt_dependence_type_out = 2,\n"
25798" ompt_dependence_type_inout = 3,\n"
25799" ompt_dependence_type_mutexinoutset = 4,\n"
25800" ompt_dependence_type_source = 5,\n"
25801" ompt_dependence_type_sink = 6\n"
25802"} ompt_dependence_type_t;\n"
25803"\n"
25804"typedef enum ompt_cancel_flag_t {\n"
25805" ompt_cancel_parallel = 0x01,\n"
25806" ompt_cancel_sections = 0x02,\n"
25807" ompt_cancel_loop = 0x04,\n"
25808" ompt_cancel_taskgroup = 0x08,\n"
25809" ompt_cancel_activated = 0x10,\n"
25810" ompt_cancel_detected = 0x20,\n"
25811" ompt_cancel_discarded_task = 0x40\n"
25812"} ompt_cancel_flag_t;\n"
25813"\n"
25814"typedef uint64_t ompt_hwid_t;\n"
25815"\n"
25816"typedef uint64_t ompt_wait_id_t;\n"
25817"\n"
25818"typedef enum ompt_frame_flag_t {\n"
25819" ompt_frame_runtime = 0x00,\n"
25820" ompt_frame_application = 0x01,\n"
25821" ompt_frame_cfa = 0x10,\n"
25822" ompt_frame_framepointer = 0x20,\n"
25823" ompt_frame_stackaddress = 0x30\n"
25824"} ompt_frame_flag_t; \n"
25825"\n"
25826"typedef enum ompt_state_t {\n"
25827" ompt_state_work_serial = 0x000,\n"
25828" ompt_state_work_parallel = 0x001,\n"
25829" ompt_state_work_reduction = 0x002,\n"
25830"\n"
25831" ompt_state_wait_barrier = 0x010,\n"
25832" ompt_state_wait_barrier_implicit_parallel = 0x011,\n"
25833" ompt_state_wait_barrier_implicit_workshare = 0x012,\n"
25834" ompt_state_wait_barrier_implicit = 0x013,\n"
25835" ompt_state_wait_barrier_explicit = 0x014,\n"
25836"\n"
25837" ompt_state_wait_taskwait = 0x020,\n"
25838" ompt_state_wait_taskgroup = 0x021,\n"
25839"\n"
25840" ompt_state_wait_mutex = 0x040,\n"
25841" ompt_state_wait_lock = 0x041,\n"
25842" ompt_state_wait_critical = 0x042,\n"
25843" ompt_state_wait_atomic = 0x043,\n"
25844" ompt_state_wait_ordered = 0x044,\n"
25845"\n"
25846" ompt_state_wait_target = 0x080,\n"
25847" ompt_state_wait_target_map = 0x081,\n"
25848" ompt_state_wait_target_update = 0x082,\n"
25849"\n"
25850" ompt_state_idle = 0x100,\n"
25851" ompt_state_overhead = 0x101,\n"
25852" ompt_state_undefined = 0x102\n"
25853"} ompt_state_t;\n"
25854"\n"
25855"typedef uint64_t (*ompt_get_unique_id_t) (void);\n"
25856"\n"
25857"typedef uint64_t ompd_size_t;\n"
25858"\n"
25859"typedef uint64_t ompd_wait_id_t;\n"
25860"\n"
25861"typedef uint64_t ompd_addr_t;\n"
25862"typedef int64_t ompd_word_t;\n"
25863"typedef uint64_t ompd_seg_t;\n"
25864"\n"
25865"typedef uint64_t ompd_device_t;\n"
25866"\n"
25867"typedef uint64_t ompd_thread_id_t;\n"
25868"\n"
25869"typedef enum ompd_scope_t {\n"
25870" ompd_scope_global = 1,\n"
25871" ompd_scope_address_space = 2,\n"
25872" ompd_scope_thread = 3,\n"
25873" ompd_scope_parallel = 4,\n"
25874" ompd_scope_implicit_task = 5,\n"
25875" ompd_scope_task = 6\n"
25876"} ompd_scope_t;\n"
25877"\n"
25878"typedef uint64_t ompd_icv_id_t;\n"
25879"\n"
25880"typedef enum ompd_rc_t {\n"
25881" ompd_rc_ok = 0,\n"
25882" ompd_rc_unavailable = 1,\n"
25883" ompd_rc_stale_handle = 2,\n"
25884" ompd_rc_bad_input = 3,\n"
25885" ompd_rc_error = 4,\n"
25886" ompd_rc_unsupported = 5,\n"
25887" ompd_rc_needs_state_tracking = 6,\n"
25888" ompd_rc_incompatible = 7,\n"
25889" ompd_rc_device_read_error = 8,\n"
25890" ompd_rc_device_write_error = 9,\n"
25891" ompd_rc_nomem = 10,\n"
25892"} ompd_rc_t;\n"
25893"\n"
25894"typedef void (*ompt_interface_fn_t) (void);\n"
25895"\n"
25896"typedef ompt_interface_fn_t (*ompt_function_lookup_t) (\n"
25897" const char *interface_function_name\n"
25898");\n"
25899"\n"
25900"typedef union ompt_data_t {\n"
25901" uint64_t value;\n"
25902" void *ptr;\n"
25903"} ompt_data_t;\n"
25904"\n"
25905"typedef struct ompt_frame_t {\n"
25906" ompt_data_t exit_frame;\n"
25907" ompt_data_t enter_frame;\n"
25908" int exit_frame_flags;\n"
25909" int enter_frame_flags;\n"
25910"} ompt_frame_t;\n"
25911"\n"
25912"typedef void (*ompt_callback_t) (void);\n"
25913"\n"
25914"typedef void ompt_device_t;\n"
25915"\n"
25916"typedef void ompt_buffer_t;\n"
25917"\n"
25918"typedef void (*ompt_callback_buffer_request_t) (\n"
25919" int device_num,\n"
25920" ompt_buffer_t **buffer,\n"
25921" size_t *bytes\n"
25922");\n"
25923"\n"
25924"typedef void (*ompt_callback_buffer_complete_t) (\n"
25925" int device_num,\n"
25926" ompt_buffer_t *buffer,\n"
25927" size_t bytes,\n"
25928" ompt_buffer_cursor_t begin,\n"
25929" int buffer_owned\n"
25930");\n"
25931"\n"
25932"typedef void (*ompt_finalize_t) (\n"
25933" ompt_data_t *tool_data\n"
25934");\n"
25935"\n"
25936"typedef int (*ompt_initialize_t) (\n"
25937" ompt_function_lookup_t lookup,\n"
25938" int initial_device_num,\n"
25939" ompt_data_t *tool_data\n"
25940");\n"
25941"\n"
25942"typedef struct ompt_start_tool_result_t {\n"
25943" ompt_initialize_t initialize;\n"
25944" ompt_finalize_t finalize;\n"
25945" ompt_data_t tool_data;\n"
25946"} ompt_start_tool_result_t;\n"
25947"\n"
25948"typedef struct ompt_record_abstract_t {\n"
25949" ompt_record_native_t rclass;\n"
25950" const char *type;\n"
25951" ompt_device_time_t start_time;\n"
25952" ompt_device_time_t end_time;\n"
25953" ompt_hwid_t hwid;\n"
25954"} ompt_record_abstract_t;\n"
25955"\n"
25956"typedef struct ompt_dependence_t {\n"
25957" ompt_data_t variable;\n"
25958" ompt_dependence_type_t dependence_type;\n"
25959"} ompt_dependence_t;\n"
25960"\n"
25961"typedef int (*ompt_enumerate_states_t) (\n"
25962" int current_state,\n"
25963" int *next_state,\n"
25964" const char **next_state_name\n"
25965");\n"
25966"\n"
25967"typedef int (*ompt_enumerate_mutex_impls_t) (\n"
25968" int current_impl,\n"
25969" int *next_impl,\n"
25970" const char **next_impl_name\n"
25971");\n"
25972"\n"
25973"typedef ompt_set_result_t (*ompt_set_callback_t) (\n"
25974" ompt_callbacks_t event,\n"
25975" ompt_callback_t callback\n"
25976");\n"
25977"\n"
25978"typedef int (*ompt_get_callback_t) (\n"
25979" ompt_callbacks_t event,\n"
25980" ompt_callback_t *callback\n"
25981");\n"
25982"\n"
25983"typedef ompt_data_t *(*ompt_get_thread_data_t) (void);\n"
25984"\n"
25985"typedef int (*ompt_get_num_procs_t) (void);\n"
25986"\n"
25987"typedef int (*ompt_get_num_places_t) (void);\n"
25988"\n"
25989"typedef int (*ompt_get_place_proc_ids_t) (\n"
25990" int place_num,\n"
25991" int ids_size,\n"
25992" int *ids\n"
25993");\n"
25994"\n"
25995"typedef int (*ompt_get_place_num_t) (void);\n"
25996"\n"
25997"typedef int (*ompt_get_partition_place_nums_t) (\n"
25998" int place_nums_size,\n"
25999" int *place_nums\n"
26000");\n"
26001"\n"
26002"typedef int (*ompt_get_proc_id_t) (void);\n"
26003"\n"
26004"typedef int (*ompt_get_state_t) (\n"
26005" ompt_wait_id_t *wait_id\n"
26006");\n"
26007"\n"
26008"typedef int (*ompt_get_parallel_info_t) (\n"
26009" int ancestor_level,\n"
26010" ompt_data_t **parallel_data,\n"
26011" int *team_size\n"
26012");\n"
26013"\n"
26014"typedef int (*ompt_get_task_info_t) (\n"
26015" int ancestor_level,\n"
26016" int *flags,\n"
26017" ompt_data_t **task_data,\n"
26018" ompt_frame_t **task_frame,\n"
26019" ompt_data_t **parallel_data,\n"
26020" int *thread_num\n"
26021");\n"
26022"\n"
26023"typedef int (*ompt_get_task_memory_t)(\n"
26024" void **addr,\n"
26025" size_t *size,\n"
26026" int block\n"
26027");\n"
26028"\n"
26029"typedef int (*ompt_get_target_info_t) (\n"
26030" uint64_t *device_num,\n"
26031" ompt_id_t *target_id,\n"
26032" ompt_id_t *host_op_id\n"
26033");\n"
26034"\n"
26035"typedef int (*ompt_get_num_devices_t) (void);\n"
26036"\n"
26037"typedef void (*ompt_finalize_tool_t) (void);\n"
26038"\n"
26039"typedef int (*ompt_get_device_num_procs_t) (\n"
26040" ompt_device_t *device\n"
26041");\n"
26042"\n"
26043"typedef ompt_device_time_t (*ompt_get_device_time_t) (\n"
26044" ompt_device_t *device\n"
26045");\n"
26046"\n"
26047"typedef double (*ompt_translate_time_t) (\n"
26048" ompt_device_t *device,\n"
26049" ompt_device_time_t time\n"
26050");\n"
26051"\n"
26052"typedef ompt_set_result_t (*ompt_set_trace_ompt_t) (\n"
26053" ompt_device_t *device,\n"
26054" unsigned int enable,\n"
26055" unsigned int etype\n"
26056");\n"
26057"\n"
26058"typedef ompt_set_result_t (*ompt_set_trace_native_t) (\n"
26059" ompt_device_t *device,\n"
26060" int enable,\n"
26061" int flags\n"
26062");\n"
26063"\n"
26064"typedef int (*ompt_start_trace_t) (\n"
26065" ompt_device_t *device,\n"
26066" ompt_callback_buffer_request_t request,\n"
26067" ompt_callback_buffer_complete_t complete\n"
26068");\n"
26069"\n"
26070"typedef int (*ompt_pause_trace_t) (\n"
26071" ompt_device_t *device,\n"
26072" int begin_pause\n"
26073");\n"
26074"\n"
26075"typedef int (*ompt_flush_trace_t) (\n"
26076" ompt_device_t *device\n"
26077");\n"
26078"\n"
26079"typedef int (*ompt_stop_trace_t) (\n"
26080" ompt_device_t *device\n"
26081");\n"
26082"\n"
26083"typedef int (*ompt_advance_buffer_cursor_t) (\n"
26084" ompt_device_t *device,\n"
26085" ompt_buffer_t *buffer,\n"
26086" size_t size,\n"
26087" ompt_buffer_cursor_t current,\n"
26088" ompt_buffer_cursor_t *next\n"
26089");\n"
26090"\n"
26091"typedef ompt_record_t (*ompt_get_record_type_t) (\n"
26092" ompt_buffer_t *buffer,\n"
26093" ompt_buffer_cursor_t current\n"
26094");\n"
26095"\n"
26096"typedef void *(*ompt_get_record_native_t) (\n"
26097" ompt_buffer_t *buffer,\n"
26098" ompt_buffer_cursor_t current,\n"
26099" ompt_id_t *host_op_id\n"
26100");\n"
26101"\n"
26102"typedef ompt_record_abstract_t *\n"
26103"(*ompt_get_record_abstract_t) (\n"
26104" void *native_record\n"
26105");\n"
26106"\n"
26107"typedef void (*ompt_callback_thread_begin_t) (\n"
26108" ompt_thread_t thread_type,\n"
26109" ompt_data_t *thread_data\n"
26110");\n"
26111"\n"
26112"typedef struct ompt_record_thread_begin_t {\n"
26113" ompt_thread_t thread_type;\n"
26114"} ompt_record_thread_begin_t;\n"
26115"\n"
26116"typedef void (*ompt_callback_thread_end_t) (\n"
26117" ompt_data_t *thread_data\n"
26118");\n"
26119"\n"
26120"typedef void (*ompt_callback_parallel_begin_t) (\n"
26121" ompt_data_t *encountering_task_data,\n"
26122" const ompt_frame_t *encountering_task_frame,\n"
26123" ompt_data_t *parallel_data,\n"
26124" unsigned int requested_parallelism,\n"
26125" int flags,\n"
26126" const void *codeptr_ra\n"
26127");\n"
26128"\n"
26129"typedef struct ompt_record_parallel_begin_t {\n"
26130" ompt_id_t encountering_task_id;\n"
26131" ompt_id_t parallel_id;\n"
26132" unsigned int requested_parallelism;\n"
26133" int flags;\n"
26134" const void *codeptr_ra;\n"
26135"} ompt_record_parallel_begin_t;\n"
26136"\n"
26137"typedef void (*ompt_callback_parallel_end_t) (\n"
26138" ompt_data_t *parallel_data,\n"
26139" ompt_data_t *encountering_task_data,\n"
26140" int flags,\n"
26141" const void *codeptr_ra\n"
26142");\n"
26143"\n"
26144"typedef struct ompt_record_parallel_end_t {\n"
26145" ompt_id_t parallel_id;\n"
26146" ompt_id_t encountering_task_id;\n"
26147" int flags;\n"
26148" const void *codeptr_ra;\n"
26149"} ompt_record_parallel_end_t;\n"
26150"\n"
26151"typedef void (*ompt_callback_work_t) (\n"
26152" ompt_work_t wstype,\n"
26153" ompt_scope_endpoint_t endpoint,\n"
26154" ompt_data_t *parallel_data,\n"
26155" ompt_data_t *task_data,\n"
26156" uint64_t count,\n"
26157" const void *codeptr_ra\n"
26158");\n"
26159"\n"
26160"typedef struct ompt_record_work_t {\n"
26161" ompt_work_t wstype;\n"
26162" ompt_scope_endpoint_t endpoint;\n"
26163" ompt_id_t parallel_id;\n"
26164" ompt_id_t task_id;\n"
26165" uint64_t count;\n"
26166" const void *codeptr_ra;\n"
26167"} ompt_record_work_t;\n"
26168"\n"
26169"typedef void (*ompt_callback_dispatch_t) (\n"
26170" ompt_data_t *parallel_data,\n"
26171" ompt_data_t *task_data,\n"
26172" ompt_dispatch_t kind,\n"
26173" ompt_data_t instance \n"
26174");\n"
26175"\n"
26176"typedef struct ompt_record_dispatch_t {\n"
26177" ompt_id_t parallel_id;\n"
26178" ompt_id_t task_id;\n"
26179" ompt_dispatch_t kind;\n"
26180" ompt_data_t instance; \n"
26181"} ompt_record_dispatch_t;\n"
26182"\n"
26183"typedef void (*ompt_callback_task_create_t) (\n"
26184" ompt_data_t *encountering_task_data,\n"
26185" const ompt_frame_t *encountering_task_frame,\n"
26186" ompt_data_t *new_task_data,\n"
26187" int flags,\n"
26188" int has_dependences,\n"
26189" const void *codeptr_ra\n"
26190");\n"
26191"\n"
26192"typedef struct ompt_record_task_create_t {\n"
26193" ompt_id_t encountering_task_id;\n"
26194" ompt_id_t new_task_id;\n"
26195" int flags;\n"
26196" int has_dependences;\n"
26197" const void *codeptr_ra;\n"
26198"} ompt_record_task_create_t;\n"
26199"\n"
26200"typedef void (*ompt_callback_dependences_t) (\n"
26201" ompt_data_t *task_data,\n"
26202" const ompt_dependence_t *deps,\n"
26203" int ndeps\n"
26204");\n"
26205"\n"
26206"typedef struct ompt_record_dependences_t {\n"
26207" ompt_id_t task_id;\n"
26208" ompt_dependence_t dep;\n"
26209" int ndeps;\n"
26210"} ompt_record_dependences_t;\n"
26211"\n"
26212"typedef void (*ompt_callback_task_dependence_t) (\n"
26213" ompt_data_t *src_task_data,\n"
26214" ompt_data_t *sink_task_data\n"
26215");\n"
26216"\n"
26217"typedef struct ompt_record_task_dependence_t {\n"
26218" ompt_id_t src_task_id;\n"
26219" ompt_id_t sink_task_id;\n"
26220"} ompt_record_task_dependence_t;\n"
26221"\n"
26222"typedef void (*ompt_callback_task_schedule_t) (\n"
26223" ompt_data_t *prior_task_data,\n"
26224" ompt_task_status_t prior_task_status,\n"
26225" ompt_data_t *next_task_data\n"
26226");\n"
26227"\n"
26228"typedef struct ompt_record_task_schedule_t {\n"
26229" ompt_id_t prior_task_id;\n"
26230" ompt_task_status_t prior_task_status;\n"
26231" ompt_id_t next_task_id;\n"
26232"} ompt_record_task_schedule_t;\n"
26233"\n"
26234"typedef void (*ompt_callback_implicit_task_t) (\n"
26235" ompt_scope_endpoint_t endpoint,\n"
26236" ompt_data_t *parallel_data,\n"
26237" ompt_data_t *task_data,\n"
26238" unsigned int actual_parallelism,\n"
26239" unsigned int index,\n"
26240" int flags\n"
26241");\n"
26242"\n"
26243"typedef struct ompt_record_implicit_task_t {\n"
26244" ompt_scope_endpoint_t endpoint;\n"
26245" ompt_id_t parallel_id;\n"
26246" ompt_id_t task_id;\n"
26247" unsigned int actual_parallelism;\n"
26248" unsigned int index;\n"
26249" int flags;\n"
26250"} ompt_record_implicit_task_t;\n"
26251"\n"
26252"typedef void (*ompt_callback_master_t) (\n"
26253" ompt_scope_endpoint_t endpoint,\n"
26254" ompt_data_t *parallel_data,\n"
26255" ompt_data_t *task_data,\n"
26256" const void *codeptr_ra\n"
26257");\n"
26258"\n"
26259"typedef struct ompt_record_master_t {\n"
26260" ompt_scope_endpoint_t endpoint;\n"
26261" ompt_id_t parallel_id;\n"
26262" ompt_id_t task_id;\n"
26263" const void *codeptr_ra;\n"
26264"} ompt_record_master_t;\n"
26265"\n"
26266"typedef void (*ompt_callback_sync_region_t) (\n"
26267" ompt_sync_region_t kind,\n"
26268" ompt_scope_endpoint_t endpoint,\n"
26269" ompt_data_t *parallel_data,\n"
26270" ompt_data_t *task_data,\n"
26271" const void *codeptr_ra\n"
26272");\n"
26273"\n"
26274"typedef struct ompt_record_sync_region_t {\n"
26275" ompt_sync_region_t kind;\n"
26276" ompt_scope_endpoint_t endpoint;\n"
26277" ompt_id_t parallel_id;\n"
26278" ompt_id_t task_id;\n"
26279" const void *codeptr_ra;\n"
26280"} ompt_record_sync_region_t;\n"
26281"\n"
26282"typedef void (*ompt_callback_mutex_acquire_t) (\n"
26283" ompt_mutex_t kind,\n"
26284" unsigned int hint,\n"
26285" unsigned int impl,\n"
26286" ompt_wait_id_t wait_id,\n"
26287" const void *codeptr_ra\n"
26288");\n"
26289"\n"
26290"typedef struct ompt_record_mutex_acquire_t {\n"
26291" ompt_mutex_t kind;\n"
26292" unsigned int hint;\n"
26293" unsigned int impl;\n"
26294" ompt_wait_id_t wait_id;\n"
26295" const void *codeptr_ra;\n"
26296"} ompt_record_mutex_acquire_t;\n"
26297"\n"
26298"typedef void (*ompt_callback_mutex_t) (\n"
26299" ompt_mutex_t kind,\n"
26300" ompt_wait_id_t wait_id,\n"
26301" const void *codeptr_ra\n"
26302");\n"
26303"\n"
26304"typedef struct ompt_record_mutex_t {\n"
26305" ompt_mutex_t kind;\n"
26306" ompt_wait_id_t wait_id;\n"
26307" const void *codeptr_ra;\n"
26308"} ompt_record_mutex_t;\n"
26309"\n"
26310"typedef void (*ompt_callback_nest_lock_t) (\n"
26311" ompt_scope_endpoint_t endpoint,\n"
26312" ompt_wait_id_t wait_id,\n"
26313" const void *codeptr_ra\n"
26314");\n"
26315"\n"
26316"typedef struct ompt_record_nest_lock_t {\n"
26317" ompt_scope_endpoint_t endpoint;\n"
26318" ompt_wait_id_t wait_id;\n"
26319" const void *codeptr_ra;\n"
26320"} ompt_record_nest_lock_t;\n"
26321"\n"
26322"typedef void (*ompt_callback_flush_t) (\n"
26323" ompt_data_t *thread_data,\n"
26324" const void *codeptr_ra\n"
26325");\n"
26326"\n"
26327"typedef struct ompt_record_flush_t {\n"
26328" const void *codeptr_ra;\n"
26329"} ompt_record_flush_t;\n"
26330"\n"
26331"typedef void (*ompt_callback_cancel_t) (\n"
26332" ompt_data_t *task_data,\n"
26333" int flags,\n"
26334" const void *codeptr_ra\n"
26335");\n"
26336"\n"
26337"typedef struct ompt_record_cancel_t {\n"
26338" ompt_id_t task_id;\n"
26339" int flags;\n"
26340" const void *codeptr_ra;\n"
26341"} ompt_record_cancel_t;\n"
26342"\n"
26343"typedef void (*ompt_callback_device_initialize_t) (\n"
26344" int device_num,\n"
26345" const char *type,\n"
26346" ompt_device_t *device,\n"
26347" ompt_function_lookup_t lookup,\n"
26348" const char *documentation\n"
26349");\n"
26350"\n"
26351"typedef void (*ompt_callback_device_finalize_t) (\n"
26352" int device_num\n"
26353");\n"
26354"\n"
26355"typedef void (*ompt_callback_device_load_t) (\n"
26356" int device_num,\n"
26357" const char *filename,\n"
26358" int64_t offset_in_file,\n"
26359" void *vma_in_file,\n"
26360" size_t bytes,\n"
26361" void *host_addr,\n"
26362" void *device_addr,\n"
26363" uint64_t module_id\n"
26364");\n"
26365"\n"
26366"typedef void (*ompt_callback_device_unload_t) (\n"
26367" int device_num,\n"
26368" uint64_t module_id\n"
26369");\n"
26370"\n"
26371"typedef void (*ompt_callback_target_data_op_t) (\n"
26372" ompt_id_t target_id,\n"
26373" ompt_id_t host_op_id,\n"
26374" ompt_target_data_op_t optype,\n"
26375" void *src_addr,\n"
26376" int src_device_num,\n"
26377" void *dest_addr,\n"
26378" int dest_device_num,\n"
26379" size_t bytes,\n"
26380" const void *codeptr_ra\n"
26381");\n"
26382"\n"
26383"typedef struct ompt_record_target_data_op_t {\n"
26384" ompt_id_t host_op_id;\n"
26385" ompt_target_data_op_t optype;\n"
26386" void *src_addr;\n"
26387" int src_device_num;\n"
26388" void *dest_addr;\n"
26389" int dest_device_num;\n"
26390" size_t bytes;\n"
26391" ompt_device_time_t end_time;\n"
26392" const void *codeptr_ra;\n"
26393"} ompt_record_target_data_op_t;\n"
26394"\n"
26395"typedef void (*ompt_callback_target_t) (\n"
26396" ompt_target_t kind,\n"
26397" ompt_scope_endpoint_t endpoint,\n"
26398" int device_num,\n"
26399" ompt_data_t *task_data,\n"
26400" ompt_id_t target_id,\n"
26401" const void *codeptr_ra\n"
26402");\n"
26403"\n"
26404"typedef struct ompt_record_target_t {\n"
26405" ompt_target_t kind;\n"
26406" ompt_scope_endpoint_t endpoint;\n"
26407" int device_num;\n"
26408" ompt_id_t task_id;\n"
26409" ompt_id_t target_id;\n"
26410" const void *codeptr_ra;\n"
26411"} ompt_record_target_t;\n"
26412"\n"
26413"typedef void (*ompt_callback_target_map_t) (\n"
26414" ompt_id_t target_id,\n"
26415" unsigned int nitems,\n"
26416" void **host_addr,\n"
26417" void **device_addr,\n"
26418" size_t *bytes,\n"
26419" unsigned int *mapping_flags,\n"
26420" const void *codeptr_ra\n"
26421");\n"
26422"\n"
26423"typedef struct ompt_record_target_map_t {\n"
26424" ompt_id_t target_id;\n"
26425" unsigned int nitems;\n"
26426" void **host_addr;\n"
26427" void **device_addr;\n"
26428" size_t *bytes;\n"
26429" unsigned int *mapping_flags;\n"
26430" const void *codeptr_ra;\n"
26431"} ompt_record_target_map_t;\n"
26432"\n"
26433"typedef void (*ompt_callback_target_submit_t) (\n"
26434" ompt_id_t target_id,\n"
26435" ompt_id_t host_op_id,\n"
26436" unsigned int requested_num_teams\n"
26437");\n"
26438"\n"
26439"typedef struct ompt_record_target_kernel_t {\n"
26440" ompt_id_t host_op_id;\n"
26441" unsigned int requested_num_teams;\n"
26442" unsigned int granted_num_teams;\n"
26443" ompt_device_time_t end_time;\n"
26444"} ompt_record_target_kernel_t;\n"
26445"\n"
26446"typedef int (*ompt_callback_control_tool_t) (\n"
26447" uint64_t command,\n"
26448" uint64_t modifier,\n"
26449" void *arg,\n"
26450" const void *codeptr_ra\n"
26451");\n"
26452"\n"
26453"typedef struct ompt_record_control_tool_t {\n"
26454" uint64_t command;\n"
26455" uint64_t modifier;\n"
26456" const void *codeptr_ra;\n"
26457"} ompt_record_control_tool_t;\n"
26458"\n"
26459"typedef struct ompd_address_t {\n"
26460" ompd_seg_t segment;\n"
26461" ompd_addr_t address;\n"
26462"} ompd_address_t;\n"
26463"\n"
26464"typedef struct ompd_frame_info_t {\n"
26465" ompd_address_t frame_address;\n"
26466" ompd_word_t frame_flag;\n"
26467"} ompd_frame_info_t;\n"
26468"\n"
26469"typedef struct _ompd_aspace_handle ompd_address_space_handle_t;\n"
26470"typedef struct _ompd_thread_handle ompd_thread_handle_t;\n"
26471"typedef struct _ompd_parallel_handle ompd_parallel_handle_t;\n"
26472"typedef struct _ompd_task_handle ompd_task_handle_t;\n"
26473"\n"
26474"typedef struct _ompd_aspace_cont ompd_address_space_context_t;\n"
26475"typedef struct _ompd_thread_cont ompd_thread_context_t;\n"
26476"\n"
26477"typedef struct ompd_device_type_sizes_t {\n"
26478" uint8_t sizeof_char;\n"
26479" uint8_t sizeof_short;\n"
26480" uint8_t sizeof_int;\n"
26481" uint8_t sizeof_long;\n"
26482" uint8_t sizeof_long_long;\n"
26483" uint8_t sizeof_pointer;\n"
26484"} ompd_device_type_sizes_t;\n"
26485"\n"
26486"typedef struct ompt_record_ompt_t {\n"
26487" ompt_callbacks_t type;\n"
26488" ompt_device_time_t time;\n"
26489" ompt_id_t thread_id;\n"
26490" ompt_id_t target_id;\n"
26491" union {\n"
26492" ompt_record_thread_begin_t thread_begin;\n"
26493" ompt_record_parallel_begin_t parallel_begin;\n"
26494" ompt_record_parallel_end_t parallel_end;\n"
26495" ompt_record_work_t work;\n"
26496" ompt_record_dispatch_t dispatch;\n"
26497" ompt_record_task_create_t task_create;\n"
26498" ompt_record_dependences_t dependences;\n"
26499" ompt_record_task_dependence_t task_dependence;\n"
26500" ompt_record_task_schedule_t task_schedule;\n"
26501" ompt_record_implicit_task_t implicit_task;\n"
26502" ompt_record_master_t master;\n"
26503" ompt_record_sync_region_t sync_region;\n"
26504" ompt_record_mutex_acquire_t mutex_acquire;\n"
26505" ompt_record_mutex_t mutex;\n"
26506" ompt_record_nest_lock_t nest_lock;\n"
26507" ompt_record_flush_t flush;\n"
26508" ompt_record_cancel_t cancel;\n"
26509" ompt_record_target_t target;\n"
26510" ompt_record_target_data_op_t target_data_op;\n"
26511" ompt_record_target_map_t target_map;\n"
26512" ompt_record_target_kernel_t target_kernel;\n"
26513" ompt_record_control_tool_t control_tool;\n"
26514" } record;\n"
26515"} ompt_record_ompt_t;\n"
26516"\n"
26517"typedef ompt_record_ompt_t *(*ompt_get_record_ompt_t) (\n"
26518" ompt_buffer_t *buffer,\n"
26519" ompt_buffer_cursor_t current\n"
26520");\n"
26521"\n"
26522"#define ompt_id_none 0\n"
26523"#define ompt_data_none {0}\n"
26524"#define ompt_time_none 0\n"
26525"#define ompt_hwid_none 0\n"
26526"#define ompt_addr_none ~0\n"
26527"#define ompt_mutex_impl_none 0\n"
26528"#define ompt_wait_id_none 0\n"
26529"\n"
26530"#define ompd_segment_none 0\n"
26531"\n"
26532"#endif /* __OMPT__ */\n"
26533"" } ,
26534 { "/builtins/omp.h" , "/*\n"
26535" * include/50/omp.h.var\n"
26536" */\n"
26537"\n"
26538"\n"
26539"//===----------------------------------------------------------------------===//\n"
26540"//\n"
26541"// The LLVM Compiler Infrastructure\n"
26542"//\n"
26543"// This file is dual licensed under the MIT and the University of Illinois Open\n"
26544"// Source Licenses. See LICENSE.txt for details.\n"
26545"//\n"
26546"//===----------------------------------------------------------------------===//\n"
26547"\n"
26548"\n"
26549"#ifndef __OMP_H\n"
26550"# define __OMP_H\n"
26551"\n"
26552"# define KMP_VERSION_MAJOR 5\n"
26553"# define KMP_VERSION_MINOR 0\n"
26554"# define KMP_VERSION_BUILD 20140926\n"
26555"# define KMP_BUILD_DATE \"No_Timestamp\"\n"
26556"\n"
26557"# ifdef __cplusplus\n"
26558" extern \"C\" {\n"
26559"# endif\n"
26560"\n"
26561"# define omp_set_affinity_format ompc_set_affinity_format\n"
26562"# define omp_get_affinity_format ompc_get_affinity_format\n"
26563"# define omp_display_affinity ompc_display_affinity\n"
26564"# define omp_capture_affinity ompc_capture_affinity\n"
26565"\n"
26566"# if defined(_WIN32)\n"
26567"# define __KAI_KMPC_CONVENTION __cdecl\n"
26568"# ifndef __KMP_IMP\n"
26569"# define __KMP_IMP __declspec(dllimport)\n"
26570"# endif\n"
26571"# else\n"
26572"# define __KAI_KMPC_CONVENTION\n"
26573"# ifndef __KMP_IMP\n"
26574"# define __KMP_IMP\n"
26575"# endif\n"
26576"# endif\n"
26577"\n"
26578" /* schedule kind constants */\n"
26579" typedef enum omp_sched_t {\n"
26580" omp_sched_static = 1,\n"
26581" omp_sched_dynamic = 2,\n"
26582" omp_sched_guided = 3,\n"
26583" omp_sched_auto = 4\n"
26584" } omp_sched_t;\n"
26585"\n"
26586" /* set API functions */\n"
26587" extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int);\n"
26588" extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int);\n"
26589" extern void __KAI_KMPC_CONVENTION omp_set_nested (int);\n"
26590" extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int);\n"
26591" extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int);\n"
26592"\n"
26593" /* query API functions */\n"
26594" extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void);\n"
26595" extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void);\n"
26596" extern int __KAI_KMPC_CONVENTION omp_get_nested (void);\n"
26597" extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void);\n"
26598" extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void);\n"
26599" extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void);\n"
26600" extern int __KAI_KMPC_CONVENTION omp_in_parallel (void);\n"
26601" extern int __KAI_KMPC_CONVENTION omp_in_final (void);\n"
26602" extern int __KAI_KMPC_CONVENTION omp_get_active_level (void);\n"
26603" extern int __KAI_KMPC_CONVENTION omp_get_level (void);\n"
26604" extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int);\n"
26605" extern int __KAI_KMPC_CONVENTION omp_get_team_size (int);\n"
26606" extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void);\n"
26607" extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void);\n"
26608" extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *);\n"
26609" extern int __KAI_KMPC_CONVENTION omp_get_max_task_priority (void);\n"
26610"\n"
26611" /* lock API functions */\n"
26612" typedef struct omp_lock_t {\n"
26613" void * _lk;\n"
26614" } omp_lock_t;\n"
26615"\n"
26616" extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *);\n"
26617" extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *);\n"
26618" extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *);\n"
26619" extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *);\n"
26620" extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *);\n"
26621"\n"
26622" /* nested lock API functions */\n"
26623" typedef struct omp_nest_lock_t {\n"
26624" void * _lk;\n"
26625" } omp_nest_lock_t;\n"
26626"\n"
26627" extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *);\n"
26628" extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *);\n"
26629" extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *);\n"
26630" extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *);\n"
26631" extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *);\n"
26632"\n"
26633" /* OpenMP 5.0 Synchronization hints*/\n"
26634" typedef enum omp_sync_hint_t {\n"
26635" omp_sync_hint_none = 0,\n"
26636" omp_lock_hint_none = omp_sync_hint_none,\n"
26637" omp_sync_hint_uncontended = 1,\n"
26638" omp_lock_hint_uncontended = omp_sync_hint_uncontended,\n"
26639" omp_sync_hint_contended = (1<<1),\n"
26640" omp_lock_hint_contended = omp_sync_hint_contended,\n"
26641" omp_sync_hint_nonspeculative = (1<<2),\n"
26642" omp_lock_hint_nonspeculative = omp_sync_hint_nonspeculative,\n"
26643" omp_sync_hint_speculative = (1<<3),\n"
26644" omp_lock_hint_speculative = omp_sync_hint_speculative,\n"
26645" kmp_lock_hint_hle = (1<<16),\n"
26646" kmp_lock_hint_rtm = (1<<17),\n"
26647" kmp_lock_hint_adaptive = (1<<18)\n"
26648" } omp_sync_hint_t;\n"
26649"\n"
26650" /* lock hint type for dynamic user lock */\n"
26651" typedef omp_sync_hint_t omp_lock_hint_t;\n"
26652"\n"
26653" /* hinted lock initializers */\n"
26654" extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t);\n"
26655" extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t);\n"
26656"\n"
26657" /* time API functions */\n"
26658" extern double __KAI_KMPC_CONVENTION omp_get_wtime (void);\n"
26659" extern double __KAI_KMPC_CONVENTION omp_get_wtick (void);\n"
26660"\n"
26661" /* OpenMP 4.0 */\n"
26662" extern int __KAI_KMPC_CONVENTION omp_get_default_device (void);\n"
26663" extern void __KAI_KMPC_CONVENTION omp_set_default_device (int);\n"
26664" extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void);\n"
26665" extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void);\n"
26666" extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void);\n"
26667" extern int __KAI_KMPC_CONVENTION omp_get_team_num (void);\n"
26668" extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void);\n"
26669"\n"
26670"# include <stdlib.h>\n"
26671" /* OpenMP 4.5 */\n"
26672" extern int __KAI_KMPC_CONVENTION omp_get_initial_device (void);\n"
26673" extern void* __KAI_KMPC_CONVENTION omp_target_alloc(size_t, int);\n"
26674" extern void __KAI_KMPC_CONVENTION omp_target_free(void *, int);\n"
26675" extern int __KAI_KMPC_CONVENTION omp_target_is_present(void *, int);\n"
26676" extern int __KAI_KMPC_CONVENTION omp_target_memcpy(void *, void *, size_t, size_t, size_t, int, int);\n"
26677" extern int __KAI_KMPC_CONVENTION omp_target_memcpy_rect(void *, void *, size_t, int, const size_t *,\n"
26678" const size_t *, const size_t *, const size_t *, const size_t *, int, int);\n"
26679" extern int __KAI_KMPC_CONVENTION omp_target_associate_ptr(void *, void *, size_t, size_t, int);\n"
26680" extern int __KAI_KMPC_CONVENTION omp_target_disassociate_ptr(void *, int);\n"
26681"\n"
26682" /* OpenMP 5.0 */\n"
26683" extern int __KAI_KMPC_CONVENTION omp_get_device_num (void);\n"
26684"\n"
26685" /* kmp API functions */\n"
26686" extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void);\n"
26687" extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int);\n"
26688" extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void);\n"
26689" extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t);\n"
26690" extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void);\n"
26691" extern int __KAI_KMPC_CONVENTION kmp_get_library (void);\n"
26692" extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int);\n"
26693" extern void __KAI_KMPC_CONVENTION kmp_set_library (int);\n"
26694" extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void);\n"
26695" extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void);\n"
26696" extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void);\n"
26697" extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *);\n"
26698" extern void __KAI_KMPC_CONVENTION kmp_set_disp_num_buffers (int);\n"
26699"\n"
26700" /* Intel affinity API */\n"
26701" typedef void * kmp_affinity_mask_t;\n"
26702"\n"
26703" extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *);\n"
26704" extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *);\n"
26705" extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void);\n"
26706" extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *);\n"
26707" extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *);\n"
26708" extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *);\n"
26709" extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *);\n"
26710" extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *);\n"
26711"\n"
26712" /* OpenMP 4.0 affinity API */\n"
26713" typedef enum omp_proc_bind_t {\n"
26714" omp_proc_bind_false = 0,\n"
26715" omp_proc_bind_true = 1,\n"
26716" omp_proc_bind_master = 2,\n"
26717" omp_proc_bind_close = 3,\n"
26718" omp_proc_bind_spread = 4\n"
26719" } omp_proc_bind_t;\n"
26720"\n"
26721" extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void);\n"
26722"\n"
26723" /* OpenMP 4.5 affinity API */\n"
26724" extern int __KAI_KMPC_CONVENTION omp_get_num_places (void);\n"
26725" extern int __KAI_KMPC_CONVENTION omp_get_place_num_procs (int);\n"
26726" extern void __KAI_KMPC_CONVENTION omp_get_place_proc_ids (int, int *);\n"
26727" extern int __KAI_KMPC_CONVENTION omp_get_place_num (void);\n"
26728" extern int __KAI_KMPC_CONVENTION omp_get_partition_num_places (void);\n"
26729" extern void __KAI_KMPC_CONVENTION omp_get_partition_place_nums (int *);\n"
26730"\n"
26731" extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t);\n"
26732" extern void * __KAI_KMPC_CONVENTION kmp_aligned_malloc (size_t, size_t);\n"
26733" extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t);\n"
26734" extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t);\n"
26735" extern void __KAI_KMPC_CONVENTION kmp_free (void *);\n"
26736"\n"
26737" extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void);\n"
26738" extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void);\n"
26739"\n"
26740" /* OpenMP 5.0 Tool Control */\n"
26741" typedef enum omp_control_tool_result_t {\n"
26742" omp_control_tool_notool = -2,\n"
26743" omp_control_tool_nocallback = -1,\n"
26744" omp_control_tool_success = 0,\n"
26745" omp_control_tool_ignored = 1\n"
26746" } omp_control_tool_result_t;\n"
26747"\n"
26748" typedef enum omp_control_tool_t {\n"
26749" omp_control_tool_start = 1,\n"
26750" omp_control_tool_pause = 2,\n"
26751" omp_control_tool_flush = 3,\n"
26752" omp_control_tool_end = 4\n"
26753" } omp_control_tool_t;\n"
26754" \n"
26755" extern int __KAI_KMPC_CONVENTION omp_control_tool(int, int, void*);\n"
26756"\n"
26757" /* OpenMP 5.0 Memory Management */\n"
26758" typedef void *omp_allocator_t;\n"
26759" extern __KMP_IMP const omp_allocator_t *OMP_NULL_ALLOCATOR;\n"
26760" extern __KMP_IMP const omp_allocator_t *omp_default_mem_alloc;\n"
26761" extern __KMP_IMP const omp_allocator_t *omp_large_cap_mem_alloc;\n"
26762" extern __KMP_IMP const omp_allocator_t *omp_const_mem_alloc;\n"
26763" extern __KMP_IMP const omp_allocator_t *omp_high_bw_mem_alloc;\n"
26764" extern __KMP_IMP const omp_allocator_t *omp_low_lat_mem_alloc;\n"
26765" extern __KMP_IMP const omp_allocator_t *omp_cgroup_mem_alloc;\n"
26766" extern __KMP_IMP const omp_allocator_t *omp_pteam_mem_alloc;\n"
26767" extern __KMP_IMP const omp_allocator_t *omp_thread_mem_alloc;\n"
26768"\n"
26769" extern void __KAI_KMPC_CONVENTION omp_set_default_allocator(const omp_allocator_t *);\n"
26770" extern const omp_allocator_t * __KAI_KMPC_CONVENTION omp_get_default_allocator(void);\n"
26771"#ifdef __cplusplus\n"
26772" extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, const omp_allocator_t *allocator = OMP_NULL_ALLOCATOR);\n"
26773" extern void __KAI_KMPC_CONVENTION omp_free(void * ptr, const omp_allocator_t *allocator = OMP_NULL_ALLOCATOR);\n"
26774"#else\n"
26775" extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, const omp_allocator_t *allocator);\n"
26776" extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, const omp_allocator_t *allocator);\n"
26777"#endif\n"
26778"\n"
26779" /* OpenMP 5.0 Affinity Format */\n"
26780" extern void __KAI_KMPC_CONVENTION omp_set_affinity_format(char const *);\n"
26781" extern size_t __KAI_KMPC_CONVENTION omp_get_affinity_format(char *, size_t);\n"
26782" extern void __KAI_KMPC_CONVENTION omp_display_affinity(char const *);\n"
26783" extern size_t __KAI_KMPC_CONVENTION omp_capture_affinity(char *, size_t, char const *);\n"
26784"\n"
26785"# undef __KAI_KMPC_CONVENTION\n"
26786"# undef __KMP_IMP\n"
26787"\n"
26788" /* Warning:\n"
26789" The following typedefs are not standard, deprecated and will be removed in a future release.\n"
26790" */\n"
26791" typedef int omp_int_t;\n"
26792" typedef double omp_wtime_t;\n"
26793"\n"
26794"# ifdef __cplusplus\n"
26795" }\n"
26796"# endif\n"
26797"\n"
26798"#endif /* __OMP_H */\n"
26799"" } ,
26800 { "/builtins/ompt.h" , "/*\n"
26801" * include/50/omp-tools.h.var\n"
26802" */\n"
26803"\n"
26804"//===----------------------------------------------------------------------===//\n"
26805"//\n"
26806"// The LLVM Compiler Infrastructure\n"
26807"//\n"
26808"// This file is dual licensed under the MIT and the University of Illinois Open\n"
26809"// Source Licenses. See LICENSE.txt for details.\n"
26810"//\n"
26811"//===----------------------------------------------------------------------===//\n"
26812"\n"
26813"#ifndef __OMPT__\n"
26814"#define __OMPT__\n"
26815"\n"
26816"/*****************************************************************************\n"
26817" * system include files\n"
26818" *****************************************************************************/\n"
26819"\n"
26820"#include <stdint.h>\n"
26821"#include <stddef.h>\n"
26822"\n"
26823"/*****************************************************************************\n"
26824" * iteration macros\n"
26825" *****************************************************************************/\n"
26826"\n"
26827"#define FOREACH_OMPT_INQUIRY_FN(macro) \\\n"
26828" macro (ompt_enumerate_states) \\\n"
26829" macro (ompt_enumerate_mutex_impls) \\\n"
26830" \\\n"
26831" macro (ompt_set_callback) \\\n"
26832" macro (ompt_get_callback) \\\n"
26833" \\\n"
26834" macro (ompt_get_state) \\\n"
26835" \\\n"
26836" macro (ompt_get_parallel_info) \\\n"
26837" macro (ompt_get_task_info) \\\n"
26838" macro (ompt_get_task_memory) \\\n"
26839" macro (ompt_get_thread_data) \\\n"
26840" macro (ompt_get_unique_id) \\\n"
26841" macro (ompt_finalize_tool) \\\n"
26842" \\\n"
26843" macro(ompt_get_num_procs) \\\n"
26844" macro(ompt_get_num_places) \\\n"
26845" macro(ompt_get_place_proc_ids) \\\n"
26846" macro(ompt_get_place_num) \\\n"
26847" macro(ompt_get_partition_place_nums) \\\n"
26848" macro(ompt_get_proc_id) \\\n"
26849" \\\n"
26850" macro(ompt_get_target_info) \\\n"
26851" macro(ompt_get_num_devices)\n"
26852"\n"
26853"#define FOREACH_OMPT_STATE(macro) \\\n"
26854" \\\n"
26855" /* first available state */ \\\n"
26856" macro (ompt_state_undefined, 0x102) /* undefined thread state */ \\\n"
26857" \\\n"
26858" /* work states (0..15) */ \\\n"
26859" macro (ompt_state_work_serial, 0x000) /* working outside parallel */ \\\n"
26860" macro (ompt_state_work_parallel, 0x001) /* working within parallel */ \\\n"
26861" macro (ompt_state_work_reduction, 0x002) /* performing a reduction */ \\\n"
26862" \\\n"
26863" /* barrier wait states (16..31) */ \\\n"
26864" macro (ompt_state_wait_barrier, 0x010) /* waiting at a barrier */ \\\n"
26865" macro (ompt_state_wait_barrier_implicit_parallel, 0x011) \\\n"
26866" /* implicit barrier at the end of parallel region */\\\n"
26867" macro (ompt_state_wait_barrier_implicit_workshare, 0x012) \\\n"
26868" /* implicit barrier at the end of worksharing */ \\\n"
26869" macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \\\n"
26870" macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \\\n"
26871" \\\n"
26872" /* task wait states (32..63) */ \\\n"
26873" macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \\\n"
26874" macro (ompt_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \\\n"
26875" \\\n"
26876" /* mutex wait states (64..127) */ \\\n"
26877" macro (ompt_state_wait_mutex, 0x040) \\\n"
26878" macro (ompt_state_wait_lock, 0x041) /* waiting for lock */ \\\n"
26879" macro (ompt_state_wait_critical, 0x042) /* waiting for critical */ \\\n"
26880" macro (ompt_state_wait_atomic, 0x043) /* waiting for atomic */ \\\n"
26881" macro (ompt_state_wait_ordered, 0x044) /* waiting for ordered */ \\\n"
26882" \\\n"
26883" /* target wait states (128..255) */ \\\n"
26884" macro (ompt_state_wait_target, 0x080) /* waiting for target region */ \\\n"
26885" macro (ompt_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \\\n"
26886" macro (ompt_state_wait_target_update, 0x082) /* waiting for target update operation */ \\\n"
26887" \\\n"
26888" /* misc (256..511) */ \\\n"
26889" macro (ompt_state_idle, 0x100) /* waiting for work */ \\\n"
26890" macro (ompt_state_overhead, 0x101) /* overhead excluding wait states */ \\\n"
26891" \\\n"
26892" /* implementation-specific states (512..) */\n"
26893"\n"
26894"\n"
26895"#define FOREACH_KMP_MUTEX_IMPL(macro) \\\n"
26896" macro (kmp_mutex_impl_none, 0) /* unknown implementation */ \\\n"
26897" macro (kmp_mutex_impl_spin, 1) /* based on spin */ \\\n"
26898" macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \\\n"
26899" macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */\n"
26900"\n"
26901"#define FOREACH_OMPT_EVENT(macro) \\\n"
26902" \\\n"
26903" /*--- Mandatory Events ---*/ \\\n"
26904" macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \\\n"
26905" macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \\\n"
26906" \\\n"
26907" macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \\\n"
26908" macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \\\n"
26909" \\\n"
26910" macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \\\n"
26911" macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \\\n"
26912" macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \\\n"
26913" \\\n"
26914" macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \\\n"
26915" macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \\\n"
26916" macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \\\n"
26917" \\\n"
26918" macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \\\n"
26919" \\\n"
26920" macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \\\n"
26921" macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \\\n"
26922" \\\n"
26923" macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \\\n"
26924" macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \\\n"
26925" \\\n"
26926" /* Optional Events */ \\\n"
26927" macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \\\n"
26928" \\\n"
26929" macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 17) /* mutex released */ \\\n"
26930" \\\n"
26931" macro (ompt_callback_dependences, ompt_callback_dependences_t, 18) /* report task dependences */ \\\n"
26932" macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19) /* report task dependence */ \\\n"
26933" \\\n"
26934" macro (ompt_callback_work, ompt_callback_work_t, 20) /* task at work begin or end */ \\\n"
26935" \\\n"
26936" macro (ompt_callback_master, ompt_callback_master_t, 21) /* task at master begin or end */ \\\n"
26937" \\\n"
26938" macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \\\n"
26939" \\\n"
26940" macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \\\n"
26941" \\\n"
26942" macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 24) /* lock init */ \\\n"
26943" macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 25) /* lock destroy */ \\\n"
26944" \\\n"
26945" macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26) /* mutex acquire */ \\\n"
26946" macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 27) /* mutex acquired */ \\\n"
26947" \\\n"
26948" macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 28) /* nest lock */ \\\n"
26949" \\\n"
26950" macro (ompt_callback_flush, ompt_callback_flush_t, 29) /* after executing flush */ \\\n"
26951" \\\n"
26952" macro (ompt_callback_cancel, ompt_callback_cancel_t, 30) /* cancel innermost binding region */ \\\n"
26953" \\\n"
26954" macro (ompt_callback_reduction, ompt_callback_sync_region_t, 31) /* reduction */ \\\n"
26955" \\\n"
26956" macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */\n"
26957"\n"
26958"/*****************************************************************************\n"
26959" * implementation specific types\n"
26960" *****************************************************************************/\n"
26961"\n"
26962"typedef enum kmp_mutex_impl_t {\n"
26963"#define kmp_mutex_impl_macro(impl, code) impl = code,\n"
26964" FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro)\n"
26965"#undef kmp_mutex_impl_macro\n"
26966"} kmp_mutex_impl_t;\n"
26967"\n"
26968"/*****************************************************************************\n"
26969" * definitions generated from spec\n"
26970" *****************************************************************************/\n"
26971"\n"
26972"typedef enum ompt_callbacks_t {\n"
26973" ompt_callback_thread_begin = 1,\n"
26974" ompt_callback_thread_end = 2,\n"
26975" ompt_callback_parallel_begin = 3,\n"
26976" ompt_callback_parallel_end = 4,\n"
26977" ompt_callback_task_create = 5,\n"
26978" ompt_callback_task_schedule = 6,\n"
26979" ompt_callback_implicit_task = 7,\n"
26980" ompt_callback_target = 8,\n"
26981" ompt_callback_target_data_op = 9,\n"
26982" ompt_callback_target_submit = 10,\n"
26983" ompt_callback_control_tool = 11,\n"
26984" ompt_callback_device_initialize = 12,\n"
26985" ompt_callback_device_finalize = 13,\n"
26986" ompt_callback_device_load = 14,\n"
26987" ompt_callback_device_unload = 15,\n"
26988" ompt_callback_sync_region_wait = 16,\n"
26989" ompt_callback_mutex_released = 17,\n"
26990" ompt_callback_dependences = 18,\n"
26991" ompt_callback_task_dependence = 19,\n"
26992" ompt_callback_work = 20,\n"
26993" ompt_callback_master = 21,\n"
26994" ompt_callback_target_map = 22,\n"
26995" ompt_callback_sync_region = 23,\n"
26996" ompt_callback_lock_init = 24,\n"
26997" ompt_callback_lock_destroy = 25,\n"
26998" ompt_callback_mutex_acquire = 26,\n"
26999" ompt_callback_mutex_acquired = 27,\n"
27000" ompt_callback_nest_lock = 28,\n"
27001" ompt_callback_flush = 29,\n"
27002" ompt_callback_cancel = 30,\n"
27003" ompt_callback_reduction = 31,\n"
27004" ompt_callback_dispatch = 32\n"
27005"} ompt_callbacks_t;\n"
27006"\n"
27007"typedef enum ompt_record_t {\n"
27008" ompt_record_ompt = 1,\n"
27009" ompt_record_native = 2,\n"
27010" ompt_record_invalid = 3\n"
27011"} ompt_record_t;\n"
27012"\n"
27013"typedef enum ompt_record_native_t {\n"
27014" ompt_record_native_info = 1,\n"
27015" ompt_record_native_event = 2\n"
27016"} ompt_record_native_t;\n"
27017"\n"
27018"typedef enum ompt_set_result_t {\n"
27019" ompt_set_error = 0,\n"
27020" ompt_set_never = 1,\n"
27021" ompt_set_impossible = 2,\n"
27022" ompt_set_sometimes = 3,\n"
27023" ompt_set_sometimes_paired = 4,\n"
27024" ompt_set_always = 5\n"
27025"} ompt_set_result_t;\n"
27026"\n"
27027"typedef uint64_t ompt_id_t;\n"
27028"\n"
27029"typedef uint64_t ompt_device_time_t;\n"
27030"\n"
27031"typedef uint64_t ompt_buffer_cursor_t;\n"
27032"\n"
27033"typedef enum ompt_thread_t {\n"
27034" ompt_thread_initial = 1,\n"
27035" ompt_thread_worker = 2,\n"
27036" ompt_thread_other = 3,\n"
27037" ompt_thread_unknown = 4\n"
27038"} ompt_thread_t;\n"
27039"\n"
27040"typedef enum ompt_scope_endpoint_t {\n"
27041" ompt_scope_begin = 1,\n"
27042" ompt_scope_end = 2\n"
27043"} ompt_scope_endpoint_t;\n"
27044"\n"
27045"typedef enum ompt_dispatch_t {\n"
27046" ompt_dispatch_iteration = 1,\n"
27047" ompt_dispatch_section = 2\n"
27048"} ompt_dispatch_t;\n"
27049"\n"
27050"typedef enum ompt_sync_region_t {\n"
27051" ompt_sync_region_barrier = 1,\n"
27052" ompt_sync_region_barrier_implicit = 2,\n"
27053" ompt_sync_region_barrier_explicit = 3,\n"
27054" ompt_sync_region_barrier_implementation = 4,\n"
27055" ompt_sync_region_taskwait = 5,\n"
27056" ompt_sync_region_taskgroup = 6,\n"
27057" ompt_sync_region_reduction = 7\n"
27058"} ompt_sync_region_t;\n"
27059"\n"
27060"typedef enum ompt_target_data_op_t {\n"
27061" ompt_target_data_alloc = 1,\n"
27062" ompt_target_data_transfer_to_device = 2,\n"
27063" ompt_target_data_transfer_from_device = 3,\n"
27064" ompt_target_data_delete = 4,\n"
27065" ompt_target_data_associate = 5,\n"
27066" ompt_target_data_disassociate = 6\n"
27067"} ompt_target_data_op_t;\n"
27068"\n"
27069"typedef enum ompt_work_t {\n"
27070" ompt_work_loop = 1,\n"
27071" ompt_work_sections = 2,\n"
27072" ompt_work_single_executor = 3,\n"
27073" ompt_work_single_other = 4,\n"
27074" ompt_work_workshare = 5,\n"
27075" ompt_work_distribute = 6,\n"
27076" ompt_work_taskloop = 7\n"
27077"} ompt_work_t;\n"
27078"\n"
27079"typedef enum ompt_mutex_t {\n"
27080" ompt_mutex_lock = 1,\n"
27081" ompt_mutex_test_lock = 2,\n"
27082" ompt_mutex_nest_lock = 3,\n"
27083" ompt_mutex_test_nest_lock = 4,\n"
27084" ompt_mutex_critical = 5,\n"
27085" ompt_mutex_atomic = 6,\n"
27086" ompt_mutex_ordered = 7\n"
27087"} ompt_mutex_t;\n"
27088"\n"
27089"typedef enum ompt_native_mon_flag_t {\n"
27090" ompt_native_data_motion_explicit = 0x01,\n"
27091" ompt_native_data_motion_implicit = 0x02,\n"
27092" ompt_native_kernel_invocation = 0x04,\n"
27093" ompt_native_kernel_execution = 0x08,\n"
27094" ompt_native_driver = 0x10,\n"
27095" ompt_native_runtime = 0x20,\n"
27096" ompt_native_overhead = 0x40,\n"
27097" ompt_native_idleness = 0x80\n"
27098"} ompt_native_mon_flag_t;\n"
27099"\n"
27100"typedef enum ompt_task_flag_t {\n"
27101" ompt_task_initial = 0x00000001,\n"
27102" ompt_task_implicit = 0x00000002,\n"
27103" ompt_task_explicit = 0x00000004,\n"
27104" ompt_task_target = 0x00000008,\n"
27105" ompt_task_undeferred = 0x08000000,\n"
27106" ompt_task_untied = 0x10000000,\n"
27107" ompt_task_final = 0x20000000,\n"
27108" ompt_task_mergeable = 0x40000000,\n"
27109" ompt_task_merged = 0x80000000\n"
27110"} ompt_task_flag_t;\n"
27111"\n"
27112"typedef enum ompt_task_status_t {\n"
27113" ompt_task_complete = 1,\n"
27114" ompt_task_yield = 2,\n"
27115" ompt_task_cancel = 3,\n"
27116" ompt_task_detach = 4,\n"
27117" ompt_task_early_fulfill = 5,\n"
27118" ompt_task_late_fulfill = 6,\n"
27119" ompt_task_switch = 7\n"
27120"} ompt_task_status_t;\n"
27121"\n"
27122"typedef enum ompt_target_t {\n"
27123" ompt_target = 1,\n"
27124" ompt_target_enter_data = 2,\n"
27125" ompt_target_exit_data = 3,\n"
27126" ompt_target_update = 4\n"
27127"} ompt_target_t;\n"
27128"\n"
27129"typedef enum ompt_parallel_flag_t {\n"
27130" ompt_parallel_invoker_program = 0x00000001,\n"
27131" ompt_parallel_invoker_runtime = 0x00000002,\n"
27132" ompt_parallel_league = 0x40000000,\n"
27133" ompt_parallel_team = 0x80000000\n"
27134"} ompt_parallel_flag_t;\n"
27135"\n"
27136"typedef enum ompt_target_map_flag_t {\n"
27137" ompt_target_map_flag_to = 0x01,\n"
27138" ompt_target_map_flag_from = 0x02,\n"
27139" ompt_target_map_flag_alloc = 0x04,\n"
27140" ompt_target_map_flag_release = 0x08,\n"
27141" ompt_target_map_flag_delete = 0x10,\n"
27142" ompt_target_map_flag_implicit = 0x20\n"
27143"} ompt_target_map_flag_t;\n"
27144"\n"
27145"typedef enum ompt_dependence_type_t {\n"
27146" ompt_dependence_type_in = 1,\n"
27147" ompt_dependence_type_out = 2,\n"
27148" ompt_dependence_type_inout = 3,\n"
27149" ompt_dependence_type_mutexinoutset = 4,\n"
27150" ompt_dependence_type_source = 5,\n"
27151" ompt_dependence_type_sink = 6\n"
27152"} ompt_dependence_type_t;\n"
27153"\n"
27154"typedef enum ompt_cancel_flag_t {\n"
27155" ompt_cancel_parallel = 0x01,\n"
27156" ompt_cancel_sections = 0x02,\n"
27157" ompt_cancel_loop = 0x04,\n"
27158" ompt_cancel_taskgroup = 0x08,\n"
27159" ompt_cancel_activated = 0x10,\n"
27160" ompt_cancel_detected = 0x20,\n"
27161" ompt_cancel_discarded_task = 0x40\n"
27162"} ompt_cancel_flag_t;\n"
27163"\n"
27164"typedef uint64_t ompt_hwid_t;\n"
27165"\n"
27166"typedef uint64_t ompt_wait_id_t;\n"
27167"\n"
27168"typedef enum ompt_frame_flag_t {\n"
27169" ompt_frame_runtime = 0x00,\n"
27170" ompt_frame_application = 0x01,\n"
27171" ompt_frame_cfa = 0x10,\n"
27172" ompt_frame_framepointer = 0x20,\n"
27173" ompt_frame_stackaddress = 0x30\n"
27174"} ompt_frame_flag_t; \n"
27175"\n"
27176"typedef enum ompt_state_t {\n"
27177" ompt_state_work_serial = 0x000,\n"
27178" ompt_state_work_parallel = 0x001,\n"
27179" ompt_state_work_reduction = 0x002,\n"
27180"\n"
27181" ompt_state_wait_barrier = 0x010,\n"
27182" ompt_state_wait_barrier_implicit_parallel = 0x011,\n"
27183" ompt_state_wait_barrier_implicit_workshare = 0x012,\n"
27184" ompt_state_wait_barrier_implicit = 0x013,\n"
27185" ompt_state_wait_barrier_explicit = 0x014,\n"
27186"\n"
27187" ompt_state_wait_taskwait = 0x020,\n"
27188" ompt_state_wait_taskgroup = 0x021,\n"
27189"\n"
27190" ompt_state_wait_mutex = 0x040,\n"
27191" ompt_state_wait_lock = 0x041,\n"
27192" ompt_state_wait_critical = 0x042,\n"
27193" ompt_state_wait_atomic = 0x043,\n"
27194" ompt_state_wait_ordered = 0x044,\n"
27195"\n"
27196" ompt_state_wait_target = 0x080,\n"
27197" ompt_state_wait_target_map = 0x081,\n"
27198" ompt_state_wait_target_update = 0x082,\n"
27199"\n"
27200" ompt_state_idle = 0x100,\n"
27201" ompt_state_overhead = 0x101,\n"
27202" ompt_state_undefined = 0x102\n"
27203"} ompt_state_t;\n"
27204"\n"
27205"typedef uint64_t (*ompt_get_unique_id_t) (void);\n"
27206"\n"
27207"typedef uint64_t ompd_size_t;\n"
27208"\n"
27209"typedef uint64_t ompd_wait_id_t;\n"
27210"\n"
27211"typedef uint64_t ompd_addr_t;\n"
27212"typedef int64_t ompd_word_t;\n"
27213"typedef uint64_t ompd_seg_t;\n"
27214"\n"
27215"typedef uint64_t ompd_device_t;\n"
27216"\n"
27217"typedef uint64_t ompd_thread_id_t;\n"
27218"\n"
27219"typedef enum ompd_scope_t {\n"
27220" ompd_scope_global = 1,\n"
27221" ompd_scope_address_space = 2,\n"
27222" ompd_scope_thread = 3,\n"
27223" ompd_scope_parallel = 4,\n"
27224" ompd_scope_implicit_task = 5,\n"
27225" ompd_scope_task = 6\n"
27226"} ompd_scope_t;\n"
27227"\n"
27228"typedef uint64_t ompd_icv_id_t;\n"
27229"\n"
27230"typedef enum ompd_rc_t {\n"
27231" ompd_rc_ok = 0,\n"
27232" ompd_rc_unavailable = 1,\n"
27233" ompd_rc_stale_handle = 2,\n"
27234" ompd_rc_bad_input = 3,\n"
27235" ompd_rc_error = 4,\n"
27236" ompd_rc_unsupported = 5,\n"
27237" ompd_rc_needs_state_tracking = 6,\n"
27238" ompd_rc_incompatible = 7,\n"
27239" ompd_rc_device_read_error = 8,\n"
27240" ompd_rc_device_write_error = 9,\n"
27241" ompd_rc_nomem = 10,\n"
27242"} ompd_rc_t;\n"
27243"\n"
27244"typedef void (*ompt_interface_fn_t) (void);\n"
27245"\n"
27246"typedef ompt_interface_fn_t (*ompt_function_lookup_t) (\n"
27247" const char *interface_function_name\n"
27248");\n"
27249"\n"
27250"typedef union ompt_data_t {\n"
27251" uint64_t value;\n"
27252" void *ptr;\n"
27253"} ompt_data_t;\n"
27254"\n"
27255"typedef struct ompt_frame_t {\n"
27256" ompt_data_t exit_frame;\n"
27257" ompt_data_t enter_frame;\n"
27258" int exit_frame_flags;\n"
27259" int enter_frame_flags;\n"
27260"} ompt_frame_t;\n"
27261"\n"
27262"typedef void (*ompt_callback_t) (void);\n"
27263"\n"
27264"typedef void ompt_device_t;\n"
27265"\n"
27266"typedef void ompt_buffer_t;\n"
27267"\n"
27268"typedef void (*ompt_callback_buffer_request_t) (\n"
27269" int device_num,\n"
27270" ompt_buffer_t **buffer,\n"
27271" size_t *bytes\n"
27272");\n"
27273"\n"
27274"typedef void (*ompt_callback_buffer_complete_t) (\n"
27275" int device_num,\n"
27276" ompt_buffer_t *buffer,\n"
27277" size_t bytes,\n"
27278" ompt_buffer_cursor_t begin,\n"
27279" int buffer_owned\n"
27280");\n"
27281"\n"
27282"typedef void (*ompt_finalize_t) (\n"
27283" ompt_data_t *tool_data\n"
27284");\n"
27285"\n"
27286"typedef int (*ompt_initialize_t) (\n"
27287" ompt_function_lookup_t lookup,\n"
27288" int initial_device_num,\n"
27289" ompt_data_t *tool_data\n"
27290");\n"
27291"\n"
27292"typedef struct ompt_start_tool_result_t {\n"
27293" ompt_initialize_t initialize;\n"
27294" ompt_finalize_t finalize;\n"
27295" ompt_data_t tool_data;\n"
27296"} ompt_start_tool_result_t;\n"
27297"\n"
27298"typedef struct ompt_record_abstract_t {\n"
27299" ompt_record_native_t rclass;\n"
27300" const char *type;\n"
27301" ompt_device_time_t start_time;\n"
27302" ompt_device_time_t end_time;\n"
27303" ompt_hwid_t hwid;\n"
27304"} ompt_record_abstract_t;\n"
27305"\n"
27306"typedef struct ompt_dependence_t {\n"
27307" ompt_data_t variable;\n"
27308" ompt_dependence_type_t dependence_type;\n"
27309"} ompt_dependence_t;\n"
27310"\n"
27311"typedef int (*ompt_enumerate_states_t) (\n"
27312" int current_state,\n"
27313" int *next_state,\n"
27314" const char **next_state_name\n"
27315");\n"
27316"\n"
27317"typedef int (*ompt_enumerate_mutex_impls_t) (\n"
27318" int current_impl,\n"
27319" int *next_impl,\n"
27320" const char **next_impl_name\n"
27321");\n"
27322"\n"
27323"typedef ompt_set_result_t (*ompt_set_callback_t) (\n"
27324" ompt_callbacks_t event,\n"
27325" ompt_callback_t callback\n"
27326");\n"
27327"\n"
27328"typedef int (*ompt_get_callback_t) (\n"
27329" ompt_callbacks_t event,\n"
27330" ompt_callback_t *callback\n"
27331");\n"
27332"\n"
27333"typedef ompt_data_t *(*ompt_get_thread_data_t) (void);\n"
27334"\n"
27335"typedef int (*ompt_get_num_procs_t) (void);\n"
27336"\n"
27337"typedef int (*ompt_get_num_places_t) (void);\n"
27338"\n"
27339"typedef int (*ompt_get_place_proc_ids_t) (\n"
27340" int place_num,\n"
27341" int ids_size,\n"
27342" int *ids\n"
27343");\n"
27344"\n"
27345"typedef int (*ompt_get_place_num_t) (void);\n"
27346"\n"
27347"typedef int (*ompt_get_partition_place_nums_t) (\n"
27348" int place_nums_size,\n"
27349" int *place_nums\n"
27350");\n"
27351"\n"
27352"typedef int (*ompt_get_proc_id_t) (void);\n"
27353"\n"
27354"typedef int (*ompt_get_state_t) (\n"
27355" ompt_wait_id_t *wait_id\n"
27356");\n"
27357"\n"
27358"typedef int (*ompt_get_parallel_info_t) (\n"
27359" int ancestor_level,\n"
27360" ompt_data_t **parallel_data,\n"
27361" int *team_size\n"
27362");\n"
27363"\n"
27364"typedef int (*ompt_get_task_info_t) (\n"
27365" int ancestor_level,\n"
27366" int *flags,\n"
27367" ompt_data_t **task_data,\n"
27368" ompt_frame_t **task_frame,\n"
27369" ompt_data_t **parallel_data,\n"
27370" int *thread_num\n"
27371");\n"
27372"\n"
27373"typedef int (*ompt_get_task_memory_t)(\n"
27374" void **addr,\n"
27375" size_t *size,\n"
27376" int block\n"
27377");\n"
27378"\n"
27379"typedef int (*ompt_get_target_info_t) (\n"
27380" uint64_t *device_num,\n"
27381" ompt_id_t *target_id,\n"
27382" ompt_id_t *host_op_id\n"
27383");\n"
27384"\n"
27385"typedef int (*ompt_get_num_devices_t) (void);\n"
27386"\n"
27387"typedef void (*ompt_finalize_tool_t) (void);\n"
27388"\n"
27389"typedef int (*ompt_get_device_num_procs_t) (\n"
27390" ompt_device_t *device\n"
27391");\n"
27392"\n"
27393"typedef ompt_device_time_t (*ompt_get_device_time_t) (\n"
27394" ompt_device_t *device\n"
27395");\n"
27396"\n"
27397"typedef double (*ompt_translate_time_t) (\n"
27398" ompt_device_t *device,\n"
27399" ompt_device_time_t time\n"
27400");\n"
27401"\n"
27402"typedef ompt_set_result_t (*ompt_set_trace_ompt_t) (\n"
27403" ompt_device_t *device,\n"
27404" unsigned int enable,\n"
27405" unsigned int etype\n"
27406");\n"
27407"\n"
27408"typedef ompt_set_result_t (*ompt_set_trace_native_t) (\n"
27409" ompt_device_t *device,\n"
27410" int enable,\n"
27411" int flags\n"
27412");\n"
27413"\n"
27414"typedef int (*ompt_start_trace_t) (\n"
27415" ompt_device_t *device,\n"
27416" ompt_callback_buffer_request_t request,\n"
27417" ompt_callback_buffer_complete_t complete\n"
27418");\n"
27419"\n"
27420"typedef int (*ompt_pause_trace_t) (\n"
27421" ompt_device_t *device,\n"
27422" int begin_pause\n"
27423");\n"
27424"\n"
27425"typedef int (*ompt_flush_trace_t) (\n"
27426" ompt_device_t *device\n"
27427");\n"
27428"\n"
27429"typedef int (*ompt_stop_trace_t) (\n"
27430" ompt_device_t *device\n"
27431");\n"
27432"\n"
27433"typedef int (*ompt_advance_buffer_cursor_t) (\n"
27434" ompt_device_t *device,\n"
27435" ompt_buffer_t *buffer,\n"
27436" size_t size,\n"
27437" ompt_buffer_cursor_t current,\n"
27438" ompt_buffer_cursor_t *next\n"
27439");\n"
27440"\n"
27441"typedef ompt_record_t (*ompt_get_record_type_t) (\n"
27442" ompt_buffer_t *buffer,\n"
27443" ompt_buffer_cursor_t current\n"
27444");\n"
27445"\n"
27446"typedef void *(*ompt_get_record_native_t) (\n"
27447" ompt_buffer_t *buffer,\n"
27448" ompt_buffer_cursor_t current,\n"
27449" ompt_id_t *host_op_id\n"
27450");\n"
27451"\n"
27452"typedef ompt_record_abstract_t *\n"
27453"(*ompt_get_record_abstract_t) (\n"
27454" void *native_record\n"
27455");\n"
27456"\n"
27457"typedef void (*ompt_callback_thread_begin_t) (\n"
27458" ompt_thread_t thread_type,\n"
27459" ompt_data_t *thread_data\n"
27460");\n"
27461"\n"
27462"typedef struct ompt_record_thread_begin_t {\n"
27463" ompt_thread_t thread_type;\n"
27464"} ompt_record_thread_begin_t;\n"
27465"\n"
27466"typedef void (*ompt_callback_thread_end_t) (\n"
27467" ompt_data_t *thread_data\n"
27468");\n"
27469"\n"
27470"typedef void (*ompt_callback_parallel_begin_t) (\n"
27471" ompt_data_t *encountering_task_data,\n"
27472" const ompt_frame_t *encountering_task_frame,\n"
27473" ompt_data_t *parallel_data,\n"
27474" unsigned int requested_parallelism,\n"
27475" int flags,\n"
27476" const void *codeptr_ra\n"
27477");\n"
27478"\n"
27479"typedef struct ompt_record_parallel_begin_t {\n"
27480" ompt_id_t encountering_task_id;\n"
27481" ompt_id_t parallel_id;\n"
27482" unsigned int requested_parallelism;\n"
27483" int flags;\n"
27484" const void *codeptr_ra;\n"
27485"} ompt_record_parallel_begin_t;\n"
27486"\n"
27487"typedef void (*ompt_callback_parallel_end_t) (\n"
27488" ompt_data_t *parallel_data,\n"
27489" ompt_data_t *encountering_task_data,\n"
27490" int flags,\n"
27491" const void *codeptr_ra\n"
27492");\n"
27493"\n"
27494"typedef struct ompt_record_parallel_end_t {\n"
27495" ompt_id_t parallel_id;\n"
27496" ompt_id_t encountering_task_id;\n"
27497" int flags;\n"
27498" const void *codeptr_ra;\n"
27499"} ompt_record_parallel_end_t;\n"
27500"\n"
27501"typedef void (*ompt_callback_work_t) (\n"
27502" ompt_work_t wstype,\n"
27503" ompt_scope_endpoint_t endpoint,\n"
27504" ompt_data_t *parallel_data,\n"
27505" ompt_data_t *task_data,\n"
27506" uint64_t count,\n"
27507" const void *codeptr_ra\n"
27508");\n"
27509"\n"
27510"typedef struct ompt_record_work_t {\n"
27511" ompt_work_t wstype;\n"
27512" ompt_scope_endpoint_t endpoint;\n"
27513" ompt_id_t parallel_id;\n"
27514" ompt_id_t task_id;\n"
27515" uint64_t count;\n"
27516" const void *codeptr_ra;\n"
27517"} ompt_record_work_t;\n"
27518"\n"
27519"typedef void (*ompt_callback_dispatch_t) (\n"
27520" ompt_data_t *parallel_data,\n"
27521" ompt_data_t *task_data,\n"
27522" ompt_dispatch_t kind,\n"
27523" ompt_data_t instance \n"
27524");\n"
27525"\n"
27526"typedef struct ompt_record_dispatch_t {\n"
27527" ompt_id_t parallel_id;\n"
27528" ompt_id_t task_id;\n"
27529" ompt_dispatch_t kind;\n"
27530" ompt_data_t instance; \n"
27531"} ompt_record_dispatch_t;\n"
27532"\n"
27533"typedef void (*ompt_callback_task_create_t) (\n"
27534" ompt_data_t *encountering_task_data,\n"
27535" const ompt_frame_t *encountering_task_frame,\n"
27536" ompt_data_t *new_task_data,\n"
27537" int flags,\n"
27538" int has_dependences,\n"
27539" const void *codeptr_ra\n"
27540");\n"
27541"\n"
27542"typedef struct ompt_record_task_create_t {\n"
27543" ompt_id_t encountering_task_id;\n"
27544" ompt_id_t new_task_id;\n"
27545" int flags;\n"
27546" int has_dependences;\n"
27547" const void *codeptr_ra;\n"
27548"} ompt_record_task_create_t;\n"
27549"\n"
27550"typedef void (*ompt_callback_dependences_t) (\n"
27551" ompt_data_t *task_data,\n"
27552" const ompt_dependence_t *deps,\n"
27553" int ndeps\n"
27554");\n"
27555"\n"
27556"typedef struct ompt_record_dependences_t {\n"
27557" ompt_id_t task_id;\n"
27558" ompt_dependence_t dep;\n"
27559" int ndeps;\n"
27560"} ompt_record_dependences_t;\n"
27561"\n"
27562"typedef void (*ompt_callback_task_dependence_t) (\n"
27563" ompt_data_t *src_task_data,\n"
27564" ompt_data_t *sink_task_data\n"
27565");\n"
27566"\n"
27567"typedef struct ompt_record_task_dependence_t {\n"
27568" ompt_id_t src_task_id;\n"
27569" ompt_id_t sink_task_id;\n"
27570"} ompt_record_task_dependence_t;\n"
27571"\n"
27572"typedef void (*ompt_callback_task_schedule_t) (\n"
27573" ompt_data_t *prior_task_data,\n"
27574" ompt_task_status_t prior_task_status,\n"
27575" ompt_data_t *next_task_data\n"
27576");\n"
27577"\n"
27578"typedef struct ompt_record_task_schedule_t {\n"
27579" ompt_id_t prior_task_id;\n"
27580" ompt_task_status_t prior_task_status;\n"
27581" ompt_id_t next_task_id;\n"
27582"} ompt_record_task_schedule_t;\n"
27583"\n"
27584"typedef void (*ompt_callback_implicit_task_t) (\n"
27585" ompt_scope_endpoint_t endpoint,\n"
27586" ompt_data_t *parallel_data,\n"
27587" ompt_data_t *task_data,\n"
27588" unsigned int actual_parallelism,\n"
27589" unsigned int index,\n"
27590" int flags\n"
27591");\n"
27592"\n"
27593"typedef struct ompt_record_implicit_task_t {\n"
27594" ompt_scope_endpoint_t endpoint;\n"
27595" ompt_id_t parallel_id;\n"
27596" ompt_id_t task_id;\n"
27597" unsigned int actual_parallelism;\n"
27598" unsigned int index;\n"
27599" int flags;\n"
27600"} ompt_record_implicit_task_t;\n"
27601"\n"
27602"typedef void (*ompt_callback_master_t) (\n"
27603" ompt_scope_endpoint_t endpoint,\n"
27604" ompt_data_t *parallel_data,\n"
27605" ompt_data_t *task_data,\n"
27606" const void *codeptr_ra\n"
27607");\n"
27608"\n"
27609"typedef struct ompt_record_master_t {\n"
27610" ompt_scope_endpoint_t endpoint;\n"
27611" ompt_id_t parallel_id;\n"
27612" ompt_id_t task_id;\n"
27613" const void *codeptr_ra;\n"
27614"} ompt_record_master_t;\n"
27615"\n"
27616"typedef void (*ompt_callback_sync_region_t) (\n"
27617" ompt_sync_region_t kind,\n"
27618" ompt_scope_endpoint_t endpoint,\n"
27619" ompt_data_t *parallel_data,\n"
27620" ompt_data_t *task_data,\n"
27621" const void *codeptr_ra\n"
27622");\n"
27623"\n"
27624"typedef struct ompt_record_sync_region_t {\n"
27625" ompt_sync_region_t kind;\n"
27626" ompt_scope_endpoint_t endpoint;\n"
27627" ompt_id_t parallel_id;\n"
27628" ompt_id_t task_id;\n"
27629" const void *codeptr_ra;\n"
27630"} ompt_record_sync_region_t;\n"
27631"\n"
27632"typedef void (*ompt_callback_mutex_acquire_t) (\n"
27633" ompt_mutex_t kind,\n"
27634" unsigned int hint,\n"
27635" unsigned int impl,\n"
27636" ompt_wait_id_t wait_id,\n"
27637" const void *codeptr_ra\n"
27638");\n"
27639"\n"
27640"typedef struct ompt_record_mutex_acquire_t {\n"
27641" ompt_mutex_t kind;\n"
27642" unsigned int hint;\n"
27643" unsigned int impl;\n"
27644" ompt_wait_id_t wait_id;\n"
27645" const void *codeptr_ra;\n"
27646"} ompt_record_mutex_acquire_t;\n"
27647"\n"
27648"typedef void (*ompt_callback_mutex_t) (\n"
27649" ompt_mutex_t kind,\n"
27650" ompt_wait_id_t wait_id,\n"
27651" const void *codeptr_ra\n"
27652");\n"
27653"\n"
27654"typedef struct ompt_record_mutex_t {\n"
27655" ompt_mutex_t kind;\n"
27656" ompt_wait_id_t wait_id;\n"
27657" const void *codeptr_ra;\n"
27658"} ompt_record_mutex_t;\n"
27659"\n"
27660"typedef void (*ompt_callback_nest_lock_t) (\n"
27661" ompt_scope_endpoint_t endpoint,\n"
27662" ompt_wait_id_t wait_id,\n"
27663" const void *codeptr_ra\n"
27664");\n"
27665"\n"
27666"typedef struct ompt_record_nest_lock_t {\n"
27667" ompt_scope_endpoint_t endpoint;\n"
27668" ompt_wait_id_t wait_id;\n"
27669" const void *codeptr_ra;\n"
27670"} ompt_record_nest_lock_t;\n"
27671"\n"
27672"typedef void (*ompt_callback_flush_t) (\n"
27673" ompt_data_t *thread_data,\n"
27674" const void *codeptr_ra\n"
27675");\n"
27676"\n"
27677"typedef struct ompt_record_flush_t {\n"
27678" const void *codeptr_ra;\n"
27679"} ompt_record_flush_t;\n"
27680"\n"
27681"typedef void (*ompt_callback_cancel_t) (\n"
27682" ompt_data_t *task_data,\n"
27683" int flags,\n"
27684" const void *codeptr_ra\n"
27685");\n"
27686"\n"
27687"typedef struct ompt_record_cancel_t {\n"
27688" ompt_id_t task_id;\n"
27689" int flags;\n"
27690" const void *codeptr_ra;\n"
27691"} ompt_record_cancel_t;\n"
27692"\n"
27693"typedef void (*ompt_callback_device_initialize_t) (\n"
27694" int device_num,\n"
27695" const char *type,\n"
27696" ompt_device_t *device,\n"
27697" ompt_function_lookup_t lookup,\n"
27698" const char *documentation\n"
27699");\n"
27700"\n"
27701"typedef void (*ompt_callback_device_finalize_t) (\n"
27702" int device_num\n"
27703");\n"
27704"\n"
27705"typedef void (*ompt_callback_device_load_t) (\n"
27706" int device_num,\n"
27707" const char *filename,\n"
27708" int64_t offset_in_file,\n"
27709" void *vma_in_file,\n"
27710" size_t bytes,\n"
27711" void *host_addr,\n"
27712" void *device_addr,\n"
27713" uint64_t module_id\n"
27714");\n"
27715"\n"
27716"typedef void (*ompt_callback_device_unload_t) (\n"
27717" int device_num,\n"
27718" uint64_t module_id\n"
27719");\n"
27720"\n"
27721"typedef void (*ompt_callback_target_data_op_t) (\n"
27722" ompt_id_t target_id,\n"
27723" ompt_id_t host_op_id,\n"
27724" ompt_target_data_op_t optype,\n"
27725" void *src_addr,\n"
27726" int src_device_num,\n"
27727" void *dest_addr,\n"
27728" int dest_device_num,\n"
27729" size_t bytes,\n"
27730" const void *codeptr_ra\n"
27731");\n"
27732"\n"
27733"typedef struct ompt_record_target_data_op_t {\n"
27734" ompt_id_t host_op_id;\n"
27735" ompt_target_data_op_t optype;\n"
27736" void *src_addr;\n"
27737" int src_device_num;\n"
27738" void *dest_addr;\n"
27739" int dest_device_num;\n"
27740" size_t bytes;\n"
27741" ompt_device_time_t end_time;\n"
27742" const void *codeptr_ra;\n"
27743"} ompt_record_target_data_op_t;\n"
27744"\n"
27745"typedef void (*ompt_callback_target_t) (\n"
27746" ompt_target_t kind,\n"
27747" ompt_scope_endpoint_t endpoint,\n"
27748" int device_num,\n"
27749" ompt_data_t *task_data,\n"
27750" ompt_id_t target_id,\n"
27751" const void *codeptr_ra\n"
27752");\n"
27753"\n"
27754"typedef struct ompt_record_target_t {\n"
27755" ompt_target_t kind;\n"
27756" ompt_scope_endpoint_t endpoint;\n"
27757" int device_num;\n"
27758" ompt_id_t task_id;\n"
27759" ompt_id_t target_id;\n"
27760" const void *codeptr_ra;\n"
27761"} ompt_record_target_t;\n"
27762"\n"
27763"typedef void (*ompt_callback_target_map_t) (\n"
27764" ompt_id_t target_id,\n"
27765" unsigned int nitems,\n"
27766" void **host_addr,\n"
27767" void **device_addr,\n"
27768" size_t *bytes,\n"
27769" unsigned int *mapping_flags,\n"
27770" const void *codeptr_ra\n"
27771");\n"
27772"\n"
27773"typedef struct ompt_record_target_map_t {\n"
27774" ompt_id_t target_id;\n"
27775" unsigned int nitems;\n"
27776" void **host_addr;\n"
27777" void **device_addr;\n"
27778" size_t *bytes;\n"
27779" unsigned int *mapping_flags;\n"
27780" const void *codeptr_ra;\n"
27781"} ompt_record_target_map_t;\n"
27782"\n"
27783"typedef void (*ompt_callback_target_submit_t) (\n"
27784" ompt_id_t target_id,\n"
27785" ompt_id_t host_op_id,\n"
27786" unsigned int requested_num_teams\n"
27787");\n"
27788"\n"
27789"typedef struct ompt_record_target_kernel_t {\n"
27790" ompt_id_t host_op_id;\n"
27791" unsigned int requested_num_teams;\n"
27792" unsigned int granted_num_teams;\n"
27793" ompt_device_time_t end_time;\n"
27794"} ompt_record_target_kernel_t;\n"
27795"\n"
27796"typedef int (*ompt_callback_control_tool_t) (\n"
27797" uint64_t command,\n"
27798" uint64_t modifier,\n"
27799" void *arg,\n"
27800" const void *codeptr_ra\n"
27801");\n"
27802"\n"
27803"typedef struct ompt_record_control_tool_t {\n"
27804" uint64_t command;\n"
27805" uint64_t modifier;\n"
27806" const void *codeptr_ra;\n"
27807"} ompt_record_control_tool_t;\n"
27808"\n"
27809"typedef struct ompd_address_t {\n"
27810" ompd_seg_t segment;\n"
27811" ompd_addr_t address;\n"
27812"} ompd_address_t;\n"
27813"\n"
27814"typedef struct ompd_frame_info_t {\n"
27815" ompd_address_t frame_address;\n"
27816" ompd_word_t frame_flag;\n"
27817"} ompd_frame_info_t;\n"
27818"\n"
27819"typedef struct _ompd_aspace_handle ompd_address_space_handle_t;\n"
27820"typedef struct _ompd_thread_handle ompd_thread_handle_t;\n"
27821"typedef struct _ompd_parallel_handle ompd_parallel_handle_t;\n"
27822"typedef struct _ompd_task_handle ompd_task_handle_t;\n"
27823"\n"
27824"typedef struct _ompd_aspace_cont ompd_address_space_context_t;\n"
27825"typedef struct _ompd_thread_cont ompd_thread_context_t;\n"
27826"\n"
27827"typedef struct ompd_device_type_sizes_t {\n"
27828" uint8_t sizeof_char;\n"
27829" uint8_t sizeof_short;\n"
27830" uint8_t sizeof_int;\n"
27831" uint8_t sizeof_long;\n"
27832" uint8_t sizeof_long_long;\n"
27833" uint8_t sizeof_pointer;\n"
27834"} ompd_device_type_sizes_t;\n"
27835"\n"
27836"typedef struct ompt_record_ompt_t {\n"
27837" ompt_callbacks_t type;\n"
27838" ompt_device_time_t time;\n"
27839" ompt_id_t thread_id;\n"
27840" ompt_id_t target_id;\n"
27841" union {\n"
27842" ompt_record_thread_begin_t thread_begin;\n"
27843" ompt_record_parallel_begin_t parallel_begin;\n"
27844" ompt_record_parallel_end_t parallel_end;\n"
27845" ompt_record_work_t work;\n"
27846" ompt_record_dispatch_t dispatch;\n"
27847" ompt_record_task_create_t task_create;\n"
27848" ompt_record_dependences_t dependences;\n"
27849" ompt_record_task_dependence_t task_dependence;\n"
27850" ompt_record_task_schedule_t task_schedule;\n"
27851" ompt_record_implicit_task_t implicit_task;\n"
27852" ompt_record_master_t master;\n"
27853" ompt_record_sync_region_t sync_region;\n"
27854" ompt_record_mutex_acquire_t mutex_acquire;\n"
27855" ompt_record_mutex_t mutex;\n"
27856" ompt_record_nest_lock_t nest_lock;\n"
27857" ompt_record_flush_t flush;\n"
27858" ompt_record_cancel_t cancel;\n"
27859" ompt_record_target_t target;\n"
27860" ompt_record_target_data_op_t target_data_op;\n"
27861" ompt_record_target_map_t target_map;\n"
27862" ompt_record_target_kernel_t target_kernel;\n"
27863" ompt_record_control_tool_t control_tool;\n"
27864" } record;\n"
27865"} ompt_record_ompt_t;\n"
27866"\n"
27867"typedef ompt_record_ompt_t *(*ompt_get_record_ompt_t) (\n"
27868" ompt_buffer_t *buffer,\n"
27869" ompt_buffer_cursor_t current\n"
27870");\n"
27871"\n"
27872"#define ompt_id_none 0\n"
27873"#define ompt_data_none {0}\n"
27874"#define ompt_time_none 0\n"
27875"#define ompt_hwid_none 0\n"
27876"#define ompt_addr_none ~0\n"
27877"#define ompt_mutex_impl_none 0\n"
27878"#define ompt_wait_id_none 0\n"
27879"\n"
27880"#define ompd_segment_none 0\n"
27881"\n"
27882"#endif /* __OMPT__ */\n"
27883"" } ,
27884 { "/builtins/opencl-c.h" , "//===--- opencl-c.h - OpenCL C language builtin function header -----------===//\n"
27885"//\n"
27886"// The LLVM Compiler Infrastructure\n"
27887"//\n"
27888"// This file is distributed under the University of Illinois Open Source\n"
27889"// License. See LICENSE.TXT for details.\n"
27890"//\n"
27891"//===----------------------------------------------------------------------===//\n"
27892"\n"
27893"#ifndef _OPENCL_H_\n"
27894"#define _OPENCL_H_\n"
27895"\n"
27896"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
27897"#ifndef cl_khr_depth_images\n"
27898"#define cl_khr_depth_images\n"
27899"#endif //cl_khr_depth_images\n"
27900"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
27901"\n"
27902"#if __OPENCL_C_VERSION__ < CL_VERSION_2_0\n"
27903"#ifdef cl_khr_3d_image_writes\n"
27904"#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"
27905"#endif //cl_khr_3d_image_writes\n"
27906"#endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0\n"
27907"\n"
27908"#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
27909"#ifndef cl_intel_planar_yuv\n"
27910"#define cl_intel_planar_yuv\n"
27911"#endif // cl_intel_planar_yuv\n"
27912"#pragma OPENCL EXTENSION cl_intel_planar_yuv : begin\n"
27913"#pragma OPENCL EXTENSION cl_intel_planar_yuv : end\n"
27914"#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
27915"\n"
27916"#define __ovld __attribute__((overloadable))\n"
27917"#define __conv __attribute__((convergent))\n"
27918"\n"
27919"// Optimizations\n"
27920"#define __purefn __attribute__((pure))\n"
27921"#define __cnfn __attribute__((const))\n"
27922"\n"
27923"// built-in scalar data types:\n"
27924"\n"
27925"/**\n"
27926" * An unsigned 8-bit integer.\n"
27927" */\n"
27928"typedef unsigned char uchar;\n"
27929"\n"
27930"/**\n"
27931" * An unsigned 16-bit integer.\n"
27932" */\n"
27933"typedef unsigned short ushort;\n"
27934"\n"
27935"/**\n"
27936" * An unsigned 32-bit integer.\n"
27937" */\n"
27938"typedef unsigned int uint;\n"
27939"\n"
27940"/**\n"
27941" * An unsigned 64-bit integer.\n"
27942" */\n"
27943"typedef unsigned long ulong;\n"
27944"\n"
27945"/**\n"
27946" * The unsigned integer type of the result of the sizeof operator. This\n"
27947" * is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS\n"
27948" * defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if\n"
27949" * CL_DEVICE_ADDRESS_BITS is 64-bits.\n"
27950" */\n"
27951"typedef __SIZE_TYPE__ size_t;\n"
27952"\n"
27953"/**\n"
27954" * A signed integer type that is the result of subtracting two pointers.\n"
27955" * This is a 32-bit signed integer if CL_DEVICE_ADDRESS_BITS\n"
27956" * defined in table 4.3 is 32-bits and is a 64-bit signed integer if\n"
27957" * CL_DEVICE_ADDRESS_BITS is 64-bits.\n"
27958" */\n"
27959"typedef __PTRDIFF_TYPE__ ptrdiff_t;\n"
27960"\n"
27961"/**\n"
27962"* A signed integer type with the property that any valid pointer to\n"
27963"* void can be converted to this type, then converted back to pointer\n"
27964"* to void, and the result will compare equal to the original pointer.\n"
27965"*/\n"
27966"typedef __INTPTR_TYPE__ intptr_t;\n"
27967"\n"
27968"/**\n"
27969"* An unsigned integer type with the property that any valid pointer to\n"
27970"* void can be converted to this type, then converted back to pointer\n"
27971"* to void, and the result will compare equal to the original pointer.\n"
27972"*/\n"
27973"typedef __UINTPTR_TYPE__ uintptr_t;\n"
27974"\n"
27975"// built-in vector data types:\n"
27976"typedef char char2 __attribute__((ext_vector_type(2)));\n"
27977"typedef char char3 __attribute__((ext_vector_type(3)));\n"
27978"typedef char char4 __attribute__((ext_vector_type(4)));\n"
27979"typedef char char8 __attribute__((ext_vector_type(8)));\n"
27980"typedef char char16 __attribute__((ext_vector_type(16)));\n"
27981"typedef uchar uchar2 __attribute__((ext_vector_type(2)));\n"
27982"typedef uchar uchar3 __attribute__((ext_vector_type(3)));\n"
27983"typedef uchar uchar4 __attribute__((ext_vector_type(4)));\n"
27984"typedef uchar uchar8 __attribute__((ext_vector_type(8)));\n"
27985"typedef uchar uchar16 __attribute__((ext_vector_type(16)));\n"
27986"typedef short short2 __attribute__((ext_vector_type(2)));\n"
27987"typedef short short3 __attribute__((ext_vector_type(3)));\n"
27988"typedef short short4 __attribute__((ext_vector_type(4)));\n"
27989"typedef short short8 __attribute__((ext_vector_type(8)));\n"
27990"typedef short short16 __attribute__((ext_vector_type(16)));\n"
27991"typedef ushort ushort2 __attribute__((ext_vector_type(2)));\n"
27992"typedef ushort ushort3 __attribute__((ext_vector_type(3)));\n"
27993"typedef ushort ushort4 __attribute__((ext_vector_type(4)));\n"
27994"typedef ushort ushort8 __attribute__((ext_vector_type(8)));\n"
27995"typedef ushort ushort16 __attribute__((ext_vector_type(16)));\n"
27996"typedef int int2 __attribute__((ext_vector_type(2)));\n"
27997"typedef int int3 __attribute__((ext_vector_type(3)));\n"
27998"typedef int int4 __attribute__((ext_vector_type(4)));\n"
27999"typedef int int8 __attribute__((ext_vector_type(8)));\n"
28000"typedef int int16 __attribute__((ext_vector_type(16)));\n"
28001"typedef uint uint2 __attribute__((ext_vector_type(2)));\n"
28002"typedef uint uint3 __attribute__((ext_vector_type(3)));\n"
28003"typedef uint uint4 __attribute__((ext_vector_type(4)));\n"
28004"typedef uint uint8 __attribute__((ext_vector_type(8)));\n"
28005"typedef uint uint16 __attribute__((ext_vector_type(16)));\n"
28006"typedef long long2 __attribute__((ext_vector_type(2)));\n"
28007"typedef long long3 __attribute__((ext_vector_type(3)));\n"
28008"typedef long long4 __attribute__((ext_vector_type(4)));\n"
28009"typedef long long8 __attribute__((ext_vector_type(8)));\n"
28010"typedef long long16 __attribute__((ext_vector_type(16)));\n"
28011"typedef ulong ulong2 __attribute__((ext_vector_type(2)));\n"
28012"typedef ulong ulong3 __attribute__((ext_vector_type(3)));\n"
28013"typedef ulong ulong4 __attribute__((ext_vector_type(4)));\n"
28014"typedef ulong ulong8 __attribute__((ext_vector_type(8)));\n"
28015"typedef ulong ulong16 __attribute__((ext_vector_type(16)));\n"
28016"typedef float float2 __attribute__((ext_vector_type(2)));\n"
28017"typedef float float3 __attribute__((ext_vector_type(3)));\n"
28018"typedef float float4 __attribute__((ext_vector_type(4)));\n"
28019"typedef float float8 __attribute__((ext_vector_type(8)));\n"
28020"typedef float float16 __attribute__((ext_vector_type(16)));\n"
28021"#ifdef cl_khr_fp16\n"
28022"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
28023"typedef half half2 __attribute__((ext_vector_type(2)));\n"
28024"typedef half half3 __attribute__((ext_vector_type(3)));\n"
28025"typedef half half4 __attribute__((ext_vector_type(4)));\n"
28026"typedef half half8 __attribute__((ext_vector_type(8)));\n"
28027"typedef half half16 __attribute__((ext_vector_type(16)));\n"
28028"#endif\n"
28029"#ifdef cl_khr_fp64\n"
28030"#if __OPENCL_C_VERSION__ < CL_VERSION_1_2\n"
28031"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
28032"#endif\n"
28033"typedef double double2 __attribute__((ext_vector_type(2)));\n"
28034"typedef double double3 __attribute__((ext_vector_type(3)));\n"
28035"typedef double double4 __attribute__((ext_vector_type(4)));\n"
28036"typedef double double8 __attribute__((ext_vector_type(8)));\n"
28037"typedef double double16 __attribute__((ext_vector_type(16)));\n"
28038"#endif\n"
28039"\n"
28040"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
28041"#define NULL ((void*)0)\n"
28042"#endif\n"
28043"\n"
28044"/**\n"
28045" * Value of maximum non-infinite single-precision floating-point\n"
28046" * number.\n"
28047" */\n"
28048"#define MAXFLOAT 0x1.fffffep127f\n"
28049"\n"
28050"/**\n"
28051" * A positive float constant expression. HUGE_VALF evaluates\n"
28052" * to +infinity. Used as an error value returned by the built-in\n"
28053" * math functions.\n"
28054" */\n"
28055"#define HUGE_VALF (__builtin_huge_valf())\n"
28056"\n"
28057"/**\n"
28058" * A positive double constant expression. HUGE_VAL evaluates\n"
28059" * to +infinity. Used as an error value returned by the built-in\n"
28060" * math functions.\n"
28061" */\n"
28062"#define HUGE_VAL (__builtin_huge_val())\n"
28063"\n"
28064"/**\n"
28065" * A constant expression of type float representing positive or\n"
28066" * unsigned infinity.\n"
28067" */\n"
28068"#define INFINITY (__builtin_inff())\n"
28069"\n"
28070"/**\n"
28071" * A constant expression of type float representing a quiet NaN.\n"
28072" */\n"
28073"#define NAN as_float(INT_MAX)\n"
28074"\n"
28075"#define FP_ILOGB0 INT_MIN\n"
28076"#define FP_ILOGBNAN INT_MAX\n"
28077"\n"
28078"#define FLT_DIG 6\n"
28079"#define FLT_MANT_DIG 24\n"
28080"#define FLT_MAX_10_EXP +38\n"
28081"#define FLT_MAX_EXP +128\n"
28082"#define FLT_MIN_10_EXP -37\n"
28083"#define FLT_MIN_EXP -125\n"
28084"#define FLT_RADIX 2\n"
28085"#define FLT_MAX 0x1.fffffep127f\n"
28086"#define FLT_MIN 0x1.0p-126f\n"
28087"#define FLT_EPSILON 0x1.0p-23f\n"
28088"\n"
28089"#define M_E_F 2.71828182845904523536028747135266250f\n"
28090"#define M_LOG2E_F 1.44269504088896340735992468100189214f\n"
28091"#define M_LOG10E_F 0.434294481903251827651128918916605082f\n"
28092"#define M_LN2_F 0.693147180559945309417232121458176568f\n"
28093"#define M_LN10_F 2.30258509299404568401799145468436421f\n"
28094"#define M_PI_F 3.14159265358979323846264338327950288f\n"
28095"#define M_PI_2_F 1.57079632679489661923132169163975144f\n"
28096"#define M_PI_4_F 0.785398163397448309615660845819875721f\n"
28097"#define M_1_PI_F 0.318309886183790671537767526745028724f\n"
28098"#define M_2_PI_F 0.636619772367581343075535053490057448f\n"
28099"#define M_2_SQRTPI_F 1.12837916709551257389615890312154517f\n"
28100"#define M_SQRT2_F 1.41421356237309504880168872420969808f\n"
28101"#define M_SQRT1_2_F 0.707106781186547524400844362104849039f\n"
28102"\n"
28103"#define DBL_DIG 15\n"
28104"#define DBL_MANT_DIG 53\n"
28105"#define DBL_MAX_10_EXP +308\n"
28106"#define DBL_MAX_EXP +1024\n"
28107"#define DBL_MIN_10_EXP -307\n"
28108"#define DBL_MIN_EXP -1021\n"
28109"#define DBL_RADIX 2\n"
28110"#define DBL_MAX 0x1.fffffffffffffp1023\n"
28111"#define DBL_MIN 0x1.0p-1022\n"
28112"#define DBL_EPSILON 0x1.0p-52\n"
28113"\n"
28114"#define M_E 0x1.5bf0a8b145769p+1\n"
28115"#define M_LOG2E 0x1.71547652b82fep+0\n"
28116"#define M_LOG10E 0x1.bcb7b1526e50ep-2\n"
28117"#define M_LN2 0x1.62e42fefa39efp-1\n"
28118"#define M_LN10 0x1.26bb1bbb55516p+1\n"
28119"#define M_PI 0x1.921fb54442d18p+1\n"
28120"#define M_PI_2 0x1.921fb54442d18p+0\n"
28121"#define M_PI_4 0x1.921fb54442d18p-1\n"
28122"#define M_1_PI 0x1.45f306dc9c883p-2\n"
28123"#define M_2_PI 0x1.45f306dc9c883p-1\n"
28124"#define M_2_SQRTPI 0x1.20dd750429b6dp+0\n"
28125"#define M_SQRT2 0x1.6a09e667f3bcdp+0\n"
28126"#define M_SQRT1_2 0x1.6a09e667f3bcdp-1\n"
28127"\n"
28128"#ifdef cl_khr_fp16\n"
28129"\n"
28130"#define HALF_DIG 3\n"
28131"#define HALF_MANT_DIG 11\n"
28132"#define HALF_MAX_10_EXP +4\n"
28133"#define HALF_MAX_EXP +16\n"
28134"#define HALF_MIN_10_EXP -4\n"
28135"#define HALF_MIN_EXP -13\n"
28136"#define HALF_RADIX 2\n"
28137"#define HALF_MAX ((0x1.ffcp15h))\n"
28138"#define HALF_MIN ((0x1.0p-14h))\n"
28139"#define HALF_EPSILON ((0x1.0p-10h))\n"
28140"\n"
28141"#define M_E_H 2.71828182845904523536028747135266250h\n"
28142"#define M_LOG2E_H 1.44269504088896340735992468100189214h\n"
28143"#define M_LOG10E_H 0.434294481903251827651128918916605082h\n"
28144"#define M_LN2_H 0.693147180559945309417232121458176568h\n"
28145"#define M_LN10_H 2.30258509299404568401799145468436421h\n"
28146"#define M_PI_H 3.14159265358979323846264338327950288h\n"
28147"#define M_PI_2_H 1.57079632679489661923132169163975144h\n"
28148"#define M_PI_4_H 0.785398163397448309615660845819875721h\n"
28149"#define M_1_PI_H 0.318309886183790671537767526745028724h\n"
28150"#define M_2_PI_H 0.636619772367581343075535053490057448h\n"
28151"#define M_2_SQRTPI_H 1.12837916709551257389615890312154517h\n"
28152"#define M_SQRT2_H 1.41421356237309504880168872420969808h\n"
28153"#define M_SQRT1_2_H 0.707106781186547524400844362104849039h\n"
28154"\n"
28155"#endif //cl_khr_fp16\n"
28156"\n"
28157"#define CHAR_BIT 8\n"
28158"#define SCHAR_MAX 127\n"
28159"#define SCHAR_MIN (-128)\n"
28160"#define UCHAR_MAX 255\n"
28161"#define CHAR_MAX SCHAR_MAX\n"
28162"#define CHAR_MIN SCHAR_MIN\n"
28163"#define USHRT_MAX 65535\n"
28164"#define SHRT_MAX 32767\n"
28165"#define SHRT_MIN (-32768)\n"
28166"#define UINT_MAX 0xffffffff\n"
28167"#define INT_MAX 2147483647\n"
28168"#define INT_MIN (-2147483647-1)\n"
28169"#define ULONG_MAX 0xffffffffffffffffUL\n"
28170"#define LONG_MAX 0x7fffffffffffffffL\n"
28171"#define LONG_MIN (-0x7fffffffffffffffL-1)\n"
28172"\n"
28173"// OpenCL v1.1/1.2/2.0 s6.2.3 - Explicit conversions\n"
28174"\n"
28175"char __ovld __cnfn convert_char_rte(char);\n"
28176"char __ovld __cnfn convert_char_sat_rte(char);\n"
28177"char __ovld __cnfn convert_char_rtz(char);\n"
28178"char __ovld __cnfn convert_char_sat_rtz(char);\n"
28179"char __ovld __cnfn convert_char_rtp(char);\n"
28180"char __ovld __cnfn convert_char_sat_rtp(char);\n"
28181"char __ovld __cnfn convert_char_rtn(char);\n"
28182"char __ovld __cnfn convert_char_sat_rtn(char);\n"
28183"char __ovld __cnfn convert_char(char);\n"
28184"char __ovld __cnfn convert_char_sat(char);\n"
28185"char __ovld __cnfn convert_char_rte(uchar);\n"
28186"char __ovld __cnfn convert_char_sat_rte(uchar);\n"
28187"char __ovld __cnfn convert_char_rtz(uchar);\n"
28188"char __ovld __cnfn convert_char_sat_rtz(uchar);\n"
28189"char __ovld __cnfn convert_char_rtp(uchar);\n"
28190"char __ovld __cnfn convert_char_sat_rtp(uchar);\n"
28191"char __ovld __cnfn convert_char_rtn(uchar);\n"
28192"char __ovld __cnfn convert_char_sat_rtn(uchar);\n"
28193"char __ovld __cnfn convert_char(uchar);\n"
28194"char __ovld __cnfn convert_char_sat(uchar);\n"
28195"char __ovld __cnfn convert_char_rte(short);\n"
28196"char __ovld __cnfn convert_char_sat_rte(short);\n"
28197"char __ovld __cnfn convert_char_rtz(short);\n"
28198"char __ovld __cnfn convert_char_sat_rtz(short);\n"
28199"char __ovld __cnfn convert_char_rtp(short);\n"
28200"char __ovld __cnfn convert_char_sat_rtp(short);\n"
28201"char __ovld __cnfn convert_char_rtn(short);\n"
28202"char __ovld __cnfn convert_char_sat_rtn(short);\n"
28203"char __ovld __cnfn convert_char(short);\n"
28204"char __ovld __cnfn convert_char_sat(short);\n"
28205"char __ovld __cnfn convert_char_rte(ushort);\n"
28206"char __ovld __cnfn convert_char_sat_rte(ushort);\n"
28207"char __ovld __cnfn convert_char_rtz(ushort);\n"
28208"char __ovld __cnfn convert_char_sat_rtz(ushort);\n"
28209"char __ovld __cnfn convert_char_rtp(ushort);\n"
28210"char __ovld __cnfn convert_char_sat_rtp(ushort);\n"
28211"char __ovld __cnfn convert_char_rtn(ushort);\n"
28212"char __ovld __cnfn convert_char_sat_rtn(ushort);\n"
28213"char __ovld __cnfn convert_char(ushort);\n"
28214"char __ovld __cnfn convert_char_sat(ushort);\n"
28215"char __ovld __cnfn convert_char_rte(int);\n"
28216"char __ovld __cnfn convert_char_sat_rte(int);\n"
28217"char __ovld __cnfn convert_char_rtz(int);\n"
28218"char __ovld __cnfn convert_char_sat_rtz(int);\n"
28219"char __ovld __cnfn convert_char_rtp(int);\n"
28220"char __ovld __cnfn convert_char_sat_rtp(int);\n"
28221"char __ovld __cnfn convert_char_rtn(int);\n"
28222"char __ovld __cnfn convert_char_sat_rtn(int);\n"
28223"char __ovld __cnfn convert_char(int);\n"
28224"char __ovld __cnfn convert_char_sat(int);\n"
28225"char __ovld __cnfn convert_char_rte(uint);\n"
28226"char __ovld __cnfn convert_char_sat_rte(uint);\n"
28227"char __ovld __cnfn convert_char_rtz(uint);\n"
28228"char __ovld __cnfn convert_char_sat_rtz(uint);\n"
28229"char __ovld __cnfn convert_char_rtp(uint);\n"
28230"char __ovld __cnfn convert_char_sat_rtp(uint);\n"
28231"char __ovld __cnfn convert_char_rtn(uint);\n"
28232"char __ovld __cnfn convert_char_sat_rtn(uint);\n"
28233"char __ovld __cnfn convert_char(uint);\n"
28234"char __ovld __cnfn convert_char_sat(uint);\n"
28235"char __ovld __cnfn convert_char_rte(long);\n"
28236"char __ovld __cnfn convert_char_sat_rte(long);\n"
28237"char __ovld __cnfn convert_char_rtz(long);\n"
28238"char __ovld __cnfn convert_char_sat_rtz(long);\n"
28239"char __ovld __cnfn convert_char_rtp(long);\n"
28240"char __ovld __cnfn convert_char_sat_rtp(long);\n"
28241"char __ovld __cnfn convert_char_rtn(long);\n"
28242"char __ovld __cnfn convert_char_sat_rtn(long);\n"
28243"char __ovld __cnfn convert_char(long);\n"
28244"char __ovld __cnfn convert_char_sat(long);\n"
28245"char __ovld __cnfn convert_char_rte(ulong);\n"
28246"char __ovld __cnfn convert_char_sat_rte(ulong);\n"
28247"char __ovld __cnfn convert_char_rtz(ulong);\n"
28248"char __ovld __cnfn convert_char_sat_rtz(ulong);\n"
28249"char __ovld __cnfn convert_char_rtp(ulong);\n"
28250"char __ovld __cnfn convert_char_sat_rtp(ulong);\n"
28251"char __ovld __cnfn convert_char_rtn(ulong);\n"
28252"char __ovld __cnfn convert_char_sat_rtn(ulong);\n"
28253"char __ovld __cnfn convert_char(ulong);\n"
28254"char __ovld __cnfn convert_char_sat(ulong);\n"
28255"char __ovld __cnfn convert_char_rte(float);\n"
28256"char __ovld __cnfn convert_char_sat_rte(float);\n"
28257"char __ovld __cnfn convert_char_rtz(float);\n"
28258"char __ovld __cnfn convert_char_sat_rtz(float);\n"
28259"char __ovld __cnfn convert_char_rtp(float);\n"
28260"char __ovld __cnfn convert_char_sat_rtp(float);\n"
28261"char __ovld __cnfn convert_char_rtn(float);\n"
28262"char __ovld __cnfn convert_char_sat_rtn(float);\n"
28263"char __ovld __cnfn convert_char(float);\n"
28264"char __ovld __cnfn convert_char_sat(float);\n"
28265"uchar __ovld __cnfn convert_uchar_rte(char);\n"
28266"uchar __ovld __cnfn convert_uchar_sat_rte(char);\n"
28267"uchar __ovld __cnfn convert_uchar_rtz(char);\n"
28268"uchar __ovld __cnfn convert_uchar_sat_rtz(char);\n"
28269"uchar __ovld __cnfn convert_uchar_rtp(char);\n"
28270"uchar __ovld __cnfn convert_uchar_sat_rtp(char);\n"
28271"uchar __ovld __cnfn convert_uchar_rtn(char);\n"
28272"uchar __ovld __cnfn convert_uchar_sat_rtn(char);\n"
28273"uchar __ovld __cnfn convert_uchar(char);\n"
28274"uchar __ovld __cnfn convert_uchar_sat(char);\n"
28275"uchar __ovld __cnfn convert_uchar_rte(uchar);\n"
28276"uchar __ovld __cnfn convert_uchar_sat_rte(uchar);\n"
28277"uchar __ovld __cnfn convert_uchar_rtz(uchar);\n"
28278"uchar __ovld __cnfn convert_uchar_sat_rtz(uchar);\n"
28279"uchar __ovld __cnfn convert_uchar_rtp(uchar);\n"
28280"uchar __ovld __cnfn convert_uchar_sat_rtp(uchar);\n"
28281"uchar __ovld __cnfn convert_uchar_rtn(uchar);\n"
28282"uchar __ovld __cnfn convert_uchar_sat_rtn(uchar);\n"
28283"uchar __ovld __cnfn convert_uchar(uchar);\n"
28284"uchar __ovld __cnfn convert_uchar_sat(uchar);\n"
28285"uchar __ovld __cnfn convert_uchar_rte(short);\n"
28286"uchar __ovld __cnfn convert_uchar_sat_rte(short);\n"
28287"uchar __ovld __cnfn convert_uchar_rtz(short);\n"
28288"uchar __ovld __cnfn convert_uchar_sat_rtz(short);\n"
28289"uchar __ovld __cnfn convert_uchar_rtp(short);\n"
28290"uchar __ovld __cnfn convert_uchar_sat_rtp(short);\n"
28291"uchar __ovld __cnfn convert_uchar_rtn(short);\n"
28292"uchar __ovld __cnfn convert_uchar_sat_rtn(short);\n"
28293"uchar __ovld __cnfn convert_uchar(short);\n"
28294"uchar __ovld __cnfn convert_uchar_sat(short);\n"
28295"uchar __ovld __cnfn convert_uchar_rte(ushort);\n"
28296"uchar __ovld __cnfn convert_uchar_sat_rte(ushort);\n"
28297"uchar __ovld __cnfn convert_uchar_rtz(ushort);\n"
28298"uchar __ovld __cnfn convert_uchar_sat_rtz(ushort);\n"
28299"uchar __ovld __cnfn convert_uchar_rtp(ushort);\n"
28300"uchar __ovld __cnfn convert_uchar_sat_rtp(ushort);\n"
28301"uchar __ovld __cnfn convert_uchar_rtn(ushort);\n"
28302"uchar __ovld __cnfn convert_uchar_sat_rtn(ushort);\n"
28303"uchar __ovld __cnfn convert_uchar(ushort);\n"
28304"uchar __ovld __cnfn convert_uchar_sat(ushort);\n"
28305"uchar __ovld __cnfn convert_uchar_rte(int);\n"
28306"uchar __ovld __cnfn convert_uchar_sat_rte(int);\n"
28307"uchar __ovld __cnfn convert_uchar_rtz(int);\n"
28308"uchar __ovld __cnfn convert_uchar_sat_rtz(int);\n"
28309"uchar __ovld __cnfn convert_uchar_rtp(int);\n"
28310"uchar __ovld __cnfn convert_uchar_sat_rtp(int);\n"
28311"uchar __ovld __cnfn convert_uchar_rtn(int);\n"
28312"uchar __ovld __cnfn convert_uchar_sat_rtn(int);\n"
28313"uchar __ovld __cnfn convert_uchar(int);\n"
28314"uchar __ovld __cnfn convert_uchar_sat(int);\n"
28315"uchar __ovld __cnfn convert_uchar_rte(uint);\n"
28316"uchar __ovld __cnfn convert_uchar_sat_rte(uint);\n"
28317"uchar __ovld __cnfn convert_uchar_rtz(uint);\n"
28318"uchar __ovld __cnfn convert_uchar_sat_rtz(uint);\n"
28319"uchar __ovld __cnfn convert_uchar_rtp(uint);\n"
28320"uchar __ovld __cnfn convert_uchar_sat_rtp(uint);\n"
28321"uchar __ovld __cnfn convert_uchar_rtn(uint);\n"
28322"uchar __ovld __cnfn convert_uchar_sat_rtn(uint);\n"
28323"uchar __ovld __cnfn convert_uchar(uint);\n"
28324"uchar __ovld __cnfn convert_uchar_sat(uint);\n"
28325"uchar __ovld __cnfn convert_uchar_rte(long);\n"
28326"uchar __ovld __cnfn convert_uchar_sat_rte(long);\n"
28327"uchar __ovld __cnfn convert_uchar_rtz(long);\n"
28328"uchar __ovld __cnfn convert_uchar_sat_rtz(long);\n"
28329"uchar __ovld __cnfn convert_uchar_rtp(long);\n"
28330"uchar __ovld __cnfn convert_uchar_sat_rtp(long);\n"
28331"uchar __ovld __cnfn convert_uchar_rtn(long);\n"
28332"uchar __ovld __cnfn convert_uchar_sat_rtn(long);\n"
28333"uchar __ovld __cnfn convert_uchar(long);\n"
28334"uchar __ovld __cnfn convert_uchar_sat(long);\n"
28335"uchar __ovld __cnfn convert_uchar_rte(ulong);\n"
28336"uchar __ovld __cnfn convert_uchar_sat_rte(ulong);\n"
28337"uchar __ovld __cnfn convert_uchar_rtz(ulong);\n"
28338"uchar __ovld __cnfn convert_uchar_sat_rtz(ulong);\n"
28339"uchar __ovld __cnfn convert_uchar_rtp(ulong);\n"
28340"uchar __ovld __cnfn convert_uchar_sat_rtp(ulong);\n"
28341"uchar __ovld __cnfn convert_uchar_rtn(ulong);\n"
28342"uchar __ovld __cnfn convert_uchar_sat_rtn(ulong);\n"
28343"uchar __ovld __cnfn convert_uchar(ulong);\n"
28344"uchar __ovld __cnfn convert_uchar_sat(ulong);\n"
28345"uchar __ovld __cnfn convert_uchar_rte(float);\n"
28346"uchar __ovld __cnfn convert_uchar_sat_rte(float);\n"
28347"uchar __ovld __cnfn convert_uchar_rtz(float);\n"
28348"uchar __ovld __cnfn convert_uchar_sat_rtz(float);\n"
28349"uchar __ovld __cnfn convert_uchar_rtp(float);\n"
28350"uchar __ovld __cnfn convert_uchar_sat_rtp(float);\n"
28351"uchar __ovld __cnfn convert_uchar_rtn(float);\n"
28352"uchar __ovld __cnfn convert_uchar_sat_rtn(float);\n"
28353"uchar __ovld __cnfn convert_uchar(float);\n"
28354"uchar __ovld __cnfn convert_uchar_sat(float);\n"
28355"\n"
28356"short __ovld __cnfn convert_short_rte(char);\n"
28357"short __ovld __cnfn convert_short_sat_rte(char);\n"
28358"short __ovld __cnfn convert_short_rtz(char);\n"
28359"short __ovld __cnfn convert_short_sat_rtz(char);\n"
28360"short __ovld __cnfn convert_short_rtp(char);\n"
28361"short __ovld __cnfn convert_short_sat_rtp(char);\n"
28362"short __ovld __cnfn convert_short_rtn(char);\n"
28363"short __ovld __cnfn convert_short_sat_rtn(char);\n"
28364"short __ovld __cnfn convert_short(char);\n"
28365"short __ovld __cnfn convert_short_sat(char);\n"
28366"short __ovld __cnfn convert_short_rte(uchar);\n"
28367"short __ovld __cnfn convert_short_sat_rte(uchar);\n"
28368"short __ovld __cnfn convert_short_rtz(uchar);\n"
28369"short __ovld __cnfn convert_short_sat_rtz(uchar);\n"
28370"short __ovld __cnfn convert_short_rtp(uchar);\n"
28371"short __ovld __cnfn convert_short_sat_rtp(uchar);\n"
28372"short __ovld __cnfn convert_short_rtn(uchar);\n"
28373"short __ovld __cnfn convert_short_sat_rtn(uchar);\n"
28374"short __ovld __cnfn convert_short(uchar);\n"
28375"short __ovld __cnfn convert_short_sat(uchar);\n"
28376"short __ovld __cnfn convert_short_rte(short);\n"
28377"short __ovld __cnfn convert_short_sat_rte(short);\n"
28378"short __ovld __cnfn convert_short_rtz(short);\n"
28379"short __ovld __cnfn convert_short_sat_rtz(short);\n"
28380"short __ovld __cnfn convert_short_rtp(short);\n"
28381"short __ovld __cnfn convert_short_sat_rtp(short);\n"
28382"short __ovld __cnfn convert_short_rtn(short);\n"
28383"short __ovld __cnfn convert_short_sat_rtn(short);\n"
28384"short __ovld __cnfn convert_short(short);\n"
28385"short __ovld __cnfn convert_short_sat(short);\n"
28386"short __ovld __cnfn convert_short_rte(ushort);\n"
28387"short __ovld __cnfn convert_short_sat_rte(ushort);\n"
28388"short __ovld __cnfn convert_short_rtz(ushort);\n"
28389"short __ovld __cnfn convert_short_sat_rtz(ushort);\n"
28390"short __ovld __cnfn convert_short_rtp(ushort);\n"
28391"short __ovld __cnfn convert_short_sat_rtp(ushort);\n"
28392"short __ovld __cnfn convert_short_rtn(ushort);\n"
28393"short __ovld __cnfn convert_short_sat_rtn(ushort);\n"
28394"short __ovld __cnfn convert_short(ushort);\n"
28395"short __ovld __cnfn convert_short_sat(ushort);\n"
28396"short __ovld __cnfn convert_short_rte(int);\n"
28397"short __ovld __cnfn convert_short_sat_rte(int);\n"
28398"short __ovld __cnfn convert_short_rtz(int);\n"
28399"short __ovld __cnfn convert_short_sat_rtz(int);\n"
28400"short __ovld __cnfn convert_short_rtp(int);\n"
28401"short __ovld __cnfn convert_short_sat_rtp(int);\n"
28402"short __ovld __cnfn convert_short_rtn(int);\n"
28403"short __ovld __cnfn convert_short_sat_rtn(int);\n"
28404"short __ovld __cnfn convert_short(int);\n"
28405"short __ovld __cnfn convert_short_sat(int);\n"
28406"short __ovld __cnfn convert_short_rte(uint);\n"
28407"short __ovld __cnfn convert_short_sat_rte(uint);\n"
28408"short __ovld __cnfn convert_short_rtz(uint);\n"
28409"short __ovld __cnfn convert_short_sat_rtz(uint);\n"
28410"short __ovld __cnfn convert_short_rtp(uint);\n"
28411"short __ovld __cnfn convert_short_sat_rtp(uint);\n"
28412"short __ovld __cnfn convert_short_rtn(uint);\n"
28413"short __ovld __cnfn convert_short_sat_rtn(uint);\n"
28414"short __ovld __cnfn convert_short(uint);\n"
28415"short __ovld __cnfn convert_short_sat(uint);\n"
28416"short __ovld __cnfn convert_short_rte(long);\n"
28417"short __ovld __cnfn convert_short_sat_rte(long);\n"
28418"short __ovld __cnfn convert_short_rtz(long);\n"
28419"short __ovld __cnfn convert_short_sat_rtz(long);\n"
28420"short __ovld __cnfn convert_short_rtp(long);\n"
28421"short __ovld __cnfn convert_short_sat_rtp(long);\n"
28422"short __ovld __cnfn convert_short_rtn(long);\n"
28423"short __ovld __cnfn convert_short_sat_rtn(long);\n"
28424"short __ovld __cnfn convert_short(long);\n"
28425"short __ovld __cnfn convert_short_sat(long);\n"
28426"short __ovld __cnfn convert_short_rte(ulong);\n"
28427"short __ovld __cnfn convert_short_sat_rte(ulong);\n"
28428"short __ovld __cnfn convert_short_rtz(ulong);\n"
28429"short __ovld __cnfn convert_short_sat_rtz(ulong);\n"
28430"short __ovld __cnfn convert_short_rtp(ulong);\n"
28431"short __ovld __cnfn convert_short_sat_rtp(ulong);\n"
28432"short __ovld __cnfn convert_short_rtn(ulong);\n"
28433"short __ovld __cnfn convert_short_sat_rtn(ulong);\n"
28434"short __ovld __cnfn convert_short(ulong);\n"
28435"short __ovld __cnfn convert_short_sat(ulong);\n"
28436"short __ovld __cnfn convert_short_rte(float);\n"
28437"short __ovld __cnfn convert_short_sat_rte(float);\n"
28438"short __ovld __cnfn convert_short_rtz(float);\n"
28439"short __ovld __cnfn convert_short_sat_rtz(float);\n"
28440"short __ovld __cnfn convert_short_rtp(float);\n"
28441"short __ovld __cnfn convert_short_sat_rtp(float);\n"
28442"short __ovld __cnfn convert_short_rtn(float);\n"
28443"short __ovld __cnfn convert_short_sat_rtn(float);\n"
28444"short __ovld __cnfn convert_short(float);\n"
28445"short __ovld __cnfn convert_short_sat(float);\n"
28446"ushort __ovld __cnfn convert_ushort_rte(char);\n"
28447"ushort __ovld __cnfn convert_ushort_sat_rte(char);\n"
28448"ushort __ovld __cnfn convert_ushort_rtz(char);\n"
28449"ushort __ovld __cnfn convert_ushort_sat_rtz(char);\n"
28450"ushort __ovld __cnfn convert_ushort_rtp(char);\n"
28451"ushort __ovld __cnfn convert_ushort_sat_rtp(char);\n"
28452"ushort __ovld __cnfn convert_ushort_rtn(char);\n"
28453"ushort __ovld __cnfn convert_ushort_sat_rtn(char);\n"
28454"ushort __ovld __cnfn convert_ushort(char);\n"
28455"ushort __ovld __cnfn convert_ushort_sat(char);\n"
28456"ushort __ovld __cnfn convert_ushort_rte(uchar);\n"
28457"ushort __ovld __cnfn convert_ushort_sat_rte(uchar);\n"
28458"ushort __ovld __cnfn convert_ushort_rtz(uchar);\n"
28459"ushort __ovld __cnfn convert_ushort_sat_rtz(uchar);\n"
28460"ushort __ovld __cnfn convert_ushort_rtp(uchar);\n"
28461"ushort __ovld __cnfn convert_ushort_sat_rtp(uchar);\n"
28462"ushort __ovld __cnfn convert_ushort_rtn(uchar);\n"
28463"ushort __ovld __cnfn convert_ushort_sat_rtn(uchar);\n"
28464"ushort __ovld __cnfn convert_ushort(uchar);\n"
28465"ushort __ovld __cnfn convert_ushort_sat(uchar);\n"
28466"ushort __ovld __cnfn convert_ushort_rte(short);\n"
28467"ushort __ovld __cnfn convert_ushort_sat_rte(short);\n"
28468"ushort __ovld __cnfn convert_ushort_rtz(short);\n"
28469"ushort __ovld __cnfn convert_ushort_sat_rtz(short);\n"
28470"ushort __ovld __cnfn convert_ushort_rtp(short);\n"
28471"ushort __ovld __cnfn convert_ushort_sat_rtp(short);\n"
28472"ushort __ovld __cnfn convert_ushort_rtn(short);\n"
28473"ushort __ovld __cnfn convert_ushort_sat_rtn(short);\n"
28474"ushort __ovld __cnfn convert_ushort(short);\n"
28475"ushort __ovld __cnfn convert_ushort_sat(short);\n"
28476"ushort __ovld __cnfn convert_ushort_rte(ushort);\n"
28477"ushort __ovld __cnfn convert_ushort_sat_rte(ushort);\n"
28478"ushort __ovld __cnfn convert_ushort_rtz(ushort);\n"
28479"ushort __ovld __cnfn convert_ushort_sat_rtz(ushort);\n"
28480"ushort __ovld __cnfn convert_ushort_rtp(ushort);\n"
28481"ushort __ovld __cnfn convert_ushort_sat_rtp(ushort);\n"
28482"ushort __ovld __cnfn convert_ushort_rtn(ushort);\n"
28483"ushort __ovld __cnfn convert_ushort_sat_rtn(ushort);\n"
28484"ushort __ovld __cnfn convert_ushort(ushort);\n"
28485"ushort __ovld __cnfn convert_ushort_sat(ushort);\n"
28486"ushort __ovld __cnfn convert_ushort_rte(int);\n"
28487"ushort __ovld __cnfn convert_ushort_sat_rte(int);\n"
28488"ushort __ovld __cnfn convert_ushort_rtz(int);\n"
28489"ushort __ovld __cnfn convert_ushort_sat_rtz(int);\n"
28490"ushort __ovld __cnfn convert_ushort_rtp(int);\n"
28491"ushort __ovld __cnfn convert_ushort_sat_rtp(int);\n"
28492"ushort __ovld __cnfn convert_ushort_rtn(int);\n"
28493"ushort __ovld __cnfn convert_ushort_sat_rtn(int);\n"
28494"ushort __ovld __cnfn convert_ushort(int);\n"
28495"ushort __ovld __cnfn convert_ushort_sat(int);\n"
28496"ushort __ovld __cnfn convert_ushort_rte(uint);\n"
28497"ushort __ovld __cnfn convert_ushort_sat_rte(uint);\n"
28498"ushort __ovld __cnfn convert_ushort_rtz(uint);\n"
28499"ushort __ovld __cnfn convert_ushort_sat_rtz(uint);\n"
28500"ushort __ovld __cnfn convert_ushort_rtp(uint);\n"
28501"ushort __ovld __cnfn convert_ushort_sat_rtp(uint);\n"
28502"ushort __ovld __cnfn convert_ushort_rtn(uint);\n"
28503"ushort __ovld __cnfn convert_ushort_sat_rtn(uint);\n"
28504"ushort __ovld __cnfn convert_ushort(uint);\n"
28505"ushort __ovld __cnfn convert_ushort_sat(uint);\n"
28506"ushort __ovld __cnfn convert_ushort_rte(long);\n"
28507"ushort __ovld __cnfn convert_ushort_sat_rte(long);\n"
28508"ushort __ovld __cnfn convert_ushort_rtz(long);\n"
28509"ushort __ovld __cnfn convert_ushort_sat_rtz(long);\n"
28510"ushort __ovld __cnfn convert_ushort_rtp(long);\n"
28511"ushort __ovld __cnfn convert_ushort_sat_rtp(long);\n"
28512"ushort __ovld __cnfn convert_ushort_rtn(long);\n"
28513"ushort __ovld __cnfn convert_ushort_sat_rtn(long);\n"
28514"ushort __ovld __cnfn convert_ushort(long);\n"
28515"ushort __ovld __cnfn convert_ushort_sat(long);\n"
28516"ushort __ovld __cnfn convert_ushort_rte(ulong);\n"
28517"ushort __ovld __cnfn convert_ushort_sat_rte(ulong);\n"
28518"ushort __ovld __cnfn convert_ushort_rtz(ulong);\n"
28519"ushort __ovld __cnfn convert_ushort_sat_rtz(ulong);\n"
28520"ushort __ovld __cnfn convert_ushort_rtp(ulong);\n"
28521"ushort __ovld __cnfn convert_ushort_sat_rtp(ulong);\n"
28522"ushort __ovld __cnfn convert_ushort_rtn(ulong);\n"
28523"ushort __ovld __cnfn convert_ushort_sat_rtn(ulong);\n"
28524"ushort __ovld __cnfn convert_ushort(ulong);\n"
28525"ushort __ovld __cnfn convert_ushort_sat(ulong);\n"
28526"ushort __ovld __cnfn convert_ushort_rte(float);\n"
28527"ushort __ovld __cnfn convert_ushort_sat_rte(float);\n"
28528"ushort __ovld __cnfn convert_ushort_rtz(float);\n"
28529"ushort __ovld __cnfn convert_ushort_sat_rtz(float);\n"
28530"ushort __ovld __cnfn convert_ushort_rtp(float);\n"
28531"ushort __ovld __cnfn convert_ushort_sat_rtp(float);\n"
28532"ushort __ovld __cnfn convert_ushort_rtn(float);\n"
28533"ushort __ovld __cnfn convert_ushort_sat_rtn(float);\n"
28534"ushort __ovld __cnfn convert_ushort(float);\n"
28535"ushort __ovld __cnfn convert_ushort_sat(float);\n"
28536"int __ovld __cnfn convert_int_rte(char);\n"
28537"int __ovld __cnfn convert_int_sat_rte(char);\n"
28538"int __ovld __cnfn convert_int_rtz(char);\n"
28539"int __ovld __cnfn convert_int_sat_rtz(char);\n"
28540"int __ovld __cnfn convert_int_rtp(char);\n"
28541"int __ovld __cnfn convert_int_sat_rtp(char);\n"
28542"int __ovld __cnfn convert_int_rtn(char);\n"
28543"int __ovld __cnfn convert_int_sat_rtn(char);\n"
28544"int __ovld __cnfn convert_int(char);\n"
28545"int __ovld __cnfn convert_int_sat(char);\n"
28546"int __ovld __cnfn convert_int_rte(uchar);\n"
28547"int __ovld __cnfn convert_int_sat_rte(uchar);\n"
28548"int __ovld __cnfn convert_int_rtz(uchar);\n"
28549"int __ovld __cnfn convert_int_sat_rtz(uchar);\n"
28550"int __ovld __cnfn convert_int_rtp(uchar);\n"
28551"int __ovld __cnfn convert_int_sat_rtp(uchar);\n"
28552"int __ovld __cnfn convert_int_rtn(uchar);\n"
28553"int __ovld __cnfn convert_int_sat_rtn(uchar);\n"
28554"int __ovld __cnfn convert_int(uchar);\n"
28555"int __ovld __cnfn convert_int_sat(uchar);\n"
28556"int __ovld __cnfn convert_int_rte(short);\n"
28557"int __ovld __cnfn convert_int_sat_rte(short);\n"
28558"int __ovld __cnfn convert_int_rtz(short);\n"
28559"int __ovld __cnfn convert_int_sat_rtz(short);\n"
28560"int __ovld __cnfn convert_int_rtp(short);\n"
28561"int __ovld __cnfn convert_int_sat_rtp(short);\n"
28562"int __ovld __cnfn convert_int_rtn(short);\n"
28563"int __ovld __cnfn convert_int_sat_rtn(short);\n"
28564"int __ovld __cnfn convert_int(short);\n"
28565"int __ovld __cnfn convert_int_sat(short);\n"
28566"int __ovld __cnfn convert_int_rte(ushort);\n"
28567"int __ovld __cnfn convert_int_sat_rte(ushort);\n"
28568"int __ovld __cnfn convert_int_rtz(ushort);\n"
28569"int __ovld __cnfn convert_int_sat_rtz(ushort);\n"
28570"int __ovld __cnfn convert_int_rtp(ushort);\n"
28571"int __ovld __cnfn convert_int_sat_rtp(ushort);\n"
28572"int __ovld __cnfn convert_int_rtn(ushort);\n"
28573"int __ovld __cnfn convert_int_sat_rtn(ushort);\n"
28574"int __ovld __cnfn convert_int(ushort);\n"
28575"int __ovld __cnfn convert_int_sat(ushort);\n"
28576"int __ovld __cnfn convert_int_rte(int);\n"
28577"int __ovld __cnfn convert_int_sat_rte(int);\n"
28578"int __ovld __cnfn convert_int_rtz(int);\n"
28579"int __ovld __cnfn convert_int_sat_rtz(int);\n"
28580"int __ovld __cnfn convert_int_rtp(int);\n"
28581"int __ovld __cnfn convert_int_sat_rtp(int);\n"
28582"int __ovld __cnfn convert_int_rtn(int);\n"
28583"int __ovld __cnfn convert_int_sat_rtn(int);\n"
28584"int __ovld __cnfn convert_int(int);\n"
28585"int __ovld __cnfn convert_int_sat(int);\n"
28586"int __ovld __cnfn convert_int_rte(uint);\n"
28587"int __ovld __cnfn convert_int_sat_rte(uint);\n"
28588"int __ovld __cnfn convert_int_rtz(uint);\n"
28589"int __ovld __cnfn convert_int_sat_rtz(uint);\n"
28590"int __ovld __cnfn convert_int_rtp(uint);\n"
28591"int __ovld __cnfn convert_int_sat_rtp(uint);\n"
28592"int __ovld __cnfn convert_int_rtn(uint);\n"
28593"int __ovld __cnfn convert_int_sat_rtn(uint);\n"
28594"int __ovld __cnfn convert_int(uint);\n"
28595"int __ovld __cnfn convert_int_sat(uint);\n"
28596"int __ovld __cnfn convert_int_rte(long);\n"
28597"int __ovld __cnfn convert_int_sat_rte(long);\n"
28598"int __ovld __cnfn convert_int_rtz(long);\n"
28599"int __ovld __cnfn convert_int_sat_rtz(long);\n"
28600"int __ovld __cnfn convert_int_rtp(long);\n"
28601"int __ovld __cnfn convert_int_sat_rtp(long);\n"
28602"int __ovld __cnfn convert_int_rtn(long);\n"
28603"int __ovld __cnfn convert_int_sat_rtn(long);\n"
28604"int __ovld __cnfn convert_int(long);\n"
28605"int __ovld __cnfn convert_int_sat(long);\n"
28606"int __ovld __cnfn convert_int_rte(ulong);\n"
28607"int __ovld __cnfn convert_int_sat_rte(ulong);\n"
28608"int __ovld __cnfn convert_int_rtz(ulong);\n"
28609"int __ovld __cnfn convert_int_sat_rtz(ulong);\n"
28610"int __ovld __cnfn convert_int_rtp(ulong);\n"
28611"int __ovld __cnfn convert_int_sat_rtp(ulong);\n"
28612"int __ovld __cnfn convert_int_rtn(ulong);\n"
28613"int __ovld __cnfn convert_int_sat_rtn(ulong);\n"
28614"int __ovld __cnfn convert_int(ulong);\n"
28615"int __ovld __cnfn convert_int_sat(ulong);\n"
28616"int __ovld __cnfn convert_int_rte(float);\n"
28617"int __ovld __cnfn convert_int_sat_rte(float);\n"
28618"int __ovld __cnfn convert_int_rtz(float);\n"
28619"int __ovld __cnfn convert_int_sat_rtz(float);\n"
28620"int __ovld __cnfn convert_int_rtp(float);\n"
28621"int __ovld __cnfn convert_int_sat_rtp(float);\n"
28622"int __ovld __cnfn convert_int_rtn(float);\n"
28623"int __ovld __cnfn convert_int_sat_rtn(float);\n"
28624"int __ovld __cnfn convert_int(float);\n"
28625"int __ovld __cnfn convert_int_sat(float);\n"
28626"uint __ovld __cnfn convert_uint_rte(char);\n"
28627"uint __ovld __cnfn convert_uint_sat_rte(char);\n"
28628"uint __ovld __cnfn convert_uint_rtz(char);\n"
28629"uint __ovld __cnfn convert_uint_sat_rtz(char);\n"
28630"uint __ovld __cnfn convert_uint_rtp(char);\n"
28631"uint __ovld __cnfn convert_uint_sat_rtp(char);\n"
28632"uint __ovld __cnfn convert_uint_rtn(char);\n"
28633"uint __ovld __cnfn convert_uint_sat_rtn(char);\n"
28634"uint __ovld __cnfn convert_uint(char);\n"
28635"uint __ovld __cnfn convert_uint_sat(char);\n"
28636"uint __ovld __cnfn convert_uint_rte(uchar);\n"
28637"uint __ovld __cnfn convert_uint_sat_rte(uchar);\n"
28638"uint __ovld __cnfn convert_uint_rtz(uchar);\n"
28639"uint __ovld __cnfn convert_uint_sat_rtz(uchar);\n"
28640"uint __ovld __cnfn convert_uint_rtp(uchar);\n"
28641"uint __ovld __cnfn convert_uint_sat_rtp(uchar);\n"
28642"uint __ovld __cnfn convert_uint_rtn(uchar);\n"
28643"uint __ovld __cnfn convert_uint_sat_rtn(uchar);\n"
28644"uint __ovld __cnfn convert_uint(uchar);\n"
28645"uint __ovld __cnfn convert_uint_sat(uchar);\n"
28646"uint __ovld __cnfn convert_uint_rte(short);\n"
28647"uint __ovld __cnfn convert_uint_sat_rte(short);\n"
28648"uint __ovld __cnfn convert_uint_rtz(short);\n"
28649"uint __ovld __cnfn convert_uint_sat_rtz(short);\n"
28650"uint __ovld __cnfn convert_uint_rtp(short);\n"
28651"uint __ovld __cnfn convert_uint_sat_rtp(short);\n"
28652"uint __ovld __cnfn convert_uint_rtn(short);\n"
28653"uint __ovld __cnfn convert_uint_sat_rtn(short);\n"
28654"uint __ovld __cnfn convert_uint(short);\n"
28655"uint __ovld __cnfn convert_uint_sat(short);\n"
28656"uint __ovld __cnfn convert_uint_rte(ushort);\n"
28657"uint __ovld __cnfn convert_uint_sat_rte(ushort);\n"
28658"uint __ovld __cnfn convert_uint_rtz(ushort);\n"
28659"uint __ovld __cnfn convert_uint_sat_rtz(ushort);\n"
28660"uint __ovld __cnfn convert_uint_rtp(ushort);\n"
28661"uint __ovld __cnfn convert_uint_sat_rtp(ushort);\n"
28662"uint __ovld __cnfn convert_uint_rtn(ushort);\n"
28663"uint __ovld __cnfn convert_uint_sat_rtn(ushort);\n"
28664"uint __ovld __cnfn convert_uint(ushort);\n"
28665"uint __ovld __cnfn convert_uint_sat(ushort);\n"
28666"uint __ovld __cnfn convert_uint_rte(int);\n"
28667"uint __ovld __cnfn convert_uint_sat_rte(int);\n"
28668"uint __ovld __cnfn convert_uint_rtz(int);\n"
28669"uint __ovld __cnfn convert_uint_sat_rtz(int);\n"
28670"uint __ovld __cnfn convert_uint_rtp(int);\n"
28671"uint __ovld __cnfn convert_uint_sat_rtp(int);\n"
28672"uint __ovld __cnfn convert_uint_rtn(int);\n"
28673"uint __ovld __cnfn convert_uint_sat_rtn(int);\n"
28674"uint __ovld __cnfn convert_uint(int);\n"
28675"uint __ovld __cnfn convert_uint_sat(int);\n"
28676"uint __ovld __cnfn convert_uint_rte(uint);\n"
28677"uint __ovld __cnfn convert_uint_sat_rte(uint);\n"
28678"uint __ovld __cnfn convert_uint_rtz(uint);\n"
28679"uint __ovld __cnfn convert_uint_sat_rtz(uint);\n"
28680"uint __ovld __cnfn convert_uint_rtp(uint);\n"
28681"uint __ovld __cnfn convert_uint_sat_rtp(uint);\n"
28682"uint __ovld __cnfn convert_uint_rtn(uint);\n"
28683"uint __ovld __cnfn convert_uint_sat_rtn(uint);\n"
28684"uint __ovld __cnfn convert_uint(uint);\n"
28685"uint __ovld __cnfn convert_uint_sat(uint);\n"
28686"uint __ovld __cnfn convert_uint_rte(long);\n"
28687"uint __ovld __cnfn convert_uint_sat_rte(long);\n"
28688"uint __ovld __cnfn convert_uint_rtz(long);\n"
28689"uint __ovld __cnfn convert_uint_sat_rtz(long);\n"
28690"uint __ovld __cnfn convert_uint_rtp(long);\n"
28691"uint __ovld __cnfn convert_uint_sat_rtp(long);\n"
28692"uint __ovld __cnfn convert_uint_rtn(long);\n"
28693"uint __ovld __cnfn convert_uint_sat_rtn(long);\n"
28694"uint __ovld __cnfn convert_uint(long);\n"
28695"uint __ovld __cnfn convert_uint_sat(long);\n"
28696"uint __ovld __cnfn convert_uint_rte(ulong);\n"
28697"uint __ovld __cnfn convert_uint_sat_rte(ulong);\n"
28698"uint __ovld __cnfn convert_uint_rtz(ulong);\n"
28699"uint __ovld __cnfn convert_uint_sat_rtz(ulong);\n"
28700"uint __ovld __cnfn convert_uint_rtp(ulong);\n"
28701"uint __ovld __cnfn convert_uint_sat_rtp(ulong);\n"
28702"uint __ovld __cnfn convert_uint_rtn(ulong);\n"
28703"uint __ovld __cnfn convert_uint_sat_rtn(ulong);\n"
28704"uint __ovld __cnfn convert_uint(ulong);\n"
28705"uint __ovld __cnfn convert_uint_sat(ulong);\n"
28706"uint __ovld __cnfn convert_uint_rte(float);\n"
28707"uint __ovld __cnfn convert_uint_sat_rte(float);\n"
28708"uint __ovld __cnfn convert_uint_rtz(float);\n"
28709"uint __ovld __cnfn convert_uint_sat_rtz(float);\n"
28710"uint __ovld __cnfn convert_uint_rtp(float);\n"
28711"uint __ovld __cnfn convert_uint_sat_rtp(float);\n"
28712"uint __ovld __cnfn convert_uint_rtn(float);\n"
28713"uint __ovld __cnfn convert_uint_sat_rtn(float);\n"
28714"uint __ovld __cnfn convert_uint(float);\n"
28715"uint __ovld __cnfn convert_uint_sat(float);\n"
28716"long __ovld __cnfn convert_long_rte(char);\n"
28717"long __ovld __cnfn convert_long_sat_rte(char);\n"
28718"long __ovld __cnfn convert_long_rtz(char);\n"
28719"long __ovld __cnfn convert_long_sat_rtz(char);\n"
28720"long __ovld __cnfn convert_long_rtp(char);\n"
28721"long __ovld __cnfn convert_long_sat_rtp(char);\n"
28722"long __ovld __cnfn convert_long_rtn(char);\n"
28723"long __ovld __cnfn convert_long_sat_rtn(char);\n"
28724"long __ovld __cnfn convert_long(char);\n"
28725"long __ovld __cnfn convert_long_sat(char);\n"
28726"long __ovld __cnfn convert_long_rte(uchar);\n"
28727"long __ovld __cnfn convert_long_sat_rte(uchar);\n"
28728"long __ovld __cnfn convert_long_rtz(uchar);\n"
28729"long __ovld __cnfn convert_long_sat_rtz(uchar);\n"
28730"long __ovld __cnfn convert_long_rtp(uchar);\n"
28731"long __ovld __cnfn convert_long_sat_rtp(uchar);\n"
28732"long __ovld __cnfn convert_long_rtn(uchar);\n"
28733"long __ovld __cnfn convert_long_sat_rtn(uchar);\n"
28734"long __ovld __cnfn convert_long(uchar);\n"
28735"long __ovld __cnfn convert_long_sat(uchar);\n"
28736"long __ovld __cnfn convert_long_rte(short);\n"
28737"long __ovld __cnfn convert_long_sat_rte(short);\n"
28738"long __ovld __cnfn convert_long_rtz(short);\n"
28739"long __ovld __cnfn convert_long_sat_rtz(short);\n"
28740"long __ovld __cnfn convert_long_rtp(short);\n"
28741"long __ovld __cnfn convert_long_sat_rtp(short);\n"
28742"long __ovld __cnfn convert_long_rtn(short);\n"
28743"long __ovld __cnfn convert_long_sat_rtn(short);\n"
28744"long __ovld __cnfn convert_long(short);\n"
28745"long __ovld __cnfn convert_long_sat(short);\n"
28746"long __ovld __cnfn convert_long_rte(ushort);\n"
28747"long __ovld __cnfn convert_long_sat_rte(ushort);\n"
28748"long __ovld __cnfn convert_long_rtz(ushort);\n"
28749"long __ovld __cnfn convert_long_sat_rtz(ushort);\n"
28750"long __ovld __cnfn convert_long_rtp(ushort);\n"
28751"long __ovld __cnfn convert_long_sat_rtp(ushort);\n"
28752"long __ovld __cnfn convert_long_rtn(ushort);\n"
28753"long __ovld __cnfn convert_long_sat_rtn(ushort);\n"
28754"long __ovld __cnfn convert_long(ushort);\n"
28755"long __ovld __cnfn convert_long_sat(ushort);\n"
28756"long __ovld __cnfn convert_long_rte(int);\n"
28757"long __ovld __cnfn convert_long_sat_rte(int);\n"
28758"long __ovld __cnfn convert_long_rtz(int);\n"
28759"long __ovld __cnfn convert_long_sat_rtz(int);\n"
28760"long __ovld __cnfn convert_long_rtp(int);\n"
28761"long __ovld __cnfn convert_long_sat_rtp(int);\n"
28762"long __ovld __cnfn convert_long_rtn(int);\n"
28763"long __ovld __cnfn convert_long_sat_rtn(int);\n"
28764"long __ovld __cnfn convert_long(int);\n"
28765"long __ovld __cnfn convert_long_sat(int);\n"
28766"long __ovld __cnfn convert_long_rte(uint);\n"
28767"long __ovld __cnfn convert_long_sat_rte(uint);\n"
28768"long __ovld __cnfn convert_long_rtz(uint);\n"
28769"long __ovld __cnfn convert_long_sat_rtz(uint);\n"
28770"long __ovld __cnfn convert_long_rtp(uint);\n"
28771"long __ovld __cnfn convert_long_sat_rtp(uint);\n"
28772"long __ovld __cnfn convert_long_rtn(uint);\n"
28773"long __ovld __cnfn convert_long_sat_rtn(uint);\n"
28774"long __ovld __cnfn convert_long(uint);\n"
28775"long __ovld __cnfn convert_long_sat(uint);\n"
28776"long __ovld __cnfn convert_long_rte(long);\n"
28777"long __ovld __cnfn convert_long_sat_rte(long);\n"
28778"long __ovld __cnfn convert_long_rtz(long);\n"
28779"long __ovld __cnfn convert_long_sat_rtz(long);\n"
28780"long __ovld __cnfn convert_long_rtp(long);\n"
28781"long __ovld __cnfn convert_long_sat_rtp(long);\n"
28782"long __ovld __cnfn convert_long_rtn(long);\n"
28783"long __ovld __cnfn convert_long_sat_rtn(long);\n"
28784"long __ovld __cnfn convert_long(long);\n"
28785"long __ovld __cnfn convert_long_sat(long);\n"
28786"long __ovld __cnfn convert_long_rte(ulong);\n"
28787"long __ovld __cnfn convert_long_sat_rte(ulong);\n"
28788"long __ovld __cnfn convert_long_rtz(ulong);\n"
28789"long __ovld __cnfn convert_long_sat_rtz(ulong);\n"
28790"long __ovld __cnfn convert_long_rtp(ulong);\n"
28791"long __ovld __cnfn convert_long_sat_rtp(ulong);\n"
28792"long __ovld __cnfn convert_long_rtn(ulong);\n"
28793"long __ovld __cnfn convert_long_sat_rtn(ulong);\n"
28794"long __ovld __cnfn convert_long(ulong);\n"
28795"long __ovld __cnfn convert_long_sat(ulong);\n"
28796"long __ovld __cnfn convert_long_rte(float);\n"
28797"long __ovld __cnfn convert_long_sat_rte(float);\n"
28798"long __ovld __cnfn convert_long_rtz(float);\n"
28799"long __ovld __cnfn convert_long_sat_rtz(float);\n"
28800"long __ovld __cnfn convert_long_rtp(float);\n"
28801"long __ovld __cnfn convert_long_sat_rtp(float);\n"
28802"long __ovld __cnfn convert_long_rtn(float);\n"
28803"long __ovld __cnfn convert_long_sat_rtn(float);\n"
28804"long __ovld __cnfn convert_long(float);\n"
28805"long __ovld __cnfn convert_long_sat(float);\n"
28806"ulong __ovld __cnfn convert_ulong_rte(char);\n"
28807"ulong __ovld __cnfn convert_ulong_sat_rte(char);\n"
28808"ulong __ovld __cnfn convert_ulong_rtz(char);\n"
28809"ulong __ovld __cnfn convert_ulong_sat_rtz(char);\n"
28810"ulong __ovld __cnfn convert_ulong_rtp(char);\n"
28811"ulong __ovld __cnfn convert_ulong_sat_rtp(char);\n"
28812"ulong __ovld __cnfn convert_ulong_rtn(char);\n"
28813"ulong __ovld __cnfn convert_ulong_sat_rtn(char);\n"
28814"ulong __ovld __cnfn convert_ulong(char);\n"
28815"ulong __ovld __cnfn convert_ulong_sat(char);\n"
28816"ulong __ovld __cnfn convert_ulong_rte(uchar);\n"
28817"ulong __ovld __cnfn convert_ulong_sat_rte(uchar);\n"
28818"ulong __ovld __cnfn convert_ulong_rtz(uchar);\n"
28819"ulong __ovld __cnfn convert_ulong_sat_rtz(uchar);\n"
28820"ulong __ovld __cnfn convert_ulong_rtp(uchar);\n"
28821"ulong __ovld __cnfn convert_ulong_sat_rtp(uchar);\n"
28822"ulong __ovld __cnfn convert_ulong_rtn(uchar);\n"
28823"ulong __ovld __cnfn convert_ulong_sat_rtn(uchar);\n"
28824"ulong __ovld __cnfn convert_ulong(uchar);\n"
28825"ulong __ovld __cnfn convert_ulong_sat(uchar);\n"
28826"ulong __ovld __cnfn convert_ulong_rte(short);\n"
28827"ulong __ovld __cnfn convert_ulong_sat_rte(short);\n"
28828"ulong __ovld __cnfn convert_ulong_rtz(short);\n"
28829"ulong __ovld __cnfn convert_ulong_sat_rtz(short);\n"
28830"ulong __ovld __cnfn convert_ulong_rtp(short);\n"
28831"ulong __ovld __cnfn convert_ulong_sat_rtp(short);\n"
28832"ulong __ovld __cnfn convert_ulong_rtn(short);\n"
28833"ulong __ovld __cnfn convert_ulong_sat_rtn(short);\n"
28834"ulong __ovld __cnfn convert_ulong(short);\n"
28835"ulong __ovld __cnfn convert_ulong_sat(short);\n"
28836"ulong __ovld __cnfn convert_ulong_rte(ushort);\n"
28837"ulong __ovld __cnfn convert_ulong_sat_rte(ushort);\n"
28838"ulong __ovld __cnfn convert_ulong_rtz(ushort);\n"
28839"ulong __ovld __cnfn convert_ulong_sat_rtz(ushort);\n"
28840"ulong __ovld __cnfn convert_ulong_rtp(ushort);\n"
28841"ulong __ovld __cnfn convert_ulong_sat_rtp(ushort);\n"
28842"ulong __ovld __cnfn convert_ulong_rtn(ushort);\n"
28843"ulong __ovld __cnfn convert_ulong_sat_rtn(ushort);\n"
28844"ulong __ovld __cnfn convert_ulong(ushort);\n"
28845"ulong __ovld __cnfn convert_ulong_sat(ushort);\n"
28846"ulong __ovld __cnfn convert_ulong_rte(int);\n"
28847"ulong __ovld __cnfn convert_ulong_sat_rte(int);\n"
28848"ulong __ovld __cnfn convert_ulong_rtz(int);\n"
28849"ulong __ovld __cnfn convert_ulong_sat_rtz(int);\n"
28850"ulong __ovld __cnfn convert_ulong_rtp(int);\n"
28851"ulong __ovld __cnfn convert_ulong_sat_rtp(int);\n"
28852"ulong __ovld __cnfn convert_ulong_rtn(int);\n"
28853"ulong __ovld __cnfn convert_ulong_sat_rtn(int);\n"
28854"ulong __ovld __cnfn convert_ulong(int);\n"
28855"ulong __ovld __cnfn convert_ulong_sat(int);\n"
28856"ulong __ovld __cnfn convert_ulong_rte(uint);\n"
28857"ulong __ovld __cnfn convert_ulong_sat_rte(uint);\n"
28858"ulong __ovld __cnfn convert_ulong_rtz(uint);\n"
28859"ulong __ovld __cnfn convert_ulong_sat_rtz(uint);\n"
28860"ulong __ovld __cnfn convert_ulong_rtp(uint);\n"
28861"ulong __ovld __cnfn convert_ulong_sat_rtp(uint);\n"
28862"ulong __ovld __cnfn convert_ulong_rtn(uint);\n"
28863"ulong __ovld __cnfn convert_ulong_sat_rtn(uint);\n"
28864"ulong __ovld __cnfn convert_ulong(uint);\n"
28865"ulong __ovld __cnfn convert_ulong_sat(uint);\n"
28866"ulong __ovld __cnfn convert_ulong_rte(long);\n"
28867"ulong __ovld __cnfn convert_ulong_sat_rte(long);\n"
28868"ulong __ovld __cnfn convert_ulong_rtz(long);\n"
28869"ulong __ovld __cnfn convert_ulong_sat_rtz(long);\n"
28870"ulong __ovld __cnfn convert_ulong_rtp(long);\n"
28871"ulong __ovld __cnfn convert_ulong_sat_rtp(long);\n"
28872"ulong __ovld __cnfn convert_ulong_rtn(long);\n"
28873"ulong __ovld __cnfn convert_ulong_sat_rtn(long);\n"
28874"ulong __ovld __cnfn convert_ulong(long);\n"
28875"ulong __ovld __cnfn convert_ulong_sat(long);\n"
28876"ulong __ovld __cnfn convert_ulong_rte(ulong);\n"
28877"ulong __ovld __cnfn convert_ulong_sat_rte(ulong);\n"
28878"ulong __ovld __cnfn convert_ulong_rtz(ulong);\n"
28879"ulong __ovld __cnfn convert_ulong_sat_rtz(ulong);\n"
28880"ulong __ovld __cnfn convert_ulong_rtp(ulong);\n"
28881"ulong __ovld __cnfn convert_ulong_sat_rtp(ulong);\n"
28882"ulong __ovld __cnfn convert_ulong_rtn(ulong);\n"
28883"ulong __ovld __cnfn convert_ulong_sat_rtn(ulong);\n"
28884"ulong __ovld __cnfn convert_ulong(ulong);\n"
28885"ulong __ovld __cnfn convert_ulong_sat(ulong);\n"
28886"ulong __ovld __cnfn convert_ulong_rte(float);\n"
28887"ulong __ovld __cnfn convert_ulong_sat_rte(float);\n"
28888"ulong __ovld __cnfn convert_ulong_rtz(float);\n"
28889"ulong __ovld __cnfn convert_ulong_sat_rtz(float);\n"
28890"ulong __ovld __cnfn convert_ulong_rtp(float);\n"
28891"ulong __ovld __cnfn convert_ulong_sat_rtp(float);\n"
28892"ulong __ovld __cnfn convert_ulong_rtn(float);\n"
28893"ulong __ovld __cnfn convert_ulong_sat_rtn(float);\n"
28894"ulong __ovld __cnfn convert_ulong(float);\n"
28895"ulong __ovld __cnfn convert_ulong_sat(float);\n"
28896"float __ovld __cnfn convert_float_rte(char);\n"
28897"float __ovld __cnfn convert_float_rtz(char);\n"
28898"float __ovld __cnfn convert_float_rtp(char);\n"
28899"float __ovld __cnfn convert_float_rtn(char);\n"
28900"float __ovld __cnfn convert_float(char);\n"
28901"float __ovld __cnfn convert_float_rte(uchar);\n"
28902"float __ovld __cnfn convert_float_rtz(uchar);\n"
28903"float __ovld __cnfn convert_float_rtp(uchar);\n"
28904"float __ovld __cnfn convert_float_rtn(uchar);\n"
28905"float __ovld __cnfn convert_float(uchar);\n"
28906"float __ovld __cnfn convert_float_rte(short);\n"
28907"float __ovld __cnfn convert_float_rtz(short);\n"
28908"float __ovld __cnfn convert_float_rtp(short);\n"
28909"float __ovld __cnfn convert_float_rtn(short);\n"
28910"float __ovld __cnfn convert_float(short);\n"
28911"float __ovld __cnfn convert_float_rte(ushort);\n"
28912"float __ovld __cnfn convert_float_rtz(ushort);\n"
28913"float __ovld __cnfn convert_float_rtp(ushort);\n"
28914"float __ovld __cnfn convert_float_rtn(ushort);\n"
28915"float __ovld __cnfn convert_float(ushort);\n"
28916"float __ovld __cnfn convert_float_rte(int);\n"
28917"float __ovld __cnfn convert_float_rtz(int);\n"
28918"float __ovld __cnfn convert_float_rtp(int);\n"
28919"float __ovld __cnfn convert_float_rtn(int);\n"
28920"float __ovld __cnfn convert_float(int);\n"
28921"float __ovld __cnfn convert_float_rte(uint);\n"
28922"float __ovld __cnfn convert_float_rtz(uint);\n"
28923"float __ovld __cnfn convert_float_rtp(uint);\n"
28924"float __ovld __cnfn convert_float_rtn(uint);\n"
28925"float __ovld __cnfn convert_float(uint);\n"
28926"float __ovld __cnfn convert_float_rte(long);\n"
28927"float __ovld __cnfn convert_float_rtz(long);\n"
28928"float __ovld __cnfn convert_float_rtp(long);\n"
28929"float __ovld __cnfn convert_float_rtn(long);\n"
28930"float __ovld __cnfn convert_float(long);\n"
28931"float __ovld __cnfn convert_float_rte(ulong);\n"
28932"float __ovld __cnfn convert_float_rtz(ulong);\n"
28933"float __ovld __cnfn convert_float_rtp(ulong);\n"
28934"float __ovld __cnfn convert_float_rtn(ulong);\n"
28935"float __ovld __cnfn convert_float(ulong);\n"
28936"float __ovld __cnfn convert_float_rte(float);\n"
28937"float __ovld __cnfn convert_float_rtz(float);\n"
28938"float __ovld __cnfn convert_float_rtp(float);\n"
28939"float __ovld __cnfn convert_float_rtn(float);\n"
28940"float __ovld __cnfn convert_float(float);\n"
28941"char2 __ovld __cnfn convert_char2_rte(char2);\n"
28942"char2 __ovld __cnfn convert_char2_sat_rte(char2);\n"
28943"char2 __ovld __cnfn convert_char2_rtz(char2);\n"
28944"char2 __ovld __cnfn convert_char2_sat_rtz(char2);\n"
28945"char2 __ovld __cnfn convert_char2_rtp(char2);\n"
28946"char2 __ovld __cnfn convert_char2_sat_rtp(char2);\n"
28947"char2 __ovld __cnfn convert_char2_rtn(char2);\n"
28948"char2 __ovld __cnfn convert_char2_sat_rtn(char2);\n"
28949"char2 __ovld __cnfn convert_char2(char2);\n"
28950"char2 __ovld __cnfn convert_char2_sat(char2);\n"
28951"char2 __ovld __cnfn convert_char2_rte(uchar2);\n"
28952"char2 __ovld __cnfn convert_char2_sat_rte(uchar2);\n"
28953"char2 __ovld __cnfn convert_char2_rtz(uchar2);\n"
28954"char2 __ovld __cnfn convert_char2_sat_rtz(uchar2);\n"
28955"char2 __ovld __cnfn convert_char2_rtp(uchar2);\n"
28956"char2 __ovld __cnfn convert_char2_sat_rtp(uchar2);\n"
28957"char2 __ovld __cnfn convert_char2_rtn(uchar2);\n"
28958"char2 __ovld __cnfn convert_char2_sat_rtn(uchar2);\n"
28959"char2 __ovld __cnfn convert_char2(uchar2);\n"
28960"char2 __ovld __cnfn convert_char2_sat(uchar2);\n"
28961"char2 __ovld __cnfn convert_char2_rte(short2);\n"
28962"char2 __ovld __cnfn convert_char2_sat_rte(short2);\n"
28963"char2 __ovld __cnfn convert_char2_rtz(short2);\n"
28964"char2 __ovld __cnfn convert_char2_sat_rtz(short2);\n"
28965"char2 __ovld __cnfn convert_char2_rtp(short2);\n"
28966"char2 __ovld __cnfn convert_char2_sat_rtp(short2);\n"
28967"char2 __ovld __cnfn convert_char2_rtn(short2);\n"
28968"char2 __ovld __cnfn convert_char2_sat_rtn(short2);\n"
28969"char2 __ovld __cnfn convert_char2(short2);\n"
28970"char2 __ovld __cnfn convert_char2_sat(short2);\n"
28971"char2 __ovld __cnfn convert_char2_rte(ushort2);\n"
28972"char2 __ovld __cnfn convert_char2_sat_rte(ushort2);\n"
28973"char2 __ovld __cnfn convert_char2_rtz(ushort2);\n"
28974"char2 __ovld __cnfn convert_char2_sat_rtz(ushort2);\n"
28975"char2 __ovld __cnfn convert_char2_rtp(ushort2);\n"
28976"char2 __ovld __cnfn convert_char2_sat_rtp(ushort2);\n"
28977"char2 __ovld __cnfn convert_char2_rtn(ushort2);\n"
28978"char2 __ovld __cnfn convert_char2_sat_rtn(ushort2);\n"
28979"char2 __ovld __cnfn convert_char2(ushort2);\n"
28980"char2 __ovld __cnfn convert_char2_sat(ushort2);\n"
28981"char2 __ovld __cnfn convert_char2_rte(int2);\n"
28982"char2 __ovld __cnfn convert_char2_sat_rte(int2);\n"
28983"char2 __ovld __cnfn convert_char2_rtz(int2);\n"
28984"char2 __ovld __cnfn convert_char2_sat_rtz(int2);\n"
28985"char2 __ovld __cnfn convert_char2_rtp(int2);\n"
28986"char2 __ovld __cnfn convert_char2_sat_rtp(int2);\n"
28987"char2 __ovld __cnfn convert_char2_rtn(int2);\n"
28988"char2 __ovld __cnfn convert_char2_sat_rtn(int2);\n"
28989"char2 __ovld __cnfn convert_char2(int2);\n"
28990"char2 __ovld __cnfn convert_char2_sat(int2);\n"
28991"char2 __ovld __cnfn convert_char2_rte(uint2);\n"
28992"char2 __ovld __cnfn convert_char2_sat_rte(uint2);\n"
28993"char2 __ovld __cnfn convert_char2_rtz(uint2);\n"
28994"char2 __ovld __cnfn convert_char2_sat_rtz(uint2);\n"
28995"char2 __ovld __cnfn convert_char2_rtp(uint2);\n"
28996"char2 __ovld __cnfn convert_char2_sat_rtp(uint2);\n"
28997"char2 __ovld __cnfn convert_char2_rtn(uint2);\n"
28998"char2 __ovld __cnfn convert_char2_sat_rtn(uint2);\n"
28999"char2 __ovld __cnfn convert_char2(uint2);\n"
29000"char2 __ovld __cnfn convert_char2_sat(uint2);\n"
29001"char2 __ovld __cnfn convert_char2_rte(long2);\n"
29002"char2 __ovld __cnfn convert_char2_sat_rte(long2);\n"
29003"char2 __ovld __cnfn convert_char2_rtz(long2);\n"
29004"char2 __ovld __cnfn convert_char2_sat_rtz(long2);\n"
29005"char2 __ovld __cnfn convert_char2_rtp(long2);\n"
29006"char2 __ovld __cnfn convert_char2_sat_rtp(long2);\n"
29007"char2 __ovld __cnfn convert_char2_rtn(long2);\n"
29008"char2 __ovld __cnfn convert_char2_sat_rtn(long2);\n"
29009"char2 __ovld __cnfn convert_char2(long2);\n"
29010"char2 __ovld __cnfn convert_char2_sat(long2);\n"
29011"char2 __ovld __cnfn convert_char2_rte(ulong2);\n"
29012"char2 __ovld __cnfn convert_char2_sat_rte(ulong2);\n"
29013"char2 __ovld __cnfn convert_char2_rtz(ulong2);\n"
29014"char2 __ovld __cnfn convert_char2_sat_rtz(ulong2);\n"
29015"char2 __ovld __cnfn convert_char2_rtp(ulong2);\n"
29016"char2 __ovld __cnfn convert_char2_sat_rtp(ulong2);\n"
29017"char2 __ovld __cnfn convert_char2_rtn(ulong2);\n"
29018"char2 __ovld __cnfn convert_char2_sat_rtn(ulong2);\n"
29019"char2 __ovld __cnfn convert_char2(ulong2);\n"
29020"char2 __ovld __cnfn convert_char2_sat(ulong2);\n"
29021"char2 __ovld __cnfn convert_char2_rte(float2);\n"
29022"char2 __ovld __cnfn convert_char2_sat_rte(float2);\n"
29023"char2 __ovld __cnfn convert_char2_rtz(float2);\n"
29024"char2 __ovld __cnfn convert_char2_sat_rtz(float2);\n"
29025"char2 __ovld __cnfn convert_char2_rtp(float2);\n"
29026"char2 __ovld __cnfn convert_char2_sat_rtp(float2);\n"
29027"char2 __ovld __cnfn convert_char2_rtn(float2);\n"
29028"char2 __ovld __cnfn convert_char2_sat_rtn(float2);\n"
29029"char2 __ovld __cnfn convert_char2(float2);\n"
29030"char2 __ovld __cnfn convert_char2_sat(float2);\n"
29031"uchar2 __ovld __cnfn convert_uchar2_rte(char2);\n"
29032"uchar2 __ovld __cnfn convert_uchar2_sat_rte(char2);\n"
29033"uchar2 __ovld __cnfn convert_uchar2_rtz(char2);\n"
29034"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(char2);\n"
29035"uchar2 __ovld __cnfn convert_uchar2_rtp(char2);\n"
29036"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(char2);\n"
29037"uchar2 __ovld __cnfn convert_uchar2_rtn(char2);\n"
29038"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(char2);\n"
29039"uchar2 __ovld __cnfn convert_uchar2(char2);\n"
29040"uchar2 __ovld __cnfn convert_uchar2_sat(char2);\n"
29041"uchar2 __ovld __cnfn convert_uchar2_rte(uchar2);\n"
29042"uchar2 __ovld __cnfn convert_uchar2_sat_rte(uchar2);\n"
29043"uchar2 __ovld __cnfn convert_uchar2_rtz(uchar2);\n"
29044"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uchar2);\n"
29045"uchar2 __ovld __cnfn convert_uchar2_rtp(uchar2);\n"
29046"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uchar2);\n"
29047"uchar2 __ovld __cnfn convert_uchar2_rtn(uchar2);\n"
29048"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uchar2);\n"
29049"uchar2 __ovld __cnfn convert_uchar2(uchar2);\n"
29050"uchar2 __ovld __cnfn convert_uchar2_sat(uchar2);\n"
29051"uchar2 __ovld __cnfn convert_uchar2_rte(short2);\n"
29052"uchar2 __ovld __cnfn convert_uchar2_sat_rte(short2);\n"
29053"uchar2 __ovld __cnfn convert_uchar2_rtz(short2);\n"
29054"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(short2);\n"
29055"uchar2 __ovld __cnfn convert_uchar2_rtp(short2);\n"
29056"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(short2);\n"
29057"uchar2 __ovld __cnfn convert_uchar2_rtn(short2);\n"
29058"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(short2);\n"
29059"uchar2 __ovld __cnfn convert_uchar2(short2);\n"
29060"uchar2 __ovld __cnfn convert_uchar2_sat(short2);\n"
29061"uchar2 __ovld __cnfn convert_uchar2_rte(ushort2);\n"
29062"uchar2 __ovld __cnfn convert_uchar2_sat_rte(ushort2);\n"
29063"uchar2 __ovld __cnfn convert_uchar2_rtz(ushort2);\n"
29064"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ushort2);\n"
29065"uchar2 __ovld __cnfn convert_uchar2_rtp(ushort2);\n"
29066"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ushort2);\n"
29067"uchar2 __ovld __cnfn convert_uchar2_rtn(ushort2);\n"
29068"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ushort2);\n"
29069"uchar2 __ovld __cnfn convert_uchar2(ushort2);\n"
29070"uchar2 __ovld __cnfn convert_uchar2_sat(ushort2);\n"
29071"uchar2 __ovld __cnfn convert_uchar2_rte(int2);\n"
29072"uchar2 __ovld __cnfn convert_uchar2_sat_rte(int2);\n"
29073"uchar2 __ovld __cnfn convert_uchar2_rtz(int2);\n"
29074"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(int2);\n"
29075"uchar2 __ovld __cnfn convert_uchar2_rtp(int2);\n"
29076"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(int2);\n"
29077"uchar2 __ovld __cnfn convert_uchar2_rtn(int2);\n"
29078"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(int2);\n"
29079"uchar2 __ovld __cnfn convert_uchar2(int2);\n"
29080"uchar2 __ovld __cnfn convert_uchar2_sat(int2);\n"
29081"uchar2 __ovld __cnfn convert_uchar2_rte(uint2);\n"
29082"uchar2 __ovld __cnfn convert_uchar2_sat_rte(uint2);\n"
29083"uchar2 __ovld __cnfn convert_uchar2_rtz(uint2);\n"
29084"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uint2);\n"
29085"uchar2 __ovld __cnfn convert_uchar2_rtp(uint2);\n"
29086"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uint2);\n"
29087"uchar2 __ovld __cnfn convert_uchar2_rtn(uint2);\n"
29088"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uint2);\n"
29089"uchar2 __ovld __cnfn convert_uchar2(uint2);\n"
29090"uchar2 __ovld __cnfn convert_uchar2_sat(uint2);\n"
29091"uchar2 __ovld __cnfn convert_uchar2_rte(long2);\n"
29092"uchar2 __ovld __cnfn convert_uchar2_sat_rte(long2);\n"
29093"uchar2 __ovld __cnfn convert_uchar2_rtz(long2);\n"
29094"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(long2);\n"
29095"uchar2 __ovld __cnfn convert_uchar2_rtp(long2);\n"
29096"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(long2);\n"
29097"uchar2 __ovld __cnfn convert_uchar2_rtn(long2);\n"
29098"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(long2);\n"
29099"uchar2 __ovld __cnfn convert_uchar2(long2);\n"
29100"uchar2 __ovld __cnfn convert_uchar2_sat(long2);\n"
29101"uchar2 __ovld __cnfn convert_uchar2_rte(ulong2);\n"
29102"uchar2 __ovld __cnfn convert_uchar2_sat_rte(ulong2);\n"
29103"uchar2 __ovld __cnfn convert_uchar2_rtz(ulong2);\n"
29104"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ulong2);\n"
29105"uchar2 __ovld __cnfn convert_uchar2_rtp(ulong2);\n"
29106"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ulong2);\n"
29107"uchar2 __ovld __cnfn convert_uchar2_rtn(ulong2);\n"
29108"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ulong2);\n"
29109"uchar2 __ovld __cnfn convert_uchar2(ulong2);\n"
29110"uchar2 __ovld __cnfn convert_uchar2_sat(ulong2);\n"
29111"uchar2 __ovld __cnfn convert_uchar2_rte(float2);\n"
29112"uchar2 __ovld __cnfn convert_uchar2_sat_rte(float2);\n"
29113"uchar2 __ovld __cnfn convert_uchar2_rtz(float2);\n"
29114"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(float2);\n"
29115"uchar2 __ovld __cnfn convert_uchar2_rtp(float2);\n"
29116"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(float2);\n"
29117"uchar2 __ovld __cnfn convert_uchar2_rtn(float2);\n"
29118"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(float2);\n"
29119"uchar2 __ovld __cnfn convert_uchar2(float2);\n"
29120"uchar2 __ovld __cnfn convert_uchar2_sat(float2);\n"
29121"short2 __ovld __cnfn convert_short2_rte(char2);\n"
29122"short2 __ovld __cnfn convert_short2_sat_rte(char2);\n"
29123"short2 __ovld __cnfn convert_short2_rtz(char2);\n"
29124"short2 __ovld __cnfn convert_short2_sat_rtz(char2);\n"
29125"short2 __ovld __cnfn convert_short2_rtp(char2);\n"
29126"short2 __ovld __cnfn convert_short2_sat_rtp(char2);\n"
29127"short2 __ovld __cnfn convert_short2_rtn(char2);\n"
29128"short2 __ovld __cnfn convert_short2_sat_rtn(char2);\n"
29129"short2 __ovld __cnfn convert_short2(char2);\n"
29130"short2 __ovld __cnfn convert_short2_sat(char2);\n"
29131"short2 __ovld __cnfn convert_short2_rte(uchar2);\n"
29132"short2 __ovld __cnfn convert_short2_sat_rte(uchar2);\n"
29133"short2 __ovld __cnfn convert_short2_rtz(uchar2);\n"
29134"short2 __ovld __cnfn convert_short2_sat_rtz(uchar2);\n"
29135"short2 __ovld __cnfn convert_short2_rtp(uchar2);\n"
29136"short2 __ovld __cnfn convert_short2_sat_rtp(uchar2);\n"
29137"short2 __ovld __cnfn convert_short2_rtn(uchar2);\n"
29138"short2 __ovld __cnfn convert_short2_sat_rtn(uchar2);\n"
29139"short2 __ovld __cnfn convert_short2(uchar2);\n"
29140"short2 __ovld __cnfn convert_short2_sat(uchar2);\n"
29141"short2 __ovld __cnfn convert_short2_rte(short2);\n"
29142"short2 __ovld __cnfn convert_short2_sat_rte(short2);\n"
29143"short2 __ovld __cnfn convert_short2_rtz(short2);\n"
29144"short2 __ovld __cnfn convert_short2_sat_rtz(short2);\n"
29145"short2 __ovld __cnfn convert_short2_rtp(short2);\n"
29146"short2 __ovld __cnfn convert_short2_sat_rtp(short2);\n"
29147"short2 __ovld __cnfn convert_short2_rtn(short2);\n"
29148"short2 __ovld __cnfn convert_short2_sat_rtn(short2);\n"
29149"short2 __ovld __cnfn convert_short2(short2);\n"
29150"short2 __ovld __cnfn convert_short2_sat(short2);\n"
29151"short2 __ovld __cnfn convert_short2_rte(ushort2);\n"
29152"short2 __ovld __cnfn convert_short2_sat_rte(ushort2);\n"
29153"short2 __ovld __cnfn convert_short2_rtz(ushort2);\n"
29154"short2 __ovld __cnfn convert_short2_sat_rtz(ushort2);\n"
29155"short2 __ovld __cnfn convert_short2_rtp(ushort2);\n"
29156"short2 __ovld __cnfn convert_short2_sat_rtp(ushort2);\n"
29157"short2 __ovld __cnfn convert_short2_rtn(ushort2);\n"
29158"short2 __ovld __cnfn convert_short2_sat_rtn(ushort2);\n"
29159"short2 __ovld __cnfn convert_short2(ushort2);\n"
29160"short2 __ovld __cnfn convert_short2_sat(ushort2);\n"
29161"short2 __ovld __cnfn convert_short2_rte(int2);\n"
29162"short2 __ovld __cnfn convert_short2_sat_rte(int2);\n"
29163"short2 __ovld __cnfn convert_short2_rtz(int2);\n"
29164"short2 __ovld __cnfn convert_short2_sat_rtz(int2);\n"
29165"short2 __ovld __cnfn convert_short2_rtp(int2);\n"
29166"short2 __ovld __cnfn convert_short2_sat_rtp(int2);\n"
29167"short2 __ovld __cnfn convert_short2_rtn(int2);\n"
29168"short2 __ovld __cnfn convert_short2_sat_rtn(int2);\n"
29169"short2 __ovld __cnfn convert_short2(int2);\n"
29170"short2 __ovld __cnfn convert_short2_sat(int2);\n"
29171"short2 __ovld __cnfn convert_short2_rte(uint2);\n"
29172"short2 __ovld __cnfn convert_short2_sat_rte(uint2);\n"
29173"short2 __ovld __cnfn convert_short2_rtz(uint2);\n"
29174"short2 __ovld __cnfn convert_short2_sat_rtz(uint2);\n"
29175"short2 __ovld __cnfn convert_short2_rtp(uint2);\n"
29176"short2 __ovld __cnfn convert_short2_sat_rtp(uint2);\n"
29177"short2 __ovld __cnfn convert_short2_rtn(uint2);\n"
29178"short2 __ovld __cnfn convert_short2_sat_rtn(uint2);\n"
29179"short2 __ovld __cnfn convert_short2(uint2);\n"
29180"short2 __ovld __cnfn convert_short2_sat(uint2);\n"
29181"short2 __ovld __cnfn convert_short2_rte(long2);\n"
29182"short2 __ovld __cnfn convert_short2_sat_rte(long2);\n"
29183"short2 __ovld __cnfn convert_short2_rtz(long2);\n"
29184"short2 __ovld __cnfn convert_short2_sat_rtz(long2);\n"
29185"short2 __ovld __cnfn convert_short2_rtp(long2);\n"
29186"short2 __ovld __cnfn convert_short2_sat_rtp(long2);\n"
29187"short2 __ovld __cnfn convert_short2_rtn(long2);\n"
29188"short2 __ovld __cnfn convert_short2_sat_rtn(long2);\n"
29189"short2 __ovld __cnfn convert_short2(long2);\n"
29190"short2 __ovld __cnfn convert_short2_sat(long2);\n"
29191"short2 __ovld __cnfn convert_short2_rte(ulong2);\n"
29192"short2 __ovld __cnfn convert_short2_sat_rte(ulong2);\n"
29193"short2 __ovld __cnfn convert_short2_rtz(ulong2);\n"
29194"short2 __ovld __cnfn convert_short2_sat_rtz(ulong2);\n"
29195"short2 __ovld __cnfn convert_short2_rtp(ulong2);\n"
29196"short2 __ovld __cnfn convert_short2_sat_rtp(ulong2);\n"
29197"short2 __ovld __cnfn convert_short2_rtn(ulong2);\n"
29198"short2 __ovld __cnfn convert_short2_sat_rtn(ulong2);\n"
29199"short2 __ovld __cnfn convert_short2(ulong2);\n"
29200"short2 __ovld __cnfn convert_short2_sat(ulong2);\n"
29201"short2 __ovld __cnfn convert_short2_rte(float2);\n"
29202"short2 __ovld __cnfn convert_short2_sat_rte(float2);\n"
29203"short2 __ovld __cnfn convert_short2_rtz(float2);\n"
29204"short2 __ovld __cnfn convert_short2_sat_rtz(float2);\n"
29205"short2 __ovld __cnfn convert_short2_rtp(float2);\n"
29206"short2 __ovld __cnfn convert_short2_sat_rtp(float2);\n"
29207"short2 __ovld __cnfn convert_short2_rtn(float2);\n"
29208"short2 __ovld __cnfn convert_short2_sat_rtn(float2);\n"
29209"short2 __ovld __cnfn convert_short2(float2);\n"
29210"short2 __ovld __cnfn convert_short2_sat(float2);\n"
29211"ushort2 __ovld __cnfn convert_ushort2_rte(char2);\n"
29212"ushort2 __ovld __cnfn convert_ushort2_sat_rte(char2);\n"
29213"ushort2 __ovld __cnfn convert_ushort2_rtz(char2);\n"
29214"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(char2);\n"
29215"ushort2 __ovld __cnfn convert_ushort2_rtp(char2);\n"
29216"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(char2);\n"
29217"ushort2 __ovld __cnfn convert_ushort2_rtn(char2);\n"
29218"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(char2);\n"
29219"ushort2 __ovld __cnfn convert_ushort2(char2);\n"
29220"ushort2 __ovld __cnfn convert_ushort2_sat(char2);\n"
29221"ushort2 __ovld __cnfn convert_ushort2_rte(uchar2);\n"
29222"ushort2 __ovld __cnfn convert_ushort2_sat_rte(uchar2);\n"
29223"ushort2 __ovld __cnfn convert_ushort2_rtz(uchar2);\n"
29224"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uchar2);\n"
29225"ushort2 __ovld __cnfn convert_ushort2_rtp(uchar2);\n"
29226"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uchar2);\n"
29227"ushort2 __ovld __cnfn convert_ushort2_rtn(uchar2);\n"
29228"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uchar2);\n"
29229"ushort2 __ovld __cnfn convert_ushort2(uchar2);\n"
29230"ushort2 __ovld __cnfn convert_ushort2_sat(uchar2);\n"
29231"ushort2 __ovld __cnfn convert_ushort2_rte(short2);\n"
29232"ushort2 __ovld __cnfn convert_ushort2_sat_rte(short2);\n"
29233"ushort2 __ovld __cnfn convert_ushort2_rtz(short2);\n"
29234"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(short2);\n"
29235"ushort2 __ovld __cnfn convert_ushort2_rtp(short2);\n"
29236"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(short2);\n"
29237"ushort2 __ovld __cnfn convert_ushort2_rtn(short2);\n"
29238"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(short2);\n"
29239"ushort2 __ovld __cnfn convert_ushort2(short2);\n"
29240"ushort2 __ovld __cnfn convert_ushort2_sat(short2);\n"
29241"ushort2 __ovld __cnfn convert_ushort2_rte(ushort2);\n"
29242"ushort2 __ovld __cnfn convert_ushort2_sat_rte(ushort2);\n"
29243"ushort2 __ovld __cnfn convert_ushort2_rtz(ushort2);\n"
29244"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ushort2);\n"
29245"ushort2 __ovld __cnfn convert_ushort2_rtp(ushort2);\n"
29246"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ushort2);\n"
29247"ushort2 __ovld __cnfn convert_ushort2_rtn(ushort2);\n"
29248"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(ushort2);\n"
29249"ushort2 __ovld __cnfn convert_ushort2(ushort2);\n"
29250"ushort2 __ovld __cnfn convert_ushort2_sat(ushort2);\n"
29251"ushort2 __ovld __cnfn convert_ushort2_rte(int2);\n"
29252"ushort2 __ovld __cnfn convert_ushort2_sat_rte(int2);\n"
29253"ushort2 __ovld __cnfn convert_ushort2_rtz(int2);\n"
29254"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(int2);\n"
29255"ushort2 __ovld __cnfn convert_ushort2_rtp(int2);\n"
29256"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(int2);\n"
29257"ushort2 __ovld __cnfn convert_ushort2_rtn(int2);\n"
29258"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(int2);\n"
29259"ushort2 __ovld __cnfn convert_ushort2(int2);\n"
29260"ushort2 __ovld __cnfn convert_ushort2_sat(int2);\n"
29261"ushort2 __ovld __cnfn convert_ushort2_rte(uint2);\n"
29262"ushort2 __ovld __cnfn convert_ushort2_sat_rte(uint2);\n"
29263"ushort2 __ovld __cnfn convert_ushort2_rtz(uint2);\n"
29264"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uint2);\n"
29265"ushort2 __ovld __cnfn convert_ushort2_rtp(uint2);\n"
29266"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uint2);\n"
29267"ushort2 __ovld __cnfn convert_ushort2_rtn(uint2);\n"
29268"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uint2);\n"
29269"ushort2 __ovld __cnfn convert_ushort2(uint2);\n"
29270"ushort2 __ovld __cnfn convert_ushort2_sat(uint2);\n"
29271"ushort2 __ovld __cnfn convert_ushort2_rte(long2);\n"
29272"ushort2 __ovld __cnfn convert_ushort2_sat_rte(long2);\n"
29273"ushort2 __ovld __cnfn convert_ushort2_rtz(long2);\n"
29274"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(long2);\n"
29275"ushort2 __ovld __cnfn convert_ushort2_rtp(long2);\n"
29276"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(long2);\n"
29277"ushort2 __ovld __cnfn convert_ushort2_rtn(long2);\n"
29278"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(long2);\n"
29279"ushort2 __ovld __cnfn convert_ushort2(long2);\n"
29280"ushort2 __ovld __cnfn convert_ushort2_sat(long2);\n"
29281"ushort2 __ovld __cnfn convert_ushort2_rte(ulong2);\n"
29282"ushort2 __ovld __cnfn convert_ushort2_sat_rte(ulong2);\n"
29283"ushort2 __ovld __cnfn convert_ushort2_rtz(ulong2);\n"
29284"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ulong2);\n"
29285"ushort2 __ovld __cnfn convert_ushort2_rtp(ulong2);\n"
29286"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ulong2);\n"
29287"ushort2 __ovld __cnfn convert_ushort2_rtn(ulong2);\n"
29288"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(ulong2);\n"
29289"ushort2 __ovld __cnfn convert_ushort2(ulong2);\n"
29290"ushort2 __ovld __cnfn convert_ushort2_sat(ulong2);\n"
29291"ushort2 __ovld __cnfn convert_ushort2_rte(float2);\n"
29292"ushort2 __ovld __cnfn convert_ushort2_sat_rte(float2);\n"
29293"ushort2 __ovld __cnfn convert_ushort2_rtz(float2);\n"
29294"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(float2);\n"
29295"ushort2 __ovld __cnfn convert_ushort2_rtp(float2);\n"
29296"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(float2);\n"
29297"ushort2 __ovld __cnfn convert_ushort2_rtn(float2);\n"
29298"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(float2);\n"
29299"ushort2 __ovld __cnfn convert_ushort2(float2);\n"
29300"ushort2 __ovld __cnfn convert_ushort2_sat(float2);\n"
29301"int2 __ovld __cnfn convert_int2_rte(char2);\n"
29302"int2 __ovld __cnfn convert_int2_sat_rte(char2);\n"
29303"int2 __ovld __cnfn convert_int2_rtz(char2);\n"
29304"int2 __ovld __cnfn convert_int2_sat_rtz(char2);\n"
29305"int2 __ovld __cnfn convert_int2_rtp(char2);\n"
29306"int2 __ovld __cnfn convert_int2_sat_rtp(char2);\n"
29307"int2 __ovld __cnfn convert_int2_rtn(char2);\n"
29308"int2 __ovld __cnfn convert_int2_sat_rtn(char2);\n"
29309"int2 __ovld __cnfn convert_int2(char2);\n"
29310"int2 __ovld __cnfn convert_int2_sat(char2);\n"
29311"int2 __ovld __cnfn convert_int2_rte(uchar2);\n"
29312"int2 __ovld __cnfn convert_int2_sat_rte(uchar2);\n"
29313"int2 __ovld __cnfn convert_int2_rtz(uchar2);\n"
29314"int2 __ovld __cnfn convert_int2_sat_rtz(uchar2);\n"
29315"int2 __ovld __cnfn convert_int2_rtp(uchar2);\n"
29316"int2 __ovld __cnfn convert_int2_sat_rtp(uchar2);\n"
29317"int2 __ovld __cnfn convert_int2_rtn(uchar2);\n"
29318"int2 __ovld __cnfn convert_int2_sat_rtn(uchar2);\n"
29319"int2 __ovld __cnfn convert_int2(uchar2);\n"
29320"int2 __ovld __cnfn convert_int2_sat(uchar2);\n"
29321"int2 __ovld __cnfn convert_int2_rte(short2);\n"
29322"int2 __ovld __cnfn convert_int2_sat_rte(short2);\n"
29323"int2 __ovld __cnfn convert_int2_rtz(short2);\n"
29324"int2 __ovld __cnfn convert_int2_sat_rtz(short2);\n"
29325"int2 __ovld __cnfn convert_int2_rtp(short2);\n"
29326"int2 __ovld __cnfn convert_int2_sat_rtp(short2);\n"
29327"int2 __ovld __cnfn convert_int2_rtn(short2);\n"
29328"int2 __ovld __cnfn convert_int2_sat_rtn(short2);\n"
29329"int2 __ovld __cnfn convert_int2(short2);\n"
29330"int2 __ovld __cnfn convert_int2_sat(short2);\n"
29331"int2 __ovld __cnfn convert_int2_rte(ushort2);\n"
29332"int2 __ovld __cnfn convert_int2_sat_rte(ushort2);\n"
29333"int2 __ovld __cnfn convert_int2_rtz(ushort2);\n"
29334"int2 __ovld __cnfn convert_int2_sat_rtz(ushort2);\n"
29335"int2 __ovld __cnfn convert_int2_rtp(ushort2);\n"
29336"int2 __ovld __cnfn convert_int2_sat_rtp(ushort2);\n"
29337"int2 __ovld __cnfn convert_int2_rtn(ushort2);\n"
29338"int2 __ovld __cnfn convert_int2_sat_rtn(ushort2);\n"
29339"int2 __ovld __cnfn convert_int2(ushort2);\n"
29340"int2 __ovld __cnfn convert_int2_sat(ushort2);\n"
29341"int2 __ovld __cnfn convert_int2_rte(int2);\n"
29342"int2 __ovld __cnfn convert_int2_sat_rte(int2);\n"
29343"int2 __ovld __cnfn convert_int2_rtz(int2);\n"
29344"int2 __ovld __cnfn convert_int2_sat_rtz(int2);\n"
29345"int2 __ovld __cnfn convert_int2_rtp(int2);\n"
29346"int2 __ovld __cnfn convert_int2_sat_rtp(int2);\n"
29347"int2 __ovld __cnfn convert_int2_rtn(int2);\n"
29348"int2 __ovld __cnfn convert_int2_sat_rtn(int2);\n"
29349"int2 __ovld __cnfn convert_int2(int2);\n"
29350"int2 __ovld __cnfn convert_int2_sat(int2);\n"
29351"int2 __ovld __cnfn convert_int2_rte(uint2);\n"
29352"int2 __ovld __cnfn convert_int2_sat_rte(uint2);\n"
29353"int2 __ovld __cnfn convert_int2_rtz(uint2);\n"
29354"int2 __ovld __cnfn convert_int2_sat_rtz(uint2);\n"
29355"int2 __ovld __cnfn convert_int2_rtp(uint2);\n"
29356"int2 __ovld __cnfn convert_int2_sat_rtp(uint2);\n"
29357"int2 __ovld __cnfn convert_int2_rtn(uint2);\n"
29358"int2 __ovld __cnfn convert_int2_sat_rtn(uint2);\n"
29359"int2 __ovld __cnfn convert_int2(uint2);\n"
29360"int2 __ovld __cnfn convert_int2_sat(uint2);\n"
29361"int2 __ovld __cnfn convert_int2_rte(long2);\n"
29362"int2 __ovld __cnfn convert_int2_sat_rte(long2);\n"
29363"int2 __ovld __cnfn convert_int2_rtz(long2);\n"
29364"int2 __ovld __cnfn convert_int2_sat_rtz(long2);\n"
29365"int2 __ovld __cnfn convert_int2_rtp(long2);\n"
29366"int2 __ovld __cnfn convert_int2_sat_rtp(long2);\n"
29367"int2 __ovld __cnfn convert_int2_rtn(long2);\n"
29368"int2 __ovld __cnfn convert_int2_sat_rtn(long2);\n"
29369"int2 __ovld __cnfn convert_int2(long2);\n"
29370"int2 __ovld __cnfn convert_int2_sat(long2);\n"
29371"int2 __ovld __cnfn convert_int2_rte(ulong2);\n"
29372"int2 __ovld __cnfn convert_int2_sat_rte(ulong2);\n"
29373"int2 __ovld __cnfn convert_int2_rtz(ulong2);\n"
29374"int2 __ovld __cnfn convert_int2_sat_rtz(ulong2);\n"
29375"int2 __ovld __cnfn convert_int2_rtp(ulong2);\n"
29376"int2 __ovld __cnfn convert_int2_sat_rtp(ulong2);\n"
29377"int2 __ovld __cnfn convert_int2_rtn(ulong2);\n"
29378"int2 __ovld __cnfn convert_int2_sat_rtn(ulong2);\n"
29379"int2 __ovld __cnfn convert_int2(ulong2);\n"
29380"int2 __ovld __cnfn convert_int2_sat(ulong2);\n"
29381"int2 __ovld __cnfn convert_int2_rte(float2);\n"
29382"int2 __ovld __cnfn convert_int2_sat_rte(float2);\n"
29383"int2 __ovld __cnfn convert_int2_rtz(float2);\n"
29384"int2 __ovld __cnfn convert_int2_sat_rtz(float2);\n"
29385"int2 __ovld __cnfn convert_int2_rtp(float2);\n"
29386"int2 __ovld __cnfn convert_int2_sat_rtp(float2);\n"
29387"int2 __ovld __cnfn convert_int2_rtn(float2);\n"
29388"int2 __ovld __cnfn convert_int2_sat_rtn(float2);\n"
29389"int2 __ovld __cnfn convert_int2(float2);\n"
29390"int2 __ovld __cnfn convert_int2_sat(float2);\n"
29391"uint2 __ovld __cnfn convert_uint2_rte(char2);\n"
29392"uint2 __ovld __cnfn convert_uint2_sat_rte(char2);\n"
29393"uint2 __ovld __cnfn convert_uint2_rtz(char2);\n"
29394"uint2 __ovld __cnfn convert_uint2_sat_rtz(char2);\n"
29395"uint2 __ovld __cnfn convert_uint2_rtp(char2);\n"
29396"uint2 __ovld __cnfn convert_uint2_sat_rtp(char2);\n"
29397"uint2 __ovld __cnfn convert_uint2_rtn(char2);\n"
29398"uint2 __ovld __cnfn convert_uint2_sat_rtn(char2);\n"
29399"uint2 __ovld __cnfn convert_uint2(char2);\n"
29400"uint2 __ovld __cnfn convert_uint2_sat(char2);\n"
29401"uint2 __ovld __cnfn convert_uint2_rte(uchar2);\n"
29402"uint2 __ovld __cnfn convert_uint2_sat_rte(uchar2);\n"
29403"uint2 __ovld __cnfn convert_uint2_rtz(uchar2);\n"
29404"uint2 __ovld __cnfn convert_uint2_sat_rtz(uchar2);\n"
29405"uint2 __ovld __cnfn convert_uint2_rtp(uchar2);\n"
29406"uint2 __ovld __cnfn convert_uint2_sat_rtp(uchar2);\n"
29407"uint2 __ovld __cnfn convert_uint2_rtn(uchar2);\n"
29408"uint2 __ovld __cnfn convert_uint2_sat_rtn(uchar2);\n"
29409"uint2 __ovld __cnfn convert_uint2(uchar2);\n"
29410"uint2 __ovld __cnfn convert_uint2_sat(uchar2);\n"
29411"uint2 __ovld __cnfn convert_uint2_rte(short2);\n"
29412"uint2 __ovld __cnfn convert_uint2_sat_rte(short2);\n"
29413"uint2 __ovld __cnfn convert_uint2_rtz(short2);\n"
29414"uint2 __ovld __cnfn convert_uint2_sat_rtz(short2);\n"
29415"uint2 __ovld __cnfn convert_uint2_rtp(short2);\n"
29416"uint2 __ovld __cnfn convert_uint2_sat_rtp(short2);\n"
29417"uint2 __ovld __cnfn convert_uint2_rtn(short2);\n"
29418"uint2 __ovld __cnfn convert_uint2_sat_rtn(short2);\n"
29419"uint2 __ovld __cnfn convert_uint2(short2);\n"
29420"uint2 __ovld __cnfn convert_uint2_sat(short2);\n"
29421"uint2 __ovld __cnfn convert_uint2_rte(ushort2);\n"
29422"uint2 __ovld __cnfn convert_uint2_sat_rte(ushort2);\n"
29423"uint2 __ovld __cnfn convert_uint2_rtz(ushort2);\n"
29424"uint2 __ovld __cnfn convert_uint2_sat_rtz(ushort2);\n"
29425"uint2 __ovld __cnfn convert_uint2_rtp(ushort2);\n"
29426"uint2 __ovld __cnfn convert_uint2_sat_rtp(ushort2);\n"
29427"uint2 __ovld __cnfn convert_uint2_rtn(ushort2);\n"
29428"uint2 __ovld __cnfn convert_uint2_sat_rtn(ushort2);\n"
29429"uint2 __ovld __cnfn convert_uint2(ushort2);\n"
29430"uint2 __ovld __cnfn convert_uint2_sat(ushort2);\n"
29431"uint2 __ovld __cnfn convert_uint2_rte(int2);\n"
29432"uint2 __ovld __cnfn convert_uint2_sat_rte(int2);\n"
29433"uint2 __ovld __cnfn convert_uint2_rtz(int2);\n"
29434"uint2 __ovld __cnfn convert_uint2_sat_rtz(int2);\n"
29435"uint2 __ovld __cnfn convert_uint2_rtp(int2);\n"
29436"uint2 __ovld __cnfn convert_uint2_sat_rtp(int2);\n"
29437"uint2 __ovld __cnfn convert_uint2_rtn(int2);\n"
29438"uint2 __ovld __cnfn convert_uint2_sat_rtn(int2);\n"
29439"uint2 __ovld __cnfn convert_uint2(int2);\n"
29440"uint2 __ovld __cnfn convert_uint2_sat(int2);\n"
29441"uint2 __ovld __cnfn convert_uint2_rte(uint2);\n"
29442"uint2 __ovld __cnfn convert_uint2_sat_rte(uint2);\n"
29443"uint2 __ovld __cnfn convert_uint2_rtz(uint2);\n"
29444"uint2 __ovld __cnfn convert_uint2_sat_rtz(uint2);\n"
29445"uint2 __ovld __cnfn convert_uint2_rtp(uint2);\n"
29446"uint2 __ovld __cnfn convert_uint2_sat_rtp(uint2);\n"
29447"uint2 __ovld __cnfn convert_uint2_rtn(uint2);\n"
29448"uint2 __ovld __cnfn convert_uint2_sat_rtn(uint2);\n"
29449"uint2 __ovld __cnfn convert_uint2(uint2);\n"
29450"uint2 __ovld __cnfn convert_uint2_sat(uint2);\n"
29451"uint2 __ovld __cnfn convert_uint2_rte(long2);\n"
29452"uint2 __ovld __cnfn convert_uint2_sat_rte(long2);\n"
29453"uint2 __ovld __cnfn convert_uint2_rtz(long2);\n"
29454"uint2 __ovld __cnfn convert_uint2_sat_rtz(long2);\n"
29455"uint2 __ovld __cnfn convert_uint2_rtp(long2);\n"
29456"uint2 __ovld __cnfn convert_uint2_sat_rtp(long2);\n"
29457"uint2 __ovld __cnfn convert_uint2_rtn(long2);\n"
29458"uint2 __ovld __cnfn convert_uint2_sat_rtn(long2);\n"
29459"uint2 __ovld __cnfn convert_uint2(long2);\n"
29460"uint2 __ovld __cnfn convert_uint2_sat(long2);\n"
29461"uint2 __ovld __cnfn convert_uint2_rte(ulong2);\n"
29462"uint2 __ovld __cnfn convert_uint2_sat_rte(ulong2);\n"
29463"uint2 __ovld __cnfn convert_uint2_rtz(ulong2);\n"
29464"uint2 __ovld __cnfn convert_uint2_sat_rtz(ulong2);\n"
29465"uint2 __ovld __cnfn convert_uint2_rtp(ulong2);\n"
29466"uint2 __ovld __cnfn convert_uint2_sat_rtp(ulong2);\n"
29467"uint2 __ovld __cnfn convert_uint2_rtn(ulong2);\n"
29468"uint2 __ovld __cnfn convert_uint2_sat_rtn(ulong2);\n"
29469"uint2 __ovld __cnfn convert_uint2(ulong2);\n"
29470"uint2 __ovld __cnfn convert_uint2_sat(ulong2);\n"
29471"uint2 __ovld __cnfn convert_uint2_rte(float2);\n"
29472"uint2 __ovld __cnfn convert_uint2_sat_rte(float2);\n"
29473"uint2 __ovld __cnfn convert_uint2_rtz(float2);\n"
29474"uint2 __ovld __cnfn convert_uint2_sat_rtz(float2);\n"
29475"uint2 __ovld __cnfn convert_uint2_rtp(float2);\n"
29476"uint2 __ovld __cnfn convert_uint2_sat_rtp(float2);\n"
29477"uint2 __ovld __cnfn convert_uint2_rtn(float2);\n"
29478"uint2 __ovld __cnfn convert_uint2_sat_rtn(float2);\n"
29479"uint2 __ovld __cnfn convert_uint2(float2);\n"
29480"uint2 __ovld __cnfn convert_uint2_sat(float2);\n"
29481"long2 __ovld __cnfn convert_long2_rte(char2);\n"
29482"long2 __ovld __cnfn convert_long2_sat_rte(char2);\n"
29483"long2 __ovld __cnfn convert_long2_rtz(char2);\n"
29484"long2 __ovld __cnfn convert_long2_sat_rtz(char2);\n"
29485"long2 __ovld __cnfn convert_long2_rtp(char2);\n"
29486"long2 __ovld __cnfn convert_long2_sat_rtp(char2);\n"
29487"long2 __ovld __cnfn convert_long2_rtn(char2);\n"
29488"long2 __ovld __cnfn convert_long2_sat_rtn(char2);\n"
29489"long2 __ovld __cnfn convert_long2(char2);\n"
29490"long2 __ovld __cnfn convert_long2_sat(char2);\n"
29491"long2 __ovld __cnfn convert_long2_rte(uchar2);\n"
29492"long2 __ovld __cnfn convert_long2_sat_rte(uchar2);\n"
29493"long2 __ovld __cnfn convert_long2_rtz(uchar2);\n"
29494"long2 __ovld __cnfn convert_long2_sat_rtz(uchar2);\n"
29495"long2 __ovld __cnfn convert_long2_rtp(uchar2);\n"
29496"long2 __ovld __cnfn convert_long2_sat_rtp(uchar2);\n"
29497"long2 __ovld __cnfn convert_long2_rtn(uchar2);\n"
29498"long2 __ovld __cnfn convert_long2_sat_rtn(uchar2);\n"
29499"long2 __ovld __cnfn convert_long2(uchar2);\n"
29500"long2 __ovld __cnfn convert_long2_sat(uchar2);\n"
29501"long2 __ovld __cnfn convert_long2_rte(short2);\n"
29502"long2 __ovld __cnfn convert_long2_sat_rte(short2);\n"
29503"long2 __ovld __cnfn convert_long2_rtz(short2);\n"
29504"long2 __ovld __cnfn convert_long2_sat_rtz(short2);\n"
29505"long2 __ovld __cnfn convert_long2_rtp(short2);\n"
29506"long2 __ovld __cnfn convert_long2_sat_rtp(short2);\n"
29507"long2 __ovld __cnfn convert_long2_rtn(short2);\n"
29508"long2 __ovld __cnfn convert_long2_sat_rtn(short2);\n"
29509"long2 __ovld __cnfn convert_long2(short2);\n"
29510"long2 __ovld __cnfn convert_long2_sat(short2);\n"
29511"long2 __ovld __cnfn convert_long2_rte(ushort2);\n"
29512"long2 __ovld __cnfn convert_long2_sat_rte(ushort2);\n"
29513"long2 __ovld __cnfn convert_long2_rtz(ushort2);\n"
29514"long2 __ovld __cnfn convert_long2_sat_rtz(ushort2);\n"
29515"long2 __ovld __cnfn convert_long2_rtp(ushort2);\n"
29516"long2 __ovld __cnfn convert_long2_sat_rtp(ushort2);\n"
29517"long2 __ovld __cnfn convert_long2_rtn(ushort2);\n"
29518"long2 __ovld __cnfn convert_long2_sat_rtn(ushort2);\n"
29519"long2 __ovld __cnfn convert_long2(ushort2);\n"
29520"long2 __ovld __cnfn convert_long2_sat(ushort2);\n"
29521"long2 __ovld __cnfn convert_long2_rte(int2);\n"
29522"long2 __ovld __cnfn convert_long2_sat_rte(int2);\n"
29523"long2 __ovld __cnfn convert_long2_rtz(int2);\n"
29524"long2 __ovld __cnfn convert_long2_sat_rtz(int2);\n"
29525"long2 __ovld __cnfn convert_long2_rtp(int2);\n"
29526"long2 __ovld __cnfn convert_long2_sat_rtp(int2);\n"
29527"long2 __ovld __cnfn convert_long2_rtn(int2);\n"
29528"long2 __ovld __cnfn convert_long2_sat_rtn(int2);\n"
29529"long2 __ovld __cnfn convert_long2(int2);\n"
29530"long2 __ovld __cnfn convert_long2_sat(int2);\n"
29531"long2 __ovld __cnfn convert_long2_rte(uint2);\n"
29532"long2 __ovld __cnfn convert_long2_sat_rte(uint2);\n"
29533"long2 __ovld __cnfn convert_long2_rtz(uint2);\n"
29534"long2 __ovld __cnfn convert_long2_sat_rtz(uint2);\n"
29535"long2 __ovld __cnfn convert_long2_rtp(uint2);\n"
29536"long2 __ovld __cnfn convert_long2_sat_rtp(uint2);\n"
29537"long2 __ovld __cnfn convert_long2_rtn(uint2);\n"
29538"long2 __ovld __cnfn convert_long2_sat_rtn(uint2);\n"
29539"long2 __ovld __cnfn convert_long2(uint2);\n"
29540"long2 __ovld __cnfn convert_long2_sat(uint2);\n"
29541"long2 __ovld __cnfn convert_long2_rte(long2);\n"
29542"long2 __ovld __cnfn convert_long2_sat_rte(long2);\n"
29543"long2 __ovld __cnfn convert_long2_rtz(long2);\n"
29544"long2 __ovld __cnfn convert_long2_sat_rtz(long2);\n"
29545"long2 __ovld __cnfn convert_long2_rtp(long2);\n"
29546"long2 __ovld __cnfn convert_long2_sat_rtp(long2);\n"
29547"long2 __ovld __cnfn convert_long2_rtn(long2);\n"
29548"long2 __ovld __cnfn convert_long2_sat_rtn(long2);\n"
29549"long2 __ovld __cnfn convert_long2(long2);\n"
29550"long2 __ovld __cnfn convert_long2_sat(long2);\n"
29551"long2 __ovld __cnfn convert_long2_rte(ulong2);\n"
29552"long2 __ovld __cnfn convert_long2_sat_rte(ulong2);\n"
29553"long2 __ovld __cnfn convert_long2_rtz(ulong2);\n"
29554"long2 __ovld __cnfn convert_long2_sat_rtz(ulong2);\n"
29555"long2 __ovld __cnfn convert_long2_rtp(ulong2);\n"
29556"long2 __ovld __cnfn convert_long2_sat_rtp(ulong2);\n"
29557"long2 __ovld __cnfn convert_long2_rtn(ulong2);\n"
29558"long2 __ovld __cnfn convert_long2_sat_rtn(ulong2);\n"
29559"long2 __ovld __cnfn convert_long2(ulong2);\n"
29560"long2 __ovld __cnfn convert_long2_sat(ulong2);\n"
29561"long2 __ovld __cnfn convert_long2_rte(float2);\n"
29562"long2 __ovld __cnfn convert_long2_sat_rte(float2);\n"
29563"long2 __ovld __cnfn convert_long2_rtz(float2);\n"
29564"long2 __ovld __cnfn convert_long2_sat_rtz(float2);\n"
29565"long2 __ovld __cnfn convert_long2_rtp(float2);\n"
29566"long2 __ovld __cnfn convert_long2_sat_rtp(float2);\n"
29567"long2 __ovld __cnfn convert_long2_rtn(float2);\n"
29568"long2 __ovld __cnfn convert_long2_sat_rtn(float2);\n"
29569"long2 __ovld __cnfn convert_long2(float2);\n"
29570"long2 __ovld __cnfn convert_long2_sat(float2);\n"
29571"ulong2 __ovld __cnfn convert_ulong2_rte(char2);\n"
29572"ulong2 __ovld __cnfn convert_ulong2_sat_rte(char2);\n"
29573"ulong2 __ovld __cnfn convert_ulong2_rtz(char2);\n"
29574"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(char2);\n"
29575"ulong2 __ovld __cnfn convert_ulong2_rtp(char2);\n"
29576"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(char2);\n"
29577"ulong2 __ovld __cnfn convert_ulong2_rtn(char2);\n"
29578"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(char2);\n"
29579"ulong2 __ovld __cnfn convert_ulong2(char2);\n"
29580"ulong2 __ovld __cnfn convert_ulong2_sat(char2);\n"
29581"ulong2 __ovld __cnfn convert_ulong2_rte(uchar2);\n"
29582"ulong2 __ovld __cnfn convert_ulong2_sat_rte(uchar2);\n"
29583"ulong2 __ovld __cnfn convert_ulong2_rtz(uchar2);\n"
29584"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uchar2);\n"
29585"ulong2 __ovld __cnfn convert_ulong2_rtp(uchar2);\n"
29586"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uchar2);\n"
29587"ulong2 __ovld __cnfn convert_ulong2_rtn(uchar2);\n"
29588"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uchar2);\n"
29589"ulong2 __ovld __cnfn convert_ulong2(uchar2);\n"
29590"ulong2 __ovld __cnfn convert_ulong2_sat(uchar2);\n"
29591"ulong2 __ovld __cnfn convert_ulong2_rte(short2);\n"
29592"ulong2 __ovld __cnfn convert_ulong2_sat_rte(short2);\n"
29593"ulong2 __ovld __cnfn convert_ulong2_rtz(short2);\n"
29594"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(short2);\n"
29595"ulong2 __ovld __cnfn convert_ulong2_rtp(short2);\n"
29596"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(short2);\n"
29597"ulong2 __ovld __cnfn convert_ulong2_rtn(short2);\n"
29598"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(short2);\n"
29599"ulong2 __ovld __cnfn convert_ulong2(short2);\n"
29600"ulong2 __ovld __cnfn convert_ulong2_sat(short2);\n"
29601"ulong2 __ovld __cnfn convert_ulong2_rte(ushort2);\n"
29602"ulong2 __ovld __cnfn convert_ulong2_sat_rte(ushort2);\n"
29603"ulong2 __ovld __cnfn convert_ulong2_rtz(ushort2);\n"
29604"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ushort2);\n"
29605"ulong2 __ovld __cnfn convert_ulong2_rtp(ushort2);\n"
29606"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ushort2);\n"
29607"ulong2 __ovld __cnfn convert_ulong2_rtn(ushort2);\n"
29608"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ushort2);\n"
29609"ulong2 __ovld __cnfn convert_ulong2(ushort2);\n"
29610"ulong2 __ovld __cnfn convert_ulong2_sat(ushort2);\n"
29611"ulong2 __ovld __cnfn convert_ulong2_rte(int2);\n"
29612"ulong2 __ovld __cnfn convert_ulong2_sat_rte(int2);\n"
29613"ulong2 __ovld __cnfn convert_ulong2_rtz(int2);\n"
29614"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(int2);\n"
29615"ulong2 __ovld __cnfn convert_ulong2_rtp(int2);\n"
29616"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(int2);\n"
29617"ulong2 __ovld __cnfn convert_ulong2_rtn(int2);\n"
29618"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(int2);\n"
29619"ulong2 __ovld __cnfn convert_ulong2(int2);\n"
29620"ulong2 __ovld __cnfn convert_ulong2_sat(int2);\n"
29621"ulong2 __ovld __cnfn convert_ulong2_rte(uint2);\n"
29622"ulong2 __ovld __cnfn convert_ulong2_sat_rte(uint2);\n"
29623"ulong2 __ovld __cnfn convert_ulong2_rtz(uint2);\n"
29624"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uint2);\n"
29625"ulong2 __ovld __cnfn convert_ulong2_rtp(uint2);\n"
29626"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uint2);\n"
29627"ulong2 __ovld __cnfn convert_ulong2_rtn(uint2);\n"
29628"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uint2);\n"
29629"ulong2 __ovld __cnfn convert_ulong2(uint2);\n"
29630"ulong2 __ovld __cnfn convert_ulong2_sat(uint2);\n"
29631"ulong2 __ovld __cnfn convert_ulong2_rte(long2);\n"
29632"ulong2 __ovld __cnfn convert_ulong2_sat_rte(long2);\n"
29633"ulong2 __ovld __cnfn convert_ulong2_rtz(long2);\n"
29634"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(long2);\n"
29635"ulong2 __ovld __cnfn convert_ulong2_rtp(long2);\n"
29636"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(long2);\n"
29637"ulong2 __ovld __cnfn convert_ulong2_rtn(long2);\n"
29638"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(long2);\n"
29639"ulong2 __ovld __cnfn convert_ulong2(long2);\n"
29640"ulong2 __ovld __cnfn convert_ulong2_sat(long2);\n"
29641"ulong2 __ovld __cnfn convert_ulong2_rte(ulong2);\n"
29642"ulong2 __ovld __cnfn convert_ulong2_sat_rte(ulong2);\n"
29643"ulong2 __ovld __cnfn convert_ulong2_rtz(ulong2);\n"
29644"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ulong2);\n"
29645"ulong2 __ovld __cnfn convert_ulong2_rtp(ulong2);\n"
29646"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ulong2);\n"
29647"ulong2 __ovld __cnfn convert_ulong2_rtn(ulong2);\n"
29648"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ulong2);\n"
29649"ulong2 __ovld __cnfn convert_ulong2(ulong2);\n"
29650"ulong2 __ovld __cnfn convert_ulong2_sat(ulong2);\n"
29651"ulong2 __ovld __cnfn convert_ulong2_rte(float2);\n"
29652"ulong2 __ovld __cnfn convert_ulong2_sat_rte(float2);\n"
29653"ulong2 __ovld __cnfn convert_ulong2_rtz(float2);\n"
29654"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(float2);\n"
29655"ulong2 __ovld __cnfn convert_ulong2_rtp(float2);\n"
29656"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(float2);\n"
29657"ulong2 __ovld __cnfn convert_ulong2_rtn(float2);\n"
29658"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(float2);\n"
29659"ulong2 __ovld __cnfn convert_ulong2(float2);\n"
29660"ulong2 __ovld __cnfn convert_ulong2_sat(float2);\n"
29661"float2 __ovld __cnfn convert_float2_rte(char2);\n"
29662"float2 __ovld __cnfn convert_float2_rtz(char2);\n"
29663"float2 __ovld __cnfn convert_float2_rtp(char2);\n"
29664"float2 __ovld __cnfn convert_float2_rtn(char2);\n"
29665"float2 __ovld __cnfn convert_float2(char2);\n"
29666"float2 __ovld __cnfn convert_float2_rte(uchar2);\n"
29667"float2 __ovld __cnfn convert_float2_rtz(uchar2);\n"
29668"float2 __ovld __cnfn convert_float2_rtp(uchar2);\n"
29669"float2 __ovld __cnfn convert_float2_rtn(uchar2);\n"
29670"float2 __ovld __cnfn convert_float2(uchar2);\n"
29671"float2 __ovld __cnfn convert_float2_rte(short2);\n"
29672"float2 __ovld __cnfn convert_float2_rtz(short2);\n"
29673"float2 __ovld __cnfn convert_float2_rtp(short2);\n"
29674"float2 __ovld __cnfn convert_float2_rtn(short2);\n"
29675"float2 __ovld __cnfn convert_float2(short2);\n"
29676"float2 __ovld __cnfn convert_float2_rte(ushort2);\n"
29677"float2 __ovld __cnfn convert_float2_rtz(ushort2);\n"
29678"float2 __ovld __cnfn convert_float2_rtp(ushort2);\n"
29679"float2 __ovld __cnfn convert_float2_rtn(ushort2);\n"
29680"float2 __ovld __cnfn convert_float2(ushort2);\n"
29681"float2 __ovld __cnfn convert_float2_rte(int2);\n"
29682"float2 __ovld __cnfn convert_float2_rtz(int2);\n"
29683"float2 __ovld __cnfn convert_float2_rtp(int2);\n"
29684"float2 __ovld __cnfn convert_float2_rtn(int2);\n"
29685"float2 __ovld __cnfn convert_float2(int2);\n"
29686"float2 __ovld __cnfn convert_float2_rte(uint2);\n"
29687"float2 __ovld __cnfn convert_float2_rtz(uint2);\n"
29688"float2 __ovld __cnfn convert_float2_rtp(uint2);\n"
29689"float2 __ovld __cnfn convert_float2_rtn(uint2);\n"
29690"float2 __ovld __cnfn convert_float2(uint2);\n"
29691"float2 __ovld __cnfn convert_float2_rte(long2);\n"
29692"float2 __ovld __cnfn convert_float2_rtz(long2);\n"
29693"float2 __ovld __cnfn convert_float2_rtp(long2);\n"
29694"float2 __ovld __cnfn convert_float2_rtn(long2);\n"
29695"float2 __ovld __cnfn convert_float2(long2);\n"
29696"float2 __ovld __cnfn convert_float2_rte(ulong2);\n"
29697"float2 __ovld __cnfn convert_float2_rtz(ulong2);\n"
29698"float2 __ovld __cnfn convert_float2_rtp(ulong2);\n"
29699"float2 __ovld __cnfn convert_float2_rtn(ulong2);\n"
29700"float2 __ovld __cnfn convert_float2(ulong2);\n"
29701"float2 __ovld __cnfn convert_float2_rte(float2);\n"
29702"float2 __ovld __cnfn convert_float2_rtz(float2);\n"
29703"float2 __ovld __cnfn convert_float2_rtp(float2);\n"
29704"float2 __ovld __cnfn convert_float2_rtn(float2);\n"
29705"float2 __ovld __cnfn convert_float2(float2);\n"
29706"char3 __ovld __cnfn convert_char3_rte(char3);\n"
29707"char3 __ovld __cnfn convert_char3_sat_rte(char3);\n"
29708"char3 __ovld __cnfn convert_char3_rtz(char3);\n"
29709"char3 __ovld __cnfn convert_char3_sat_rtz(char3);\n"
29710"char3 __ovld __cnfn convert_char3_rtp(char3);\n"
29711"char3 __ovld __cnfn convert_char3_sat_rtp(char3);\n"
29712"char3 __ovld __cnfn convert_char3_rtn(char3);\n"
29713"char3 __ovld __cnfn convert_char3_sat_rtn(char3);\n"
29714"char3 __ovld __cnfn convert_char3(char3);\n"
29715"char3 __ovld __cnfn convert_char3_sat(char3);\n"
29716"char3 __ovld __cnfn convert_char3_rte(uchar3);\n"
29717"char3 __ovld __cnfn convert_char3_sat_rte(uchar3);\n"
29718"char3 __ovld __cnfn convert_char3_rtz(uchar3);\n"
29719"char3 __ovld __cnfn convert_char3_sat_rtz(uchar3);\n"
29720"char3 __ovld __cnfn convert_char3_rtp(uchar3);\n"
29721"char3 __ovld __cnfn convert_char3_sat_rtp(uchar3);\n"
29722"char3 __ovld __cnfn convert_char3_rtn(uchar3);\n"
29723"char3 __ovld __cnfn convert_char3_sat_rtn(uchar3);\n"
29724"char3 __ovld __cnfn convert_char3(uchar3);\n"
29725"char3 __ovld __cnfn convert_char3_sat(uchar3);\n"
29726"char3 __ovld __cnfn convert_char3_rte(short3);\n"
29727"char3 __ovld __cnfn convert_char3_sat_rte(short3);\n"
29728"char3 __ovld __cnfn convert_char3_rtz(short3);\n"
29729"char3 __ovld __cnfn convert_char3_sat_rtz(short3);\n"
29730"char3 __ovld __cnfn convert_char3_rtp(short3);\n"
29731"char3 __ovld __cnfn convert_char3_sat_rtp(short3);\n"
29732"char3 __ovld __cnfn convert_char3_rtn(short3);\n"
29733"char3 __ovld __cnfn convert_char3_sat_rtn(short3);\n"
29734"char3 __ovld __cnfn convert_char3(short3);\n"
29735"char3 __ovld __cnfn convert_char3_sat(short3);\n"
29736"char3 __ovld __cnfn convert_char3_rte(ushort3);\n"
29737"char3 __ovld __cnfn convert_char3_sat_rte(ushort3);\n"
29738"char3 __ovld __cnfn convert_char3_rtz(ushort3);\n"
29739"char3 __ovld __cnfn convert_char3_sat_rtz(ushort3);\n"
29740"char3 __ovld __cnfn convert_char3_rtp(ushort3);\n"
29741"char3 __ovld __cnfn convert_char3_sat_rtp(ushort3);\n"
29742"char3 __ovld __cnfn convert_char3_rtn(ushort3);\n"
29743"char3 __ovld __cnfn convert_char3_sat_rtn(ushort3);\n"
29744"char3 __ovld __cnfn convert_char3(ushort3);\n"
29745"char3 __ovld __cnfn convert_char3_sat(ushort3);\n"
29746"char3 __ovld __cnfn convert_char3_rte(int3);\n"
29747"char3 __ovld __cnfn convert_char3_sat_rte(int3);\n"
29748"char3 __ovld __cnfn convert_char3_rtz(int3);\n"
29749"char3 __ovld __cnfn convert_char3_sat_rtz(int3);\n"
29750"char3 __ovld __cnfn convert_char3_rtp(int3);\n"
29751"char3 __ovld __cnfn convert_char3_sat_rtp(int3);\n"
29752"char3 __ovld __cnfn convert_char3_rtn(int3);\n"
29753"char3 __ovld __cnfn convert_char3_sat_rtn(int3);\n"
29754"char3 __ovld __cnfn convert_char3(int3);\n"
29755"char3 __ovld __cnfn convert_char3_sat(int3);\n"
29756"char3 __ovld __cnfn convert_char3_rte(uint3);\n"
29757"char3 __ovld __cnfn convert_char3_sat_rte(uint3);\n"
29758"char3 __ovld __cnfn convert_char3_rtz(uint3);\n"
29759"char3 __ovld __cnfn convert_char3_sat_rtz(uint3);\n"
29760"char3 __ovld __cnfn convert_char3_rtp(uint3);\n"
29761"char3 __ovld __cnfn convert_char3_sat_rtp(uint3);\n"
29762"char3 __ovld __cnfn convert_char3_rtn(uint3);\n"
29763"char3 __ovld __cnfn convert_char3_sat_rtn(uint3);\n"
29764"char3 __ovld __cnfn convert_char3(uint3);\n"
29765"char3 __ovld __cnfn convert_char3_sat(uint3);\n"
29766"char3 __ovld __cnfn convert_char3_rte(long3);\n"
29767"char3 __ovld __cnfn convert_char3_sat_rte(long3);\n"
29768"char3 __ovld __cnfn convert_char3_rtz(long3);\n"
29769"char3 __ovld __cnfn convert_char3_sat_rtz(long3);\n"
29770"char3 __ovld __cnfn convert_char3_rtp(long3);\n"
29771"char3 __ovld __cnfn convert_char3_sat_rtp(long3);\n"
29772"char3 __ovld __cnfn convert_char3_rtn(long3);\n"
29773"char3 __ovld __cnfn convert_char3_sat_rtn(long3);\n"
29774"char3 __ovld __cnfn convert_char3(long3);\n"
29775"char3 __ovld __cnfn convert_char3_sat(long3);\n"
29776"char3 __ovld __cnfn convert_char3_rte(ulong3);\n"
29777"char3 __ovld __cnfn convert_char3_sat_rte(ulong3);\n"
29778"char3 __ovld __cnfn convert_char3_rtz(ulong3);\n"
29779"char3 __ovld __cnfn convert_char3_sat_rtz(ulong3);\n"
29780"char3 __ovld __cnfn convert_char3_rtp(ulong3);\n"
29781"char3 __ovld __cnfn convert_char3_sat_rtp(ulong3);\n"
29782"char3 __ovld __cnfn convert_char3_rtn(ulong3);\n"
29783"char3 __ovld __cnfn convert_char3_sat_rtn(ulong3);\n"
29784"char3 __ovld __cnfn convert_char3(ulong3);\n"
29785"char3 __ovld __cnfn convert_char3_sat(ulong3);\n"
29786"char3 __ovld __cnfn convert_char3_rte(float3);\n"
29787"char3 __ovld __cnfn convert_char3_sat_rte(float3);\n"
29788"char3 __ovld __cnfn convert_char3_rtz(float3);\n"
29789"char3 __ovld __cnfn convert_char3_sat_rtz(float3);\n"
29790"char3 __ovld __cnfn convert_char3_rtp(float3);\n"
29791"char3 __ovld __cnfn convert_char3_sat_rtp(float3);\n"
29792"char3 __ovld __cnfn convert_char3_rtn(float3);\n"
29793"char3 __ovld __cnfn convert_char3_sat_rtn(float3);\n"
29794"char3 __ovld __cnfn convert_char3(float3);\n"
29795"char3 __ovld __cnfn convert_char3_sat(float3);\n"
29796"uchar3 __ovld __cnfn convert_uchar3_rte(char3);\n"
29797"uchar3 __ovld __cnfn convert_uchar3_sat_rte(char3);\n"
29798"uchar3 __ovld __cnfn convert_uchar3_rtz(char3);\n"
29799"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(char3);\n"
29800"uchar3 __ovld __cnfn convert_uchar3_rtp(char3);\n"
29801"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(char3);\n"
29802"uchar3 __ovld __cnfn convert_uchar3_rtn(char3);\n"
29803"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(char3);\n"
29804"uchar3 __ovld __cnfn convert_uchar3(char3);\n"
29805"uchar3 __ovld __cnfn convert_uchar3_sat(char3);\n"
29806"uchar3 __ovld __cnfn convert_uchar3_rte(uchar3);\n"
29807"uchar3 __ovld __cnfn convert_uchar3_sat_rte(uchar3);\n"
29808"uchar3 __ovld __cnfn convert_uchar3_rtz(uchar3);\n"
29809"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(uchar3);\n"
29810"uchar3 __ovld __cnfn convert_uchar3_rtp(uchar3);\n"
29811"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uchar3);\n"
29812"uchar3 __ovld __cnfn convert_uchar3_rtn(uchar3);\n"
29813"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uchar3);\n"
29814"uchar3 __ovld __cnfn convert_uchar3(uchar3);\n"
29815"uchar3 __ovld __cnfn convert_uchar3_sat(uchar3);\n"
29816"uchar3 __ovld __cnfn convert_uchar3_rte(short3);\n"
29817"uchar3 __ovld __cnfn convert_uchar3_sat_rte(short3);\n"
29818"uchar3 __ovld __cnfn convert_uchar3_rtz(short3);\n"
29819"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(short3);\n"
29820"uchar3 __ovld __cnfn convert_uchar3_rtp(short3);\n"
29821"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(short3);\n"
29822"uchar3 __ovld __cnfn convert_uchar3_rtn(short3);\n"
29823"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(short3);\n"
29824"uchar3 __ovld __cnfn convert_uchar3(short3);\n"
29825"uchar3 __ovld __cnfn convert_uchar3_sat(short3);\n"
29826"uchar3 __ovld __cnfn convert_uchar3_rte(ushort3);\n"
29827"uchar3 __ovld __cnfn convert_uchar3_sat_rte(ushort3);\n"
29828"uchar3 __ovld __cnfn convert_uchar3_rtz(ushort3);\n"
29829"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ushort3);\n"
29830"uchar3 __ovld __cnfn convert_uchar3_rtp(ushort3);\n"
29831"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ushort3);\n"
29832"uchar3 __ovld __cnfn convert_uchar3_rtn(ushort3);\n"
29833"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ushort3);\n"
29834"uchar3 __ovld __cnfn convert_uchar3(ushort3);\n"
29835"uchar3 __ovld __cnfn convert_uchar3_sat(ushort3);\n"
29836"uchar3 __ovld __cnfn convert_uchar3_rte(int3);\n"
29837"uchar3 __ovld __cnfn convert_uchar3_sat_rte(int3);\n"
29838"uchar3 __ovld __cnfn convert_uchar3_rtz(int3);\n"
29839"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(int3);\n"
29840"uchar3 __ovld __cnfn convert_uchar3_rtp(int3);\n"
29841"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(int3);\n"
29842"uchar3 __ovld __cnfn convert_uchar3_rtn(int3);\n"
29843"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(int3);\n"
29844"uchar3 __ovld __cnfn convert_uchar3(int3);\n"
29845"uchar3 __ovld __cnfn convert_uchar3_sat(int3);\n"
29846"uchar3 __ovld __cnfn convert_uchar3_rte(uint3);\n"
29847"uchar3 __ovld __cnfn convert_uchar3_sat_rte(uint3);\n"
29848"uchar3 __ovld __cnfn convert_uchar3_rtz(uint3);\n"
29849"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(uint3);\n"
29850"uchar3 __ovld __cnfn convert_uchar3_rtp(uint3);\n"
29851"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uint3);\n"
29852"uchar3 __ovld __cnfn convert_uchar3_rtn(uint3);\n"
29853"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uint3);\n"
29854"uchar3 __ovld __cnfn convert_uchar3(uint3);\n"
29855"uchar3 __ovld __cnfn convert_uchar3_sat(uint3);\n"
29856"uchar3 __ovld __cnfn convert_uchar3_rte(long3);\n"
29857"uchar3 __ovld __cnfn convert_uchar3_sat_rte(long3);\n"
29858"uchar3 __ovld __cnfn convert_uchar3_rtz(long3);\n"
29859"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(long3);\n"
29860"uchar3 __ovld __cnfn convert_uchar3_rtp(long3);\n"
29861"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(long3);\n"
29862"uchar3 __ovld __cnfn convert_uchar3_rtn(long3);\n"
29863"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(long3);\n"
29864"uchar3 __ovld __cnfn convert_uchar3(long3);\n"
29865"uchar3 __ovld __cnfn convert_uchar3_sat(long3);\n"
29866"uchar3 __ovld __cnfn convert_uchar3_rte(ulong3);\n"
29867"uchar3 __ovld __cnfn convert_uchar3_sat_rte(ulong3);\n"
29868"uchar3 __ovld __cnfn convert_uchar3_rtz(ulong3);\n"
29869"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ulong3);\n"
29870"uchar3 __ovld __cnfn convert_uchar3_rtp(ulong3);\n"
29871"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ulong3);\n"
29872"uchar3 __ovld __cnfn convert_uchar3_rtn(ulong3);\n"
29873"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ulong3);\n"
29874"uchar3 __ovld __cnfn convert_uchar3(ulong3);\n"
29875"uchar3 __ovld __cnfn convert_uchar3_sat(ulong3);\n"
29876"uchar3 __ovld __cnfn convert_uchar3_rte(float3);\n"
29877"uchar3 __ovld __cnfn convert_uchar3_sat_rte(float3);\n"
29878"uchar3 __ovld __cnfn convert_uchar3_rtz(float3);\n"
29879"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(float3);\n"
29880"uchar3 __ovld __cnfn convert_uchar3_rtp(float3);\n"
29881"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(float3);\n"
29882"uchar3 __ovld __cnfn convert_uchar3_rtn(float3);\n"
29883"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(float3);\n"
29884"uchar3 __ovld __cnfn convert_uchar3(float3);\n"
29885"uchar3 __ovld __cnfn convert_uchar3_sat(float3);\n"
29886"short3 __ovld __cnfn convert_short3_rte(char3);\n"
29887"short3 __ovld __cnfn convert_short3_sat_rte(char3);\n"
29888"short3 __ovld __cnfn convert_short3_rtz(char3);\n"
29889"short3 __ovld __cnfn convert_short3_sat_rtz(char3);\n"
29890"short3 __ovld __cnfn convert_short3_rtp(char3);\n"
29891"short3 __ovld __cnfn convert_short3_sat_rtp(char3);\n"
29892"short3 __ovld __cnfn convert_short3_rtn(char3);\n"
29893"short3 __ovld __cnfn convert_short3_sat_rtn(char3);\n"
29894"short3 __ovld __cnfn convert_short3(char3);\n"
29895"short3 __ovld __cnfn convert_short3_sat(char3);\n"
29896"short3 __ovld __cnfn convert_short3_rte(uchar3);\n"
29897"short3 __ovld __cnfn convert_short3_sat_rte(uchar3);\n"
29898"short3 __ovld __cnfn convert_short3_rtz(uchar3);\n"
29899"short3 __ovld __cnfn convert_short3_sat_rtz(uchar3);\n"
29900"short3 __ovld __cnfn convert_short3_rtp(uchar3);\n"
29901"short3 __ovld __cnfn convert_short3_sat_rtp(uchar3);\n"
29902"short3 __ovld __cnfn convert_short3_rtn(uchar3);\n"
29903"short3 __ovld __cnfn convert_short3_sat_rtn(uchar3);\n"
29904"short3 __ovld __cnfn convert_short3(uchar3);\n"
29905"short3 __ovld __cnfn convert_short3_sat(uchar3);\n"
29906"short3 __ovld __cnfn convert_short3_rte(short3);\n"
29907"short3 __ovld __cnfn convert_short3_sat_rte(short3);\n"
29908"short3 __ovld __cnfn convert_short3_rtz(short3);\n"
29909"short3 __ovld __cnfn convert_short3_sat_rtz(short3);\n"
29910"short3 __ovld __cnfn convert_short3_rtp(short3);\n"
29911"short3 __ovld __cnfn convert_short3_sat_rtp(short3);\n"
29912"short3 __ovld __cnfn convert_short3_rtn(short3);\n"
29913"short3 __ovld __cnfn convert_short3_sat_rtn(short3);\n"
29914"short3 __ovld __cnfn convert_short3(short3);\n"
29915"short3 __ovld __cnfn convert_short3_sat(short3);\n"
29916"short3 __ovld __cnfn convert_short3_rte(ushort3);\n"
29917"short3 __ovld __cnfn convert_short3_sat_rte(ushort3);\n"
29918"short3 __ovld __cnfn convert_short3_rtz(ushort3);\n"
29919"short3 __ovld __cnfn convert_short3_sat_rtz(ushort3);\n"
29920"short3 __ovld __cnfn convert_short3_rtp(ushort3);\n"
29921"short3 __ovld __cnfn convert_short3_sat_rtp(ushort3);\n"
29922"short3 __ovld __cnfn convert_short3_rtn(ushort3);\n"
29923"short3 __ovld __cnfn convert_short3_sat_rtn(ushort3);\n"
29924"short3 __ovld __cnfn convert_short3(ushort3);\n"
29925"short3 __ovld __cnfn convert_short3_sat(ushort3);\n"
29926"short3 __ovld __cnfn convert_short3_rte(int3);\n"
29927"short3 __ovld __cnfn convert_short3_sat_rte(int3);\n"
29928"short3 __ovld __cnfn convert_short3_rtz(int3);\n"
29929"short3 __ovld __cnfn convert_short3_sat_rtz(int3);\n"
29930"short3 __ovld __cnfn convert_short3_rtp(int3);\n"
29931"short3 __ovld __cnfn convert_short3_sat_rtp(int3);\n"
29932"short3 __ovld __cnfn convert_short3_rtn(int3);\n"
29933"short3 __ovld __cnfn convert_short3_sat_rtn(int3);\n"
29934"short3 __ovld __cnfn convert_short3(int3);\n"
29935"short3 __ovld __cnfn convert_short3_sat(int3);\n"
29936"short3 __ovld __cnfn convert_short3_rte(uint3);\n"
29937"short3 __ovld __cnfn convert_short3_sat_rte(uint3);\n"
29938"short3 __ovld __cnfn convert_short3_rtz(uint3);\n"
29939"short3 __ovld __cnfn convert_short3_sat_rtz(uint3);\n"
29940"short3 __ovld __cnfn convert_short3_rtp(uint3);\n"
29941"short3 __ovld __cnfn convert_short3_sat_rtp(uint3);\n"
29942"short3 __ovld __cnfn convert_short3_rtn(uint3);\n"
29943"short3 __ovld __cnfn convert_short3_sat_rtn(uint3);\n"
29944"short3 __ovld __cnfn convert_short3(uint3);\n"
29945"short3 __ovld __cnfn convert_short3_sat(uint3);\n"
29946"short3 __ovld __cnfn convert_short3_rte(long3);\n"
29947"short3 __ovld __cnfn convert_short3_sat_rte(long3);\n"
29948"short3 __ovld __cnfn convert_short3_rtz(long3);\n"
29949"short3 __ovld __cnfn convert_short3_sat_rtz(long3);\n"
29950"short3 __ovld __cnfn convert_short3_rtp(long3);\n"
29951"short3 __ovld __cnfn convert_short3_sat_rtp(long3);\n"
29952"short3 __ovld __cnfn convert_short3_rtn(long3);\n"
29953"short3 __ovld __cnfn convert_short3_sat_rtn(long3);\n"
29954"short3 __ovld __cnfn convert_short3(long3);\n"
29955"short3 __ovld __cnfn convert_short3_sat(long3);\n"
29956"short3 __ovld __cnfn convert_short3_rte(ulong3);\n"
29957"short3 __ovld __cnfn convert_short3_sat_rte(ulong3);\n"
29958"short3 __ovld __cnfn convert_short3_rtz(ulong3);\n"
29959"short3 __ovld __cnfn convert_short3_sat_rtz(ulong3);\n"
29960"short3 __ovld __cnfn convert_short3_rtp(ulong3);\n"
29961"short3 __ovld __cnfn convert_short3_sat_rtp(ulong3);\n"
29962"short3 __ovld __cnfn convert_short3_rtn(ulong3);\n"
29963"short3 __ovld __cnfn convert_short3_sat_rtn(ulong3);\n"
29964"short3 __ovld __cnfn convert_short3(ulong3);\n"
29965"short3 __ovld __cnfn convert_short3_sat(ulong3);\n"
29966"short3 __ovld __cnfn convert_short3_rte(float3);\n"
29967"short3 __ovld __cnfn convert_short3_sat_rte(float3);\n"
29968"short3 __ovld __cnfn convert_short3_rtz(float3);\n"
29969"short3 __ovld __cnfn convert_short3_sat_rtz(float3);\n"
29970"short3 __ovld __cnfn convert_short3_rtp(float3);\n"
29971"short3 __ovld __cnfn convert_short3_sat_rtp(float3);\n"
29972"short3 __ovld __cnfn convert_short3_rtn(float3);\n"
29973"short3 __ovld __cnfn convert_short3_sat_rtn(float3);\n"
29974"short3 __ovld __cnfn convert_short3(float3);\n"
29975"short3 __ovld __cnfn convert_short3_sat(float3);\n"
29976"ushort3 __ovld __cnfn convert_ushort3_rte(char3);\n"
29977"ushort3 __ovld __cnfn convert_ushort3_sat_rte(char3);\n"
29978"ushort3 __ovld __cnfn convert_ushort3_rtz(char3);\n"
29979"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(char3);\n"
29980"ushort3 __ovld __cnfn convert_ushort3_rtp(char3);\n"
29981"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(char3);\n"
29982"ushort3 __ovld __cnfn convert_ushort3_rtn(char3);\n"
29983"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(char3);\n"
29984"ushort3 __ovld __cnfn convert_ushort3(char3);\n"
29985"ushort3 __ovld __cnfn convert_ushort3_sat(char3);\n"
29986"ushort3 __ovld __cnfn convert_ushort3_rte(uchar3);\n"
29987"ushort3 __ovld __cnfn convert_ushort3_sat_rte(uchar3);\n"
29988"ushort3 __ovld __cnfn convert_ushort3_rtz(uchar3);\n"
29989"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uchar3);\n"
29990"ushort3 __ovld __cnfn convert_ushort3_rtp(uchar3);\n"
29991"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(uchar3);\n"
29992"ushort3 __ovld __cnfn convert_ushort3_rtn(uchar3);\n"
29993"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uchar3);\n"
29994"ushort3 __ovld __cnfn convert_ushort3(uchar3);\n"
29995"ushort3 __ovld __cnfn convert_ushort3_sat(uchar3);\n"
29996"ushort3 __ovld __cnfn convert_ushort3_rte(short3);\n"
29997"ushort3 __ovld __cnfn convert_ushort3_sat_rte(short3);\n"
29998"ushort3 __ovld __cnfn convert_ushort3_rtz(short3);\n"
29999"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(short3);\n"
30000"ushort3 __ovld __cnfn convert_ushort3_rtp(short3);\n"
30001"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(short3);\n"
30002"ushort3 __ovld __cnfn convert_ushort3_rtn(short3);\n"
30003"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(short3);\n"
30004"ushort3 __ovld __cnfn convert_ushort3(short3);\n"
30005"ushort3 __ovld __cnfn convert_ushort3_sat(short3);\n"
30006"ushort3 __ovld __cnfn convert_ushort3_rte(ushort3);\n"
30007"ushort3 __ovld __cnfn convert_ushort3_sat_rte(ushort3);\n"
30008"ushort3 __ovld __cnfn convert_ushort3_rtz(ushort3);\n"
30009"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ushort3);\n"
30010"ushort3 __ovld __cnfn convert_ushort3_rtp(ushort3);\n"
30011"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ushort3);\n"
30012"ushort3 __ovld __cnfn convert_ushort3_rtn(ushort3);\n"
30013"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ushort3);\n"
30014"ushort3 __ovld __cnfn convert_ushort3(ushort3);\n"
30015"ushort3 __ovld __cnfn convert_ushort3_sat(ushort3);\n"
30016"ushort3 __ovld __cnfn convert_ushort3_rte(int3);\n"
30017"ushort3 __ovld __cnfn convert_ushort3_sat_rte(int3);\n"
30018"ushort3 __ovld __cnfn convert_ushort3_rtz(int3);\n"
30019"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(int3);\n"
30020"ushort3 __ovld __cnfn convert_ushort3_rtp(int3);\n"
30021"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(int3);\n"
30022"ushort3 __ovld __cnfn convert_ushort3_rtn(int3);\n"
30023"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(int3);\n"
30024"ushort3 __ovld __cnfn convert_ushort3(int3);\n"
30025"ushort3 __ovld __cnfn convert_ushort3_sat(int3);\n"
30026"ushort3 __ovld __cnfn convert_ushort3_rte(uint3);\n"
30027"ushort3 __ovld __cnfn convert_ushort3_sat_rte(uint3);\n"
30028"ushort3 __ovld __cnfn convert_ushort3_rtz(uint3);\n"
30029"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uint3);\n"
30030"ushort3 __ovld __cnfn convert_ushort3_rtp(uint3);\n"
30031"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(uint3);\n"
30032"ushort3 __ovld __cnfn convert_ushort3_rtn(uint3);\n"
30033"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uint3);\n"
30034"ushort3 __ovld __cnfn convert_ushort3(uint3);\n"
30035"ushort3 __ovld __cnfn convert_ushort3_sat(uint3);\n"
30036"ushort3 __ovld __cnfn convert_ushort3_rte(long3);\n"
30037"ushort3 __ovld __cnfn convert_ushort3_sat_rte(long3);\n"
30038"ushort3 __ovld __cnfn convert_ushort3_rtz(long3);\n"
30039"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(long3);\n"
30040"ushort3 __ovld __cnfn convert_ushort3_rtp(long3);\n"
30041"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(long3);\n"
30042"ushort3 __ovld __cnfn convert_ushort3_rtn(long3);\n"
30043"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(long3);\n"
30044"ushort3 __ovld __cnfn convert_ushort3(long3);\n"
30045"ushort3 __ovld __cnfn convert_ushort3_sat(long3);\n"
30046"ushort3 __ovld __cnfn convert_ushort3_rte(ulong3);\n"
30047"ushort3 __ovld __cnfn convert_ushort3_sat_rte(ulong3);\n"
30048"ushort3 __ovld __cnfn convert_ushort3_rtz(ulong3);\n"
30049"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ulong3);\n"
30050"ushort3 __ovld __cnfn convert_ushort3_rtp(ulong3);\n"
30051"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ulong3);\n"
30052"ushort3 __ovld __cnfn convert_ushort3_rtn(ulong3);\n"
30053"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ulong3);\n"
30054"ushort3 __ovld __cnfn convert_ushort3(ulong3);\n"
30055"ushort3 __ovld __cnfn convert_ushort3_sat(ulong3);\n"
30056"ushort3 __ovld __cnfn convert_ushort3_rte(float3);\n"
30057"ushort3 __ovld __cnfn convert_ushort3_sat_rte(float3);\n"
30058"ushort3 __ovld __cnfn convert_ushort3_rtz(float3);\n"
30059"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(float3);\n"
30060"ushort3 __ovld __cnfn convert_ushort3_rtp(float3);\n"
30061"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(float3);\n"
30062"ushort3 __ovld __cnfn convert_ushort3_rtn(float3);\n"
30063"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(float3);\n"
30064"ushort3 __ovld __cnfn convert_ushort3(float3);\n"
30065"ushort3 __ovld __cnfn convert_ushort3_sat(float3);\n"
30066"int3 __ovld __cnfn convert_int3_rte(char3);\n"
30067"int3 __ovld __cnfn convert_int3_sat_rte(char3);\n"
30068"int3 __ovld __cnfn convert_int3_rtz(char3);\n"
30069"int3 __ovld __cnfn convert_int3_sat_rtz(char3);\n"
30070"int3 __ovld __cnfn convert_int3_rtp(char3);\n"
30071"int3 __ovld __cnfn convert_int3_sat_rtp(char3);\n"
30072"int3 __ovld __cnfn convert_int3_rtn(char3);\n"
30073"int3 __ovld __cnfn convert_int3_sat_rtn(char3);\n"
30074"int3 __ovld __cnfn convert_int3(char3);\n"
30075"int3 __ovld __cnfn convert_int3_sat(char3);\n"
30076"int3 __ovld __cnfn convert_int3_rte(uchar3);\n"
30077"int3 __ovld __cnfn convert_int3_sat_rte(uchar3);\n"
30078"int3 __ovld __cnfn convert_int3_rtz(uchar3);\n"
30079"int3 __ovld __cnfn convert_int3_sat_rtz(uchar3);\n"
30080"int3 __ovld __cnfn convert_int3_rtp(uchar3);\n"
30081"int3 __ovld __cnfn convert_int3_sat_rtp(uchar3);\n"
30082"int3 __ovld __cnfn convert_int3_rtn(uchar3);\n"
30083"int3 __ovld __cnfn convert_int3_sat_rtn(uchar3);\n"
30084"int3 __ovld __cnfn convert_int3(uchar3);\n"
30085"int3 __ovld __cnfn convert_int3_sat(uchar3);\n"
30086"int3 __ovld __cnfn convert_int3_rte(short3);\n"
30087"int3 __ovld __cnfn convert_int3_sat_rte(short3);\n"
30088"int3 __ovld __cnfn convert_int3_rtz(short3);\n"
30089"int3 __ovld __cnfn convert_int3_sat_rtz(short3);\n"
30090"int3 __ovld __cnfn convert_int3_rtp(short3);\n"
30091"int3 __ovld __cnfn convert_int3_sat_rtp(short3);\n"
30092"int3 __ovld __cnfn convert_int3_rtn(short3);\n"
30093"int3 __ovld __cnfn convert_int3_sat_rtn(short3);\n"
30094"int3 __ovld __cnfn convert_int3(short3);\n"
30095"int3 __ovld __cnfn convert_int3_sat(short3);\n"
30096"int3 __ovld __cnfn convert_int3_rte(ushort3);\n"
30097"int3 __ovld __cnfn convert_int3_sat_rte(ushort3);\n"
30098"int3 __ovld __cnfn convert_int3_rtz(ushort3);\n"
30099"int3 __ovld __cnfn convert_int3_sat_rtz(ushort3);\n"
30100"int3 __ovld __cnfn convert_int3_rtp(ushort3);\n"
30101"int3 __ovld __cnfn convert_int3_sat_rtp(ushort3);\n"
30102"int3 __ovld __cnfn convert_int3_rtn(ushort3);\n"
30103"int3 __ovld __cnfn convert_int3_sat_rtn(ushort3);\n"
30104"int3 __ovld __cnfn convert_int3(ushort3);\n"
30105"int3 __ovld __cnfn convert_int3_sat(ushort3);\n"
30106"int3 __ovld __cnfn convert_int3_rte(int3);\n"
30107"int3 __ovld __cnfn convert_int3_sat_rte(int3);\n"
30108"int3 __ovld __cnfn convert_int3_rtz(int3);\n"
30109"int3 __ovld __cnfn convert_int3_sat_rtz(int3);\n"
30110"int3 __ovld __cnfn convert_int3_rtp(int3);\n"
30111"int3 __ovld __cnfn convert_int3_sat_rtp(int3);\n"
30112"int3 __ovld __cnfn convert_int3_rtn(int3);\n"
30113"int3 __ovld __cnfn convert_int3_sat_rtn(int3);\n"
30114"int3 __ovld __cnfn convert_int3(int3);\n"
30115"int3 __ovld __cnfn convert_int3_sat(int3);\n"
30116"int3 __ovld __cnfn convert_int3_rte(uint3);\n"
30117"int3 __ovld __cnfn convert_int3_sat_rte(uint3);\n"
30118"int3 __ovld __cnfn convert_int3_rtz(uint3);\n"
30119"int3 __ovld __cnfn convert_int3_sat_rtz(uint3);\n"
30120"int3 __ovld __cnfn convert_int3_rtp(uint3);\n"
30121"int3 __ovld __cnfn convert_int3_sat_rtp(uint3);\n"
30122"int3 __ovld __cnfn convert_int3_rtn(uint3);\n"
30123"int3 __ovld __cnfn convert_int3_sat_rtn(uint3);\n"
30124"int3 __ovld __cnfn convert_int3(uint3);\n"
30125"int3 __ovld __cnfn convert_int3_sat(uint3);\n"
30126"int3 __ovld __cnfn convert_int3_rte(long3);\n"
30127"int3 __ovld __cnfn convert_int3_sat_rte(long3);\n"
30128"int3 __ovld __cnfn convert_int3_rtz(long3);\n"
30129"int3 __ovld __cnfn convert_int3_sat_rtz(long3);\n"
30130"int3 __ovld __cnfn convert_int3_rtp(long3);\n"
30131"int3 __ovld __cnfn convert_int3_sat_rtp(long3);\n"
30132"int3 __ovld __cnfn convert_int3_rtn(long3);\n"
30133"int3 __ovld __cnfn convert_int3_sat_rtn(long3);\n"
30134"int3 __ovld __cnfn convert_int3(long3);\n"
30135"int3 __ovld __cnfn convert_int3_sat(long3);\n"
30136"int3 __ovld __cnfn convert_int3_rte(ulong3);\n"
30137"int3 __ovld __cnfn convert_int3_sat_rte(ulong3);\n"
30138"int3 __ovld __cnfn convert_int3_rtz(ulong3);\n"
30139"int3 __ovld __cnfn convert_int3_sat_rtz(ulong3);\n"
30140"int3 __ovld __cnfn convert_int3_rtp(ulong3);\n"
30141"int3 __ovld __cnfn convert_int3_sat_rtp(ulong3);\n"
30142"int3 __ovld __cnfn convert_int3_rtn(ulong3);\n"
30143"int3 __ovld __cnfn convert_int3_sat_rtn(ulong3);\n"
30144"int3 __ovld __cnfn convert_int3(ulong3);\n"
30145"int3 __ovld __cnfn convert_int3_sat(ulong3);\n"
30146"int3 __ovld __cnfn convert_int3_rte(float3);\n"
30147"int3 __ovld __cnfn convert_int3_sat_rte(float3);\n"
30148"int3 __ovld __cnfn convert_int3_rtz(float3);\n"
30149"int3 __ovld __cnfn convert_int3_sat_rtz(float3);\n"
30150"int3 __ovld __cnfn convert_int3_rtp(float3);\n"
30151"int3 __ovld __cnfn convert_int3_sat_rtp(float3);\n"
30152"int3 __ovld __cnfn convert_int3_rtn(float3);\n"
30153"int3 __ovld __cnfn convert_int3_sat_rtn(float3);\n"
30154"int3 __ovld __cnfn convert_int3(float3);\n"
30155"int3 __ovld __cnfn convert_int3_sat(float3);\n"
30156"uint3 __ovld __cnfn convert_uint3_rte(char3);\n"
30157"uint3 __ovld __cnfn convert_uint3_sat_rte(char3);\n"
30158"uint3 __ovld __cnfn convert_uint3_rtz(char3);\n"
30159"uint3 __ovld __cnfn convert_uint3_sat_rtz(char3);\n"
30160"uint3 __ovld __cnfn convert_uint3_rtp(char3);\n"
30161"uint3 __ovld __cnfn convert_uint3_sat_rtp(char3);\n"
30162"uint3 __ovld __cnfn convert_uint3_rtn(char3);\n"
30163"uint3 __ovld __cnfn convert_uint3_sat_rtn(char3);\n"
30164"uint3 __ovld __cnfn convert_uint3(char3);\n"
30165"uint3 __ovld __cnfn convert_uint3_sat(char3);\n"
30166"uint3 __ovld __cnfn convert_uint3_rte(uchar3);\n"
30167"uint3 __ovld __cnfn convert_uint3_sat_rte(uchar3);\n"
30168"uint3 __ovld __cnfn convert_uint3_rtz(uchar3);\n"
30169"uint3 __ovld __cnfn convert_uint3_sat_rtz(uchar3);\n"
30170"uint3 __ovld __cnfn convert_uint3_rtp(uchar3);\n"
30171"uint3 __ovld __cnfn convert_uint3_sat_rtp(uchar3);\n"
30172"uint3 __ovld __cnfn convert_uint3_rtn(uchar3);\n"
30173"uint3 __ovld __cnfn convert_uint3_sat_rtn(uchar3);\n"
30174"uint3 __ovld __cnfn convert_uint3(uchar3);\n"
30175"uint3 __ovld __cnfn convert_uint3_sat(uchar3);\n"
30176"uint3 __ovld __cnfn convert_uint3_rte(short3);\n"
30177"uint3 __ovld __cnfn convert_uint3_sat_rte(short3);\n"
30178"uint3 __ovld __cnfn convert_uint3_rtz(short3);\n"
30179"uint3 __ovld __cnfn convert_uint3_sat_rtz(short3);\n"
30180"uint3 __ovld __cnfn convert_uint3_rtp(short3);\n"
30181"uint3 __ovld __cnfn convert_uint3_sat_rtp(short3);\n"
30182"uint3 __ovld __cnfn convert_uint3_rtn(short3);\n"
30183"uint3 __ovld __cnfn convert_uint3_sat_rtn(short3);\n"
30184"uint3 __ovld __cnfn convert_uint3(short3);\n"
30185"uint3 __ovld __cnfn convert_uint3_sat(short3);\n"
30186"uint3 __ovld __cnfn convert_uint3_rte(ushort3);\n"
30187"uint3 __ovld __cnfn convert_uint3_sat_rte(ushort3);\n"
30188"uint3 __ovld __cnfn convert_uint3_rtz(ushort3);\n"
30189"uint3 __ovld __cnfn convert_uint3_sat_rtz(ushort3);\n"
30190"uint3 __ovld __cnfn convert_uint3_rtp(ushort3);\n"
30191"uint3 __ovld __cnfn convert_uint3_sat_rtp(ushort3);\n"
30192"uint3 __ovld __cnfn convert_uint3_rtn(ushort3);\n"
30193"uint3 __ovld __cnfn convert_uint3_sat_rtn(ushort3);\n"
30194"uint3 __ovld __cnfn convert_uint3(ushort3);\n"
30195"uint3 __ovld __cnfn convert_uint3_sat(ushort3);\n"
30196"uint3 __ovld __cnfn convert_uint3_rte(int3);\n"
30197"uint3 __ovld __cnfn convert_uint3_sat_rte(int3);\n"
30198"uint3 __ovld __cnfn convert_uint3_rtz(int3);\n"
30199"uint3 __ovld __cnfn convert_uint3_sat_rtz(int3);\n"
30200"uint3 __ovld __cnfn convert_uint3_rtp(int3);\n"
30201"uint3 __ovld __cnfn convert_uint3_sat_rtp(int3);\n"
30202"uint3 __ovld __cnfn convert_uint3_rtn(int3);\n"
30203"uint3 __ovld __cnfn convert_uint3_sat_rtn(int3);\n"
30204"uint3 __ovld __cnfn convert_uint3(int3);\n"
30205"uint3 __ovld __cnfn convert_uint3_sat(int3);\n"
30206"uint3 __ovld __cnfn convert_uint3_rte(uint3);\n"
30207"uint3 __ovld __cnfn convert_uint3_sat_rte(uint3);\n"
30208"uint3 __ovld __cnfn convert_uint3_rtz(uint3);\n"
30209"uint3 __ovld __cnfn convert_uint3_sat_rtz(uint3);\n"
30210"uint3 __ovld __cnfn convert_uint3_rtp(uint3);\n"
30211"uint3 __ovld __cnfn convert_uint3_sat_rtp(uint3);\n"
30212"uint3 __ovld __cnfn convert_uint3_rtn(uint3);\n"
30213"uint3 __ovld __cnfn convert_uint3_sat_rtn(uint3);\n"
30214"uint3 __ovld __cnfn convert_uint3(uint3);\n"
30215"uint3 __ovld __cnfn convert_uint3_sat(uint3);\n"
30216"uint3 __ovld __cnfn convert_uint3_rte(long3);\n"
30217"uint3 __ovld __cnfn convert_uint3_sat_rte(long3);\n"
30218"uint3 __ovld __cnfn convert_uint3_rtz(long3);\n"
30219"uint3 __ovld __cnfn convert_uint3_sat_rtz(long3);\n"
30220"uint3 __ovld __cnfn convert_uint3_rtp(long3);\n"
30221"uint3 __ovld __cnfn convert_uint3_sat_rtp(long3);\n"
30222"uint3 __ovld __cnfn convert_uint3_rtn(long3);\n"
30223"uint3 __ovld __cnfn convert_uint3_sat_rtn(long3);\n"
30224"uint3 __ovld __cnfn convert_uint3(long3);\n"
30225"uint3 __ovld __cnfn convert_uint3_sat(long3);\n"
30226"uint3 __ovld __cnfn convert_uint3_rte(ulong3);\n"
30227"uint3 __ovld __cnfn convert_uint3_sat_rte(ulong3);\n"
30228"uint3 __ovld __cnfn convert_uint3_rtz(ulong3);\n"
30229"uint3 __ovld __cnfn convert_uint3_sat_rtz(ulong3);\n"
30230"uint3 __ovld __cnfn convert_uint3_rtp(ulong3);\n"
30231"uint3 __ovld __cnfn convert_uint3_sat_rtp(ulong3);\n"
30232"uint3 __ovld __cnfn convert_uint3_rtn(ulong3);\n"
30233"uint3 __ovld __cnfn convert_uint3_sat_rtn(ulong3);\n"
30234"uint3 __ovld __cnfn convert_uint3(ulong3);\n"
30235"uint3 __ovld __cnfn convert_uint3_sat(ulong3);\n"
30236"uint3 __ovld __cnfn convert_uint3_rte(float3);\n"
30237"uint3 __ovld __cnfn convert_uint3_sat_rte(float3);\n"
30238"uint3 __ovld __cnfn convert_uint3_rtz(float3);\n"
30239"uint3 __ovld __cnfn convert_uint3_sat_rtz(float3);\n"
30240"uint3 __ovld __cnfn convert_uint3_rtp(float3);\n"
30241"uint3 __ovld __cnfn convert_uint3_sat_rtp(float3);\n"
30242"uint3 __ovld __cnfn convert_uint3_rtn(float3);\n"
30243"uint3 __ovld __cnfn convert_uint3_sat_rtn(float3);\n"
30244"uint3 __ovld __cnfn convert_uint3(float3);\n"
30245"uint3 __ovld __cnfn convert_uint3_sat(float3);\n"
30246"long3 __ovld __cnfn convert_long3_rte(char3);\n"
30247"long3 __ovld __cnfn convert_long3_sat_rte(char3);\n"
30248"long3 __ovld __cnfn convert_long3_rtz(char3);\n"
30249"long3 __ovld __cnfn convert_long3_sat_rtz(char3);\n"
30250"long3 __ovld __cnfn convert_long3_rtp(char3);\n"
30251"long3 __ovld __cnfn convert_long3_sat_rtp(char3);\n"
30252"long3 __ovld __cnfn convert_long3_rtn(char3);\n"
30253"long3 __ovld __cnfn convert_long3_sat_rtn(char3);\n"
30254"long3 __ovld __cnfn convert_long3(char3);\n"
30255"long3 __ovld __cnfn convert_long3_sat(char3);\n"
30256"long3 __ovld __cnfn convert_long3_rte(uchar3);\n"
30257"long3 __ovld __cnfn convert_long3_sat_rte(uchar3);\n"
30258"long3 __ovld __cnfn convert_long3_rtz(uchar3);\n"
30259"long3 __ovld __cnfn convert_long3_sat_rtz(uchar3);\n"
30260"long3 __ovld __cnfn convert_long3_rtp(uchar3);\n"
30261"long3 __ovld __cnfn convert_long3_sat_rtp(uchar3);\n"
30262"long3 __ovld __cnfn convert_long3_rtn(uchar3);\n"
30263"long3 __ovld __cnfn convert_long3_sat_rtn(uchar3);\n"
30264"long3 __ovld __cnfn convert_long3(uchar3);\n"
30265"long3 __ovld __cnfn convert_long3_sat(uchar3);\n"
30266"long3 __ovld __cnfn convert_long3_rte(short3);\n"
30267"long3 __ovld __cnfn convert_long3_sat_rte(short3);\n"
30268"long3 __ovld __cnfn convert_long3_rtz(short3);\n"
30269"long3 __ovld __cnfn convert_long3_sat_rtz(short3);\n"
30270"long3 __ovld __cnfn convert_long3_rtp(short3);\n"
30271"long3 __ovld __cnfn convert_long3_sat_rtp(short3);\n"
30272"long3 __ovld __cnfn convert_long3_rtn(short3);\n"
30273"long3 __ovld __cnfn convert_long3_sat_rtn(short3);\n"
30274"long3 __ovld __cnfn convert_long3(short3);\n"
30275"long3 __ovld __cnfn convert_long3_sat(short3);\n"
30276"long3 __ovld __cnfn convert_long3_rte(ushort3);\n"
30277"long3 __ovld __cnfn convert_long3_sat_rte(ushort3);\n"
30278"long3 __ovld __cnfn convert_long3_rtz(ushort3);\n"
30279"long3 __ovld __cnfn convert_long3_sat_rtz(ushort3);\n"
30280"long3 __ovld __cnfn convert_long3_rtp(ushort3);\n"
30281"long3 __ovld __cnfn convert_long3_sat_rtp(ushort3);\n"
30282"long3 __ovld __cnfn convert_long3_rtn(ushort3);\n"
30283"long3 __ovld __cnfn convert_long3_sat_rtn(ushort3);\n"
30284"long3 __ovld __cnfn convert_long3(ushort3);\n"
30285"long3 __ovld __cnfn convert_long3_sat(ushort3);\n"
30286"long3 __ovld __cnfn convert_long3_rte(int3);\n"
30287"long3 __ovld __cnfn convert_long3_sat_rte(int3);\n"
30288"long3 __ovld __cnfn convert_long3_rtz(int3);\n"
30289"long3 __ovld __cnfn convert_long3_sat_rtz(int3);\n"
30290"long3 __ovld __cnfn convert_long3_rtp(int3);\n"
30291"long3 __ovld __cnfn convert_long3_sat_rtp(int3);\n"
30292"long3 __ovld __cnfn convert_long3_rtn(int3);\n"
30293"long3 __ovld __cnfn convert_long3_sat_rtn(int3);\n"
30294"long3 __ovld __cnfn convert_long3(int3);\n"
30295"long3 __ovld __cnfn convert_long3_sat(int3);\n"
30296"long3 __ovld __cnfn convert_long3_rte(uint3);\n"
30297"long3 __ovld __cnfn convert_long3_sat_rte(uint3);\n"
30298"long3 __ovld __cnfn convert_long3_rtz(uint3);\n"
30299"long3 __ovld __cnfn convert_long3_sat_rtz(uint3);\n"
30300"long3 __ovld __cnfn convert_long3_rtp(uint3);\n"
30301"long3 __ovld __cnfn convert_long3_sat_rtp(uint3);\n"
30302"long3 __ovld __cnfn convert_long3_rtn(uint3);\n"
30303"long3 __ovld __cnfn convert_long3_sat_rtn(uint3);\n"
30304"long3 __ovld __cnfn convert_long3(uint3);\n"
30305"long3 __ovld __cnfn convert_long3_sat(uint3);\n"
30306"long3 __ovld __cnfn convert_long3_rte(long3);\n"
30307"long3 __ovld __cnfn convert_long3_sat_rte(long3);\n"
30308"long3 __ovld __cnfn convert_long3_rtz(long3);\n"
30309"long3 __ovld __cnfn convert_long3_sat_rtz(long3);\n"
30310"long3 __ovld __cnfn convert_long3_rtp(long3);\n"
30311"long3 __ovld __cnfn convert_long3_sat_rtp(long3);\n"
30312"long3 __ovld __cnfn convert_long3_rtn(long3);\n"
30313"long3 __ovld __cnfn convert_long3_sat_rtn(long3);\n"
30314"long3 __ovld __cnfn convert_long3(long3);\n"
30315"long3 __ovld __cnfn convert_long3_sat(long3);\n"
30316"long3 __ovld __cnfn convert_long3_rte(ulong3);\n"
30317"long3 __ovld __cnfn convert_long3_sat_rte(ulong3);\n"
30318"long3 __ovld __cnfn convert_long3_rtz(ulong3);\n"
30319"long3 __ovld __cnfn convert_long3_sat_rtz(ulong3);\n"
30320"long3 __ovld __cnfn convert_long3_rtp(ulong3);\n"
30321"long3 __ovld __cnfn convert_long3_sat_rtp(ulong3);\n"
30322"long3 __ovld __cnfn convert_long3_rtn(ulong3);\n"
30323"long3 __ovld __cnfn convert_long3_sat_rtn(ulong3);\n"
30324"long3 __ovld __cnfn convert_long3(ulong3);\n"
30325"long3 __ovld __cnfn convert_long3_sat(ulong3);\n"
30326"long3 __ovld __cnfn convert_long3_rte(float3);\n"
30327"long3 __ovld __cnfn convert_long3_sat_rte(float3);\n"
30328"long3 __ovld __cnfn convert_long3_rtz(float3);\n"
30329"long3 __ovld __cnfn convert_long3_sat_rtz(float3);\n"
30330"long3 __ovld __cnfn convert_long3_rtp(float3);\n"
30331"long3 __ovld __cnfn convert_long3_sat_rtp(float3);\n"
30332"long3 __ovld __cnfn convert_long3_rtn(float3);\n"
30333"long3 __ovld __cnfn convert_long3_sat_rtn(float3);\n"
30334"long3 __ovld __cnfn convert_long3(float3);\n"
30335"long3 __ovld __cnfn convert_long3_sat(float3);\n"
30336"ulong3 __ovld __cnfn convert_ulong3_rte(char3);\n"
30337"ulong3 __ovld __cnfn convert_ulong3_sat_rte(char3);\n"
30338"ulong3 __ovld __cnfn convert_ulong3_rtz(char3);\n"
30339"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(char3);\n"
30340"ulong3 __ovld __cnfn convert_ulong3_rtp(char3);\n"
30341"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(char3);\n"
30342"ulong3 __ovld __cnfn convert_ulong3_rtn(char3);\n"
30343"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(char3);\n"
30344"ulong3 __ovld __cnfn convert_ulong3(char3);\n"
30345"ulong3 __ovld __cnfn convert_ulong3_sat(char3);\n"
30346"ulong3 __ovld __cnfn convert_ulong3_rte(uchar3);\n"
30347"ulong3 __ovld __cnfn convert_ulong3_sat_rte(uchar3);\n"
30348"ulong3 __ovld __cnfn convert_ulong3_rtz(uchar3);\n"
30349"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uchar3);\n"
30350"ulong3 __ovld __cnfn convert_ulong3_rtp(uchar3);\n"
30351"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uchar3);\n"
30352"ulong3 __ovld __cnfn convert_ulong3_rtn(uchar3);\n"
30353"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uchar3);\n"
30354"ulong3 __ovld __cnfn convert_ulong3(uchar3);\n"
30355"ulong3 __ovld __cnfn convert_ulong3_sat(uchar3);\n"
30356"ulong3 __ovld __cnfn convert_ulong3_rte(short3);\n"
30357"ulong3 __ovld __cnfn convert_ulong3_sat_rte(short3);\n"
30358"ulong3 __ovld __cnfn convert_ulong3_rtz(short3);\n"
30359"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(short3);\n"
30360"ulong3 __ovld __cnfn convert_ulong3_rtp(short3);\n"
30361"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(short3);\n"
30362"ulong3 __ovld __cnfn convert_ulong3_rtn(short3);\n"
30363"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(short3);\n"
30364"ulong3 __ovld __cnfn convert_ulong3(short3);\n"
30365"ulong3 __ovld __cnfn convert_ulong3_sat(short3);\n"
30366"ulong3 __ovld __cnfn convert_ulong3_rte(ushort3);\n"
30367"ulong3 __ovld __cnfn convert_ulong3_sat_rte(ushort3);\n"
30368"ulong3 __ovld __cnfn convert_ulong3_rtz(ushort3);\n"
30369"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ushort3);\n"
30370"ulong3 __ovld __cnfn convert_ulong3_rtp(ushort3);\n"
30371"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ushort3);\n"
30372"ulong3 __ovld __cnfn convert_ulong3_rtn(ushort3);\n"
30373"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ushort3);\n"
30374"ulong3 __ovld __cnfn convert_ulong3(ushort3);\n"
30375"ulong3 __ovld __cnfn convert_ulong3_sat(ushort3);\n"
30376"ulong3 __ovld __cnfn convert_ulong3_rte(int3);\n"
30377"ulong3 __ovld __cnfn convert_ulong3_sat_rte(int3);\n"
30378"ulong3 __ovld __cnfn convert_ulong3_rtz(int3);\n"
30379"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(int3);\n"
30380"ulong3 __ovld __cnfn convert_ulong3_rtp(int3);\n"
30381"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(int3);\n"
30382"ulong3 __ovld __cnfn convert_ulong3_rtn(int3);\n"
30383"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(int3);\n"
30384"ulong3 __ovld __cnfn convert_ulong3(int3);\n"
30385"ulong3 __ovld __cnfn convert_ulong3_sat(int3);\n"
30386"ulong3 __ovld __cnfn convert_ulong3_rte(uint3);\n"
30387"ulong3 __ovld __cnfn convert_ulong3_sat_rte(uint3);\n"
30388"ulong3 __ovld __cnfn convert_ulong3_rtz(uint3);\n"
30389"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uint3);\n"
30390"ulong3 __ovld __cnfn convert_ulong3_rtp(uint3);\n"
30391"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uint3);\n"
30392"ulong3 __ovld __cnfn convert_ulong3_rtn(uint3);\n"
30393"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uint3);\n"
30394"ulong3 __ovld __cnfn convert_ulong3(uint3);\n"
30395"ulong3 __ovld __cnfn convert_ulong3_sat(uint3);\n"
30396"ulong3 __ovld __cnfn convert_ulong3_rte(long3);\n"
30397"ulong3 __ovld __cnfn convert_ulong3_sat_rte(long3);\n"
30398"ulong3 __ovld __cnfn convert_ulong3_rtz(long3);\n"
30399"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(long3);\n"
30400"ulong3 __ovld __cnfn convert_ulong3_rtp(long3);\n"
30401"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(long3);\n"
30402"ulong3 __ovld __cnfn convert_ulong3_rtn(long3);\n"
30403"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(long3);\n"
30404"ulong3 __ovld __cnfn convert_ulong3(long3);\n"
30405"ulong3 __ovld __cnfn convert_ulong3_sat(long3);\n"
30406"ulong3 __ovld __cnfn convert_ulong3_rte(ulong3);\n"
30407"ulong3 __ovld __cnfn convert_ulong3_sat_rte(ulong3);\n"
30408"ulong3 __ovld __cnfn convert_ulong3_rtz(ulong3);\n"
30409"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ulong3);\n"
30410"ulong3 __ovld __cnfn convert_ulong3_rtp(ulong3);\n"
30411"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ulong3);\n"
30412"ulong3 __ovld __cnfn convert_ulong3_rtn(ulong3);\n"
30413"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ulong3);\n"
30414"ulong3 __ovld __cnfn convert_ulong3(ulong3);\n"
30415"ulong3 __ovld __cnfn convert_ulong3_sat(ulong3);\n"
30416"ulong3 __ovld __cnfn convert_ulong3_rte(float3);\n"
30417"ulong3 __ovld __cnfn convert_ulong3_sat_rte(float3);\n"
30418"ulong3 __ovld __cnfn convert_ulong3_rtz(float3);\n"
30419"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(float3);\n"
30420"ulong3 __ovld __cnfn convert_ulong3_rtp(float3);\n"
30421"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(float3);\n"
30422"ulong3 __ovld __cnfn convert_ulong3_rtn(float3);\n"
30423"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(float3);\n"
30424"ulong3 __ovld __cnfn convert_ulong3(float3);\n"
30425"ulong3 __ovld __cnfn convert_ulong3_sat(float3);\n"
30426"float3 __ovld __cnfn convert_float3_rte(char3);\n"
30427"float3 __ovld __cnfn convert_float3_rtz(char3);\n"
30428"float3 __ovld __cnfn convert_float3_rtp(char3);\n"
30429"float3 __ovld __cnfn convert_float3_rtn(char3);\n"
30430"float3 __ovld __cnfn convert_float3(char3);\n"
30431"float3 __ovld __cnfn convert_float3_rte(uchar3);\n"
30432"float3 __ovld __cnfn convert_float3_rtz(uchar3);\n"
30433"float3 __ovld __cnfn convert_float3_rtp(uchar3);\n"
30434"float3 __ovld __cnfn convert_float3_rtn(uchar3);\n"
30435"float3 __ovld __cnfn convert_float3(uchar3);\n"
30436"float3 __ovld __cnfn convert_float3_rte(short3);\n"
30437"float3 __ovld __cnfn convert_float3_rtz(short3);\n"
30438"float3 __ovld __cnfn convert_float3_rtp(short3);\n"
30439"float3 __ovld __cnfn convert_float3_rtn(short3);\n"
30440"float3 __ovld __cnfn convert_float3(short3);\n"
30441"float3 __ovld __cnfn convert_float3_rte(ushort3);\n"
30442"float3 __ovld __cnfn convert_float3_rtz(ushort3);\n"
30443"float3 __ovld __cnfn convert_float3_rtp(ushort3);\n"
30444"float3 __ovld __cnfn convert_float3_rtn(ushort3);\n"
30445"float3 __ovld __cnfn convert_float3(ushort3);\n"
30446"float3 __ovld __cnfn convert_float3_rte(int3);\n"
30447"float3 __ovld __cnfn convert_float3_rtz(int3);\n"
30448"float3 __ovld __cnfn convert_float3_rtp(int3);\n"
30449"float3 __ovld __cnfn convert_float3_rtn(int3);\n"
30450"float3 __ovld __cnfn convert_float3(int3);\n"
30451"float3 __ovld __cnfn convert_float3_rte(uint3);\n"
30452"float3 __ovld __cnfn convert_float3_rtz(uint3);\n"
30453"float3 __ovld __cnfn convert_float3_rtp(uint3);\n"
30454"float3 __ovld __cnfn convert_float3_rtn(uint3);\n"
30455"float3 __ovld __cnfn convert_float3(uint3);\n"
30456"float3 __ovld __cnfn convert_float3_rte(long3);\n"
30457"float3 __ovld __cnfn convert_float3_rtz(long3);\n"
30458"float3 __ovld __cnfn convert_float3_rtp(long3);\n"
30459"float3 __ovld __cnfn convert_float3_rtn(long3);\n"
30460"float3 __ovld __cnfn convert_float3(long3);\n"
30461"float3 __ovld __cnfn convert_float3_rte(ulong3);\n"
30462"float3 __ovld __cnfn convert_float3_rtz(ulong3);\n"
30463"float3 __ovld __cnfn convert_float3_rtp(ulong3);\n"
30464"float3 __ovld __cnfn convert_float3_rtn(ulong3);\n"
30465"float3 __ovld __cnfn convert_float3(ulong3);\n"
30466"float3 __ovld __cnfn convert_float3_rte(float3);\n"
30467"float3 __ovld __cnfn convert_float3_rtz(float3);\n"
30468"float3 __ovld __cnfn convert_float3_rtp(float3);\n"
30469"float3 __ovld __cnfn convert_float3_rtn(float3);\n"
30470"float3 __ovld __cnfn convert_float3(float3);\n"
30471"char4 __ovld __cnfn convert_char4_rte(char4);\n"
30472"char4 __ovld __cnfn convert_char4_sat_rte(char4);\n"
30473"char4 __ovld __cnfn convert_char4_rtz(char4);\n"
30474"char4 __ovld __cnfn convert_char4_sat_rtz(char4);\n"
30475"char4 __ovld __cnfn convert_char4_rtp(char4);\n"
30476"char4 __ovld __cnfn convert_char4_sat_rtp(char4);\n"
30477"char4 __ovld __cnfn convert_char4_rtn(char4);\n"
30478"char4 __ovld __cnfn convert_char4_sat_rtn(char4);\n"
30479"char4 __ovld __cnfn convert_char4(char4);\n"
30480"char4 __ovld __cnfn convert_char4_sat(char4);\n"
30481"char4 __ovld __cnfn convert_char4_rte(uchar4);\n"
30482"char4 __ovld __cnfn convert_char4_sat_rte(uchar4);\n"
30483"char4 __ovld __cnfn convert_char4_rtz(uchar4);\n"
30484"char4 __ovld __cnfn convert_char4_sat_rtz(uchar4);\n"
30485"char4 __ovld __cnfn convert_char4_rtp(uchar4);\n"
30486"char4 __ovld __cnfn convert_char4_sat_rtp(uchar4);\n"
30487"char4 __ovld __cnfn convert_char4_rtn(uchar4);\n"
30488"char4 __ovld __cnfn convert_char4_sat_rtn(uchar4);\n"
30489"char4 __ovld __cnfn convert_char4(uchar4);\n"
30490"char4 __ovld __cnfn convert_char4_sat(uchar4);\n"
30491"char4 __ovld __cnfn convert_char4_rte(short4);\n"
30492"char4 __ovld __cnfn convert_char4_sat_rte(short4);\n"
30493"char4 __ovld __cnfn convert_char4_rtz(short4);\n"
30494"char4 __ovld __cnfn convert_char4_sat_rtz(short4);\n"
30495"char4 __ovld __cnfn convert_char4_rtp(short4);\n"
30496"char4 __ovld __cnfn convert_char4_sat_rtp(short4);\n"
30497"char4 __ovld __cnfn convert_char4_rtn(short4);\n"
30498"char4 __ovld __cnfn convert_char4_sat_rtn(short4);\n"
30499"char4 __ovld __cnfn convert_char4(short4);\n"
30500"char4 __ovld __cnfn convert_char4_sat(short4);\n"
30501"char4 __ovld __cnfn convert_char4_rte(ushort4);\n"
30502"char4 __ovld __cnfn convert_char4_sat_rte(ushort4);\n"
30503"char4 __ovld __cnfn convert_char4_rtz(ushort4);\n"
30504"char4 __ovld __cnfn convert_char4_sat_rtz(ushort4);\n"
30505"char4 __ovld __cnfn convert_char4_rtp(ushort4);\n"
30506"char4 __ovld __cnfn convert_char4_sat_rtp(ushort4);\n"
30507"char4 __ovld __cnfn convert_char4_rtn(ushort4);\n"
30508"char4 __ovld __cnfn convert_char4_sat_rtn(ushort4);\n"
30509"char4 __ovld __cnfn convert_char4(ushort4);\n"
30510"char4 __ovld __cnfn convert_char4_sat(ushort4);\n"
30511"char4 __ovld __cnfn convert_char4_rte(int4);\n"
30512"char4 __ovld __cnfn convert_char4_sat_rte(int4);\n"
30513"char4 __ovld __cnfn convert_char4_rtz(int4);\n"
30514"char4 __ovld __cnfn convert_char4_sat_rtz(int4);\n"
30515"char4 __ovld __cnfn convert_char4_rtp(int4);\n"
30516"char4 __ovld __cnfn convert_char4_sat_rtp(int4);\n"
30517"char4 __ovld __cnfn convert_char4_rtn(int4);\n"
30518"char4 __ovld __cnfn convert_char4_sat_rtn(int4);\n"
30519"char4 __ovld __cnfn convert_char4(int4);\n"
30520"char4 __ovld __cnfn convert_char4_sat(int4);\n"
30521"char4 __ovld __cnfn convert_char4_rte(uint4);\n"
30522"char4 __ovld __cnfn convert_char4_sat_rte(uint4);\n"
30523"char4 __ovld __cnfn convert_char4_rtz(uint4);\n"
30524"char4 __ovld __cnfn convert_char4_sat_rtz(uint4);\n"
30525"char4 __ovld __cnfn convert_char4_rtp(uint4);\n"
30526"char4 __ovld __cnfn convert_char4_sat_rtp(uint4);\n"
30527"char4 __ovld __cnfn convert_char4_rtn(uint4);\n"
30528"char4 __ovld __cnfn convert_char4_sat_rtn(uint4);\n"
30529"char4 __ovld __cnfn convert_char4(uint4);\n"
30530"char4 __ovld __cnfn convert_char4_sat(uint4);\n"
30531"char4 __ovld __cnfn convert_char4_rte(long4);\n"
30532"char4 __ovld __cnfn convert_char4_sat_rte(long4);\n"
30533"char4 __ovld __cnfn convert_char4_rtz(long4);\n"
30534"char4 __ovld __cnfn convert_char4_sat_rtz(long4);\n"
30535"char4 __ovld __cnfn convert_char4_rtp(long4);\n"
30536"char4 __ovld __cnfn convert_char4_sat_rtp(long4);\n"
30537"char4 __ovld __cnfn convert_char4_rtn(long4);\n"
30538"char4 __ovld __cnfn convert_char4_sat_rtn(long4);\n"
30539"char4 __ovld __cnfn convert_char4(long4);\n"
30540"char4 __ovld __cnfn convert_char4_sat(long4);\n"
30541"char4 __ovld __cnfn convert_char4_rte(ulong4);\n"
30542"char4 __ovld __cnfn convert_char4_sat_rte(ulong4);\n"
30543"char4 __ovld __cnfn convert_char4_rtz(ulong4);\n"
30544"char4 __ovld __cnfn convert_char4_sat_rtz(ulong4);\n"
30545"char4 __ovld __cnfn convert_char4_rtp(ulong4);\n"
30546"char4 __ovld __cnfn convert_char4_sat_rtp(ulong4);\n"
30547"char4 __ovld __cnfn convert_char4_rtn(ulong4);\n"
30548"char4 __ovld __cnfn convert_char4_sat_rtn(ulong4);\n"
30549"char4 __ovld __cnfn convert_char4(ulong4);\n"
30550"char4 __ovld __cnfn convert_char4_sat(ulong4);\n"
30551"char4 __ovld __cnfn convert_char4_rte(float4);\n"
30552"char4 __ovld __cnfn convert_char4_sat_rte(float4);\n"
30553"char4 __ovld __cnfn convert_char4_rtz(float4);\n"
30554"char4 __ovld __cnfn convert_char4_sat_rtz(float4);\n"
30555"char4 __ovld __cnfn convert_char4_rtp(float4);\n"
30556"char4 __ovld __cnfn convert_char4_sat_rtp(float4);\n"
30557"char4 __ovld __cnfn convert_char4_rtn(float4);\n"
30558"char4 __ovld __cnfn convert_char4_sat_rtn(float4);\n"
30559"char4 __ovld __cnfn convert_char4(float4);\n"
30560"char4 __ovld __cnfn convert_char4_sat(float4);\n"
30561"uchar4 __ovld __cnfn convert_uchar4_rte(char4);\n"
30562"uchar4 __ovld __cnfn convert_uchar4_sat_rte(char4);\n"
30563"uchar4 __ovld __cnfn convert_uchar4_rtz(char4);\n"
30564"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(char4);\n"
30565"uchar4 __ovld __cnfn convert_uchar4_rtp(char4);\n"
30566"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(char4);\n"
30567"uchar4 __ovld __cnfn convert_uchar4_rtn(char4);\n"
30568"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(char4);\n"
30569"uchar4 __ovld __cnfn convert_uchar4(char4);\n"
30570"uchar4 __ovld __cnfn convert_uchar4_sat(char4);\n"
30571"uchar4 __ovld __cnfn convert_uchar4_rte(uchar4);\n"
30572"uchar4 __ovld __cnfn convert_uchar4_sat_rte(uchar4);\n"
30573"uchar4 __ovld __cnfn convert_uchar4_rtz(uchar4);\n"
30574"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uchar4);\n"
30575"uchar4 __ovld __cnfn convert_uchar4_rtp(uchar4);\n"
30576"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uchar4);\n"
30577"uchar4 __ovld __cnfn convert_uchar4_rtn(uchar4);\n"
30578"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uchar4);\n"
30579"uchar4 __ovld __cnfn convert_uchar4(uchar4);\n"
30580"uchar4 __ovld __cnfn convert_uchar4_sat(uchar4);\n"
30581"uchar4 __ovld __cnfn convert_uchar4_rte(short4);\n"
30582"uchar4 __ovld __cnfn convert_uchar4_sat_rte(short4);\n"
30583"uchar4 __ovld __cnfn convert_uchar4_rtz(short4);\n"
30584"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(short4);\n"
30585"uchar4 __ovld __cnfn convert_uchar4_rtp(short4);\n"
30586"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(short4);\n"
30587"uchar4 __ovld __cnfn convert_uchar4_rtn(short4);\n"
30588"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(short4);\n"
30589"uchar4 __ovld __cnfn convert_uchar4(short4);\n"
30590"uchar4 __ovld __cnfn convert_uchar4_sat(short4);\n"
30591"uchar4 __ovld __cnfn convert_uchar4_rte(ushort4);\n"
30592"uchar4 __ovld __cnfn convert_uchar4_sat_rte(ushort4);\n"
30593"uchar4 __ovld __cnfn convert_uchar4_rtz(ushort4);\n"
30594"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ushort4);\n"
30595"uchar4 __ovld __cnfn convert_uchar4_rtp(ushort4);\n"
30596"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ushort4);\n"
30597"uchar4 __ovld __cnfn convert_uchar4_rtn(ushort4);\n"
30598"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ushort4);\n"
30599"uchar4 __ovld __cnfn convert_uchar4(ushort4);\n"
30600"uchar4 __ovld __cnfn convert_uchar4_sat(ushort4);\n"
30601"uchar4 __ovld __cnfn convert_uchar4_rte(int4);\n"
30602"uchar4 __ovld __cnfn convert_uchar4_sat_rte(int4);\n"
30603"uchar4 __ovld __cnfn convert_uchar4_rtz(int4);\n"
30604"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(int4);\n"
30605"uchar4 __ovld __cnfn convert_uchar4_rtp(int4);\n"
30606"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(int4);\n"
30607"uchar4 __ovld __cnfn convert_uchar4_rtn(int4);\n"
30608"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(int4);\n"
30609"uchar4 __ovld __cnfn convert_uchar4(int4);\n"
30610"uchar4 __ovld __cnfn convert_uchar4_sat(int4);\n"
30611"uchar4 __ovld __cnfn convert_uchar4_rte(uint4);\n"
30612"uchar4 __ovld __cnfn convert_uchar4_sat_rte(uint4);\n"
30613"uchar4 __ovld __cnfn convert_uchar4_rtz(uint4);\n"
30614"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uint4);\n"
30615"uchar4 __ovld __cnfn convert_uchar4_rtp(uint4);\n"
30616"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uint4);\n"
30617"uchar4 __ovld __cnfn convert_uchar4_rtn(uint4);\n"
30618"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uint4);\n"
30619"uchar4 __ovld __cnfn convert_uchar4(uint4);\n"
30620"uchar4 __ovld __cnfn convert_uchar4_sat(uint4);\n"
30621"uchar4 __ovld __cnfn convert_uchar4_rte(long4);\n"
30622"uchar4 __ovld __cnfn convert_uchar4_sat_rte(long4);\n"
30623"uchar4 __ovld __cnfn convert_uchar4_rtz(long4);\n"
30624"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(long4);\n"
30625"uchar4 __ovld __cnfn convert_uchar4_rtp(long4);\n"
30626"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(long4);\n"
30627"uchar4 __ovld __cnfn convert_uchar4_rtn(long4);\n"
30628"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(long4);\n"
30629"uchar4 __ovld __cnfn convert_uchar4(long4);\n"
30630"uchar4 __ovld __cnfn convert_uchar4_sat(long4);\n"
30631"uchar4 __ovld __cnfn convert_uchar4_rte(ulong4);\n"
30632"uchar4 __ovld __cnfn convert_uchar4_sat_rte(ulong4);\n"
30633"uchar4 __ovld __cnfn convert_uchar4_rtz(ulong4);\n"
30634"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ulong4);\n"
30635"uchar4 __ovld __cnfn convert_uchar4_rtp(ulong4);\n"
30636"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ulong4);\n"
30637"uchar4 __ovld __cnfn convert_uchar4_rtn(ulong4);\n"
30638"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ulong4);\n"
30639"uchar4 __ovld __cnfn convert_uchar4(ulong4);\n"
30640"uchar4 __ovld __cnfn convert_uchar4_sat(ulong4);\n"
30641"uchar4 __ovld __cnfn convert_uchar4_rte(float4);\n"
30642"uchar4 __ovld __cnfn convert_uchar4_sat_rte(float4);\n"
30643"uchar4 __ovld __cnfn convert_uchar4_rtz(float4);\n"
30644"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(float4);\n"
30645"uchar4 __ovld __cnfn convert_uchar4_rtp(float4);\n"
30646"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(float4);\n"
30647"uchar4 __ovld __cnfn convert_uchar4_rtn(float4);\n"
30648"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(float4);\n"
30649"uchar4 __ovld __cnfn convert_uchar4(float4);\n"
30650"uchar4 __ovld __cnfn convert_uchar4_sat(float4);\n"
30651"short4 __ovld __cnfn convert_short4_rte(char4);\n"
30652"short4 __ovld __cnfn convert_short4_sat_rte(char4);\n"
30653"short4 __ovld __cnfn convert_short4_rtz(char4);\n"
30654"short4 __ovld __cnfn convert_short4_sat_rtz(char4);\n"
30655"short4 __ovld __cnfn convert_short4_rtp(char4);\n"
30656"short4 __ovld __cnfn convert_short4_sat_rtp(char4);\n"
30657"short4 __ovld __cnfn convert_short4_rtn(char4);\n"
30658"short4 __ovld __cnfn convert_short4_sat_rtn(char4);\n"
30659"short4 __ovld __cnfn convert_short4(char4);\n"
30660"short4 __ovld __cnfn convert_short4_sat(char4);\n"
30661"short4 __ovld __cnfn convert_short4_rte(uchar4);\n"
30662"short4 __ovld __cnfn convert_short4_sat_rte(uchar4);\n"
30663"short4 __ovld __cnfn convert_short4_rtz(uchar4);\n"
30664"short4 __ovld __cnfn convert_short4_sat_rtz(uchar4);\n"
30665"short4 __ovld __cnfn convert_short4_rtp(uchar4);\n"
30666"short4 __ovld __cnfn convert_short4_sat_rtp(uchar4);\n"
30667"short4 __ovld __cnfn convert_short4_rtn(uchar4);\n"
30668"short4 __ovld __cnfn convert_short4_sat_rtn(uchar4);\n"
30669"short4 __ovld __cnfn convert_short4(uchar4);\n"
30670"short4 __ovld __cnfn convert_short4_sat(uchar4);\n"
30671"short4 __ovld __cnfn convert_short4_rte(short4);\n"
30672"short4 __ovld __cnfn convert_short4_sat_rte(short4);\n"
30673"short4 __ovld __cnfn convert_short4_rtz(short4);\n"
30674"short4 __ovld __cnfn convert_short4_sat_rtz(short4);\n"
30675"short4 __ovld __cnfn convert_short4_rtp(short4);\n"
30676"short4 __ovld __cnfn convert_short4_sat_rtp(short4);\n"
30677"short4 __ovld __cnfn convert_short4_rtn(short4);\n"
30678"short4 __ovld __cnfn convert_short4_sat_rtn(short4);\n"
30679"short4 __ovld __cnfn convert_short4(short4);\n"
30680"short4 __ovld __cnfn convert_short4_sat(short4);\n"
30681"short4 __ovld __cnfn convert_short4_rte(ushort4);\n"
30682"short4 __ovld __cnfn convert_short4_sat_rte(ushort4);\n"
30683"short4 __ovld __cnfn convert_short4_rtz(ushort4);\n"
30684"short4 __ovld __cnfn convert_short4_sat_rtz(ushort4);\n"
30685"short4 __ovld __cnfn convert_short4_rtp(ushort4);\n"
30686"short4 __ovld __cnfn convert_short4_sat_rtp(ushort4);\n"
30687"short4 __ovld __cnfn convert_short4_rtn(ushort4);\n"
30688"short4 __ovld __cnfn convert_short4_sat_rtn(ushort4);\n"
30689"short4 __ovld __cnfn convert_short4(ushort4);\n"
30690"short4 __ovld __cnfn convert_short4_sat(ushort4);\n"
30691"short4 __ovld __cnfn convert_short4_rte(int4);\n"
30692"short4 __ovld __cnfn convert_short4_sat_rte(int4);\n"
30693"short4 __ovld __cnfn convert_short4_rtz(int4);\n"
30694"short4 __ovld __cnfn convert_short4_sat_rtz(int4);\n"
30695"short4 __ovld __cnfn convert_short4_rtp(int4);\n"
30696"short4 __ovld __cnfn convert_short4_sat_rtp(int4);\n"
30697"short4 __ovld __cnfn convert_short4_rtn(int4);\n"
30698"short4 __ovld __cnfn convert_short4_sat_rtn(int4);\n"
30699"short4 __ovld __cnfn convert_short4(int4);\n"
30700"short4 __ovld __cnfn convert_short4_sat(int4);\n"
30701"short4 __ovld __cnfn convert_short4_rte(uint4);\n"
30702"short4 __ovld __cnfn convert_short4_sat_rte(uint4);\n"
30703"short4 __ovld __cnfn convert_short4_rtz(uint4);\n"
30704"short4 __ovld __cnfn convert_short4_sat_rtz(uint4);\n"
30705"short4 __ovld __cnfn convert_short4_rtp(uint4);\n"
30706"short4 __ovld __cnfn convert_short4_sat_rtp(uint4);\n"
30707"short4 __ovld __cnfn convert_short4_rtn(uint4);\n"
30708"short4 __ovld __cnfn convert_short4_sat_rtn(uint4);\n"
30709"short4 __ovld __cnfn convert_short4(uint4);\n"
30710"short4 __ovld __cnfn convert_short4_sat(uint4);\n"
30711"short4 __ovld __cnfn convert_short4_rte(long4);\n"
30712"short4 __ovld __cnfn convert_short4_sat_rte(long4);\n"
30713"short4 __ovld __cnfn convert_short4_rtz(long4);\n"
30714"short4 __ovld __cnfn convert_short4_sat_rtz(long4);\n"
30715"short4 __ovld __cnfn convert_short4_rtp(long4);\n"
30716"short4 __ovld __cnfn convert_short4_sat_rtp(long4);\n"
30717"short4 __ovld __cnfn convert_short4_rtn(long4);\n"
30718"short4 __ovld __cnfn convert_short4_sat_rtn(long4);\n"
30719"short4 __ovld __cnfn convert_short4(long4);\n"
30720"short4 __ovld __cnfn convert_short4_sat(long4);\n"
30721"short4 __ovld __cnfn convert_short4_rte(ulong4);\n"
30722"short4 __ovld __cnfn convert_short4_sat_rte(ulong4);\n"
30723"short4 __ovld __cnfn convert_short4_rtz(ulong4);\n"
30724"short4 __ovld __cnfn convert_short4_sat_rtz(ulong4);\n"
30725"short4 __ovld __cnfn convert_short4_rtp(ulong4);\n"
30726"short4 __ovld __cnfn convert_short4_sat_rtp(ulong4);\n"
30727"short4 __ovld __cnfn convert_short4_rtn(ulong4);\n"
30728"short4 __ovld __cnfn convert_short4_sat_rtn(ulong4);\n"
30729"short4 __ovld __cnfn convert_short4(ulong4);\n"
30730"short4 __ovld __cnfn convert_short4_sat(ulong4);\n"
30731"short4 __ovld __cnfn convert_short4_rte(float4);\n"
30732"short4 __ovld __cnfn convert_short4_sat_rte(float4);\n"
30733"short4 __ovld __cnfn convert_short4_rtz(float4);\n"
30734"short4 __ovld __cnfn convert_short4_sat_rtz(float4);\n"
30735"short4 __ovld __cnfn convert_short4_rtp(float4);\n"
30736"short4 __ovld __cnfn convert_short4_sat_rtp(float4);\n"
30737"short4 __ovld __cnfn convert_short4_rtn(float4);\n"
30738"short4 __ovld __cnfn convert_short4_sat_rtn(float4);\n"
30739"short4 __ovld __cnfn convert_short4(float4);\n"
30740"short4 __ovld __cnfn convert_short4_sat(float4);\n"
30741"ushort4 __ovld __cnfn convert_ushort4_rte(char4);\n"
30742"ushort4 __ovld __cnfn convert_ushort4_sat_rte(char4);\n"
30743"ushort4 __ovld __cnfn convert_ushort4_rtz(char4);\n"
30744"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(char4);\n"
30745"ushort4 __ovld __cnfn convert_ushort4_rtp(char4);\n"
30746"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(char4);\n"
30747"ushort4 __ovld __cnfn convert_ushort4_rtn(char4);\n"
30748"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(char4);\n"
30749"ushort4 __ovld __cnfn convert_ushort4(char4);\n"
30750"ushort4 __ovld __cnfn convert_ushort4_sat(char4);\n"
30751"ushort4 __ovld __cnfn convert_ushort4_rte(uchar4);\n"
30752"ushort4 __ovld __cnfn convert_ushort4_sat_rte(uchar4);\n"
30753"ushort4 __ovld __cnfn convert_ushort4_rtz(uchar4);\n"
30754"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uchar4);\n"
30755"ushort4 __ovld __cnfn convert_ushort4_rtp(uchar4);\n"
30756"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uchar4);\n"
30757"ushort4 __ovld __cnfn convert_ushort4_rtn(uchar4);\n"
30758"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uchar4);\n"
30759"ushort4 __ovld __cnfn convert_ushort4(uchar4);\n"
30760"ushort4 __ovld __cnfn convert_ushort4_sat(uchar4);\n"
30761"ushort4 __ovld __cnfn convert_ushort4_rte(short4);\n"
30762"ushort4 __ovld __cnfn convert_ushort4_sat_rte(short4);\n"
30763"ushort4 __ovld __cnfn convert_ushort4_rtz(short4);\n"
30764"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(short4);\n"
30765"ushort4 __ovld __cnfn convert_ushort4_rtp(short4);\n"
30766"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(short4);\n"
30767"ushort4 __ovld __cnfn convert_ushort4_rtn(short4);\n"
30768"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(short4);\n"
30769"ushort4 __ovld __cnfn convert_ushort4(short4);\n"
30770"ushort4 __ovld __cnfn convert_ushort4_sat(short4);\n"
30771"ushort4 __ovld __cnfn convert_ushort4_rte(ushort4);\n"
30772"ushort4 __ovld __cnfn convert_ushort4_sat_rte(ushort4);\n"
30773"ushort4 __ovld __cnfn convert_ushort4_rtz(ushort4);\n"
30774"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ushort4);\n"
30775"ushort4 __ovld __cnfn convert_ushort4_rtp(ushort4);\n"
30776"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ushort4);\n"
30777"ushort4 __ovld __cnfn convert_ushort4_rtn(ushort4);\n"
30778"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ushort4);\n"
30779"ushort4 __ovld __cnfn convert_ushort4(ushort4);\n"
30780"ushort4 __ovld __cnfn convert_ushort4_sat(ushort4);\n"
30781"ushort4 __ovld __cnfn convert_ushort4_rte(int4);\n"
30782"ushort4 __ovld __cnfn convert_ushort4_sat_rte(int4);\n"
30783"ushort4 __ovld __cnfn convert_ushort4_rtz(int4);\n"
30784"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(int4);\n"
30785"ushort4 __ovld __cnfn convert_ushort4_rtp(int4);\n"
30786"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(int4);\n"
30787"ushort4 __ovld __cnfn convert_ushort4_rtn(int4);\n"
30788"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(int4);\n"
30789"ushort4 __ovld __cnfn convert_ushort4(int4);\n"
30790"ushort4 __ovld __cnfn convert_ushort4_sat(int4);\n"
30791"ushort4 __ovld __cnfn convert_ushort4_rte(uint4);\n"
30792"ushort4 __ovld __cnfn convert_ushort4_sat_rte(uint4);\n"
30793"ushort4 __ovld __cnfn convert_ushort4_rtz(uint4);\n"
30794"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uint4);\n"
30795"ushort4 __ovld __cnfn convert_ushort4_rtp(uint4);\n"
30796"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uint4);\n"
30797"ushort4 __ovld __cnfn convert_ushort4_rtn(uint4);\n"
30798"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uint4);\n"
30799"ushort4 __ovld __cnfn convert_ushort4(uint4);\n"
30800"ushort4 __ovld __cnfn convert_ushort4_sat(uint4);\n"
30801"ushort4 __ovld __cnfn convert_ushort4_rte(long4);\n"
30802"ushort4 __ovld __cnfn convert_ushort4_sat_rte(long4);\n"
30803"ushort4 __ovld __cnfn convert_ushort4_rtz(long4);\n"
30804"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(long4);\n"
30805"ushort4 __ovld __cnfn convert_ushort4_rtp(long4);\n"
30806"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(long4);\n"
30807"ushort4 __ovld __cnfn convert_ushort4_rtn(long4);\n"
30808"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(long4);\n"
30809"ushort4 __ovld __cnfn convert_ushort4(long4);\n"
30810"ushort4 __ovld __cnfn convert_ushort4_sat(long4);\n"
30811"ushort4 __ovld __cnfn convert_ushort4_rte(ulong4);\n"
30812"ushort4 __ovld __cnfn convert_ushort4_sat_rte(ulong4);\n"
30813"ushort4 __ovld __cnfn convert_ushort4_rtz(ulong4);\n"
30814"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ulong4);\n"
30815"ushort4 __ovld __cnfn convert_ushort4_rtp(ulong4);\n"
30816"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ulong4);\n"
30817"ushort4 __ovld __cnfn convert_ushort4_rtn(ulong4);\n"
30818"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ulong4);\n"
30819"ushort4 __ovld __cnfn convert_ushort4(ulong4);\n"
30820"ushort4 __ovld __cnfn convert_ushort4_sat(ulong4);\n"
30821"ushort4 __ovld __cnfn convert_ushort4_rte(float4);\n"
30822"ushort4 __ovld __cnfn convert_ushort4_sat_rte(float4);\n"
30823"ushort4 __ovld __cnfn convert_ushort4_rtz(float4);\n"
30824"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(float4);\n"
30825"ushort4 __ovld __cnfn convert_ushort4_rtp(float4);\n"
30826"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(float4);\n"
30827"ushort4 __ovld __cnfn convert_ushort4_rtn(float4);\n"
30828"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(float4);\n"
30829"ushort4 __ovld __cnfn convert_ushort4(float4);\n"
30830"ushort4 __ovld __cnfn convert_ushort4_sat(float4);\n"
30831"int4 __ovld __cnfn convert_int4_rte(char4);\n"
30832"int4 __ovld __cnfn convert_int4_sat_rte(char4);\n"
30833"int4 __ovld __cnfn convert_int4_rtz(char4);\n"
30834"int4 __ovld __cnfn convert_int4_sat_rtz(char4);\n"
30835"int4 __ovld __cnfn convert_int4_rtp(char4);\n"
30836"int4 __ovld __cnfn convert_int4_sat_rtp(char4);\n"
30837"int4 __ovld __cnfn convert_int4_rtn(char4);\n"
30838"int4 __ovld __cnfn convert_int4_sat_rtn(char4);\n"
30839"int4 __ovld __cnfn convert_int4(char4);\n"
30840"int4 __ovld __cnfn convert_int4_sat(char4);\n"
30841"int4 __ovld __cnfn convert_int4_rte(uchar4);\n"
30842"int4 __ovld __cnfn convert_int4_sat_rte(uchar4);\n"
30843"int4 __ovld __cnfn convert_int4_rtz(uchar4);\n"
30844"int4 __ovld __cnfn convert_int4_sat_rtz(uchar4);\n"
30845"int4 __ovld __cnfn convert_int4_rtp(uchar4);\n"
30846"int4 __ovld __cnfn convert_int4_sat_rtp(uchar4);\n"
30847"int4 __ovld __cnfn convert_int4_rtn(uchar4);\n"
30848"int4 __ovld __cnfn convert_int4_sat_rtn(uchar4);\n"
30849"int4 __ovld __cnfn convert_int4(uchar4);\n"
30850"int4 __ovld __cnfn convert_int4_sat(uchar4);\n"
30851"int4 __ovld __cnfn convert_int4_rte(short4);\n"
30852"int4 __ovld __cnfn convert_int4_sat_rte(short4);\n"
30853"int4 __ovld __cnfn convert_int4_rtz(short4);\n"
30854"int4 __ovld __cnfn convert_int4_sat_rtz(short4);\n"
30855"int4 __ovld __cnfn convert_int4_rtp(short4);\n"
30856"int4 __ovld __cnfn convert_int4_sat_rtp(short4);\n"
30857"int4 __ovld __cnfn convert_int4_rtn(short4);\n"
30858"int4 __ovld __cnfn convert_int4_sat_rtn(short4);\n"
30859"int4 __ovld __cnfn convert_int4(short4);\n"
30860"int4 __ovld __cnfn convert_int4_sat(short4);\n"
30861"int4 __ovld __cnfn convert_int4_rte(ushort4);\n"
30862"int4 __ovld __cnfn convert_int4_sat_rte(ushort4);\n"
30863"int4 __ovld __cnfn convert_int4_rtz(ushort4);\n"
30864"int4 __ovld __cnfn convert_int4_sat_rtz(ushort4);\n"
30865"int4 __ovld __cnfn convert_int4_rtp(ushort4);\n"
30866"int4 __ovld __cnfn convert_int4_sat_rtp(ushort4);\n"
30867"int4 __ovld __cnfn convert_int4_rtn(ushort4);\n"
30868"int4 __ovld __cnfn convert_int4_sat_rtn(ushort4);\n"
30869"int4 __ovld __cnfn convert_int4(ushort4);\n"
30870"int4 __ovld __cnfn convert_int4_sat(ushort4);\n"
30871"int4 __ovld __cnfn convert_int4_rte(int4);\n"
30872"int4 __ovld __cnfn convert_int4_sat_rte(int4);\n"
30873"int4 __ovld __cnfn convert_int4_rtz(int4);\n"
30874"int4 __ovld __cnfn convert_int4_sat_rtz(int4);\n"
30875"int4 __ovld __cnfn convert_int4_rtp(int4);\n"
30876"int4 __ovld __cnfn convert_int4_sat_rtp(int4);\n"
30877"int4 __ovld __cnfn convert_int4_rtn(int4);\n"
30878"int4 __ovld __cnfn convert_int4_sat_rtn(int4);\n"
30879"int4 __ovld __cnfn convert_int4(int4);\n"
30880"int4 __ovld __cnfn convert_int4_sat(int4);\n"
30881"int4 __ovld __cnfn convert_int4_rte(uint4);\n"
30882"int4 __ovld __cnfn convert_int4_sat_rte(uint4);\n"
30883"int4 __ovld __cnfn convert_int4_rtz(uint4);\n"
30884"int4 __ovld __cnfn convert_int4_sat_rtz(uint4);\n"
30885"int4 __ovld __cnfn convert_int4_rtp(uint4);\n"
30886"int4 __ovld __cnfn convert_int4_sat_rtp(uint4);\n"
30887"int4 __ovld __cnfn convert_int4_rtn(uint4);\n"
30888"int4 __ovld __cnfn convert_int4_sat_rtn(uint4);\n"
30889"int4 __ovld __cnfn convert_int4(uint4);\n"
30890"int4 __ovld __cnfn convert_int4_sat(uint4);\n"
30891"int4 __ovld __cnfn convert_int4_rte(long4);\n"
30892"int4 __ovld __cnfn convert_int4_sat_rte(long4);\n"
30893"int4 __ovld __cnfn convert_int4_rtz(long4);\n"
30894"int4 __ovld __cnfn convert_int4_sat_rtz(long4);\n"
30895"int4 __ovld __cnfn convert_int4_rtp(long4);\n"
30896"int4 __ovld __cnfn convert_int4_sat_rtp(long4);\n"
30897"int4 __ovld __cnfn convert_int4_rtn(long4);\n"
30898"int4 __ovld __cnfn convert_int4_sat_rtn(long4);\n"
30899"int4 __ovld __cnfn convert_int4(long4);\n"
30900"int4 __ovld __cnfn convert_int4_sat(long4);\n"
30901"int4 __ovld __cnfn convert_int4_rte(ulong4);\n"
30902"int4 __ovld __cnfn convert_int4_sat_rte(ulong4);\n"
30903"int4 __ovld __cnfn convert_int4_rtz(ulong4);\n"
30904"int4 __ovld __cnfn convert_int4_sat_rtz(ulong4);\n"
30905"int4 __ovld __cnfn convert_int4_rtp(ulong4);\n"
30906"int4 __ovld __cnfn convert_int4_sat_rtp(ulong4);\n"
30907"int4 __ovld __cnfn convert_int4_rtn(ulong4);\n"
30908"int4 __ovld __cnfn convert_int4_sat_rtn(ulong4);\n"
30909"int4 __ovld __cnfn convert_int4(ulong4);\n"
30910"int4 __ovld __cnfn convert_int4_sat(ulong4);\n"
30911"int4 __ovld __cnfn convert_int4_rte(float4);\n"
30912"int4 __ovld __cnfn convert_int4_sat_rte(float4);\n"
30913"int4 __ovld __cnfn convert_int4_rtz(float4);\n"
30914"int4 __ovld __cnfn convert_int4_sat_rtz(float4);\n"
30915"int4 __ovld __cnfn convert_int4_rtp(float4);\n"
30916"int4 __ovld __cnfn convert_int4_sat_rtp(float4);\n"
30917"int4 __ovld __cnfn convert_int4_rtn(float4);\n"
30918"int4 __ovld __cnfn convert_int4_sat_rtn(float4);\n"
30919"int4 __ovld __cnfn convert_int4(float4);\n"
30920"int4 __ovld __cnfn convert_int4_sat(float4);\n"
30921"uint4 __ovld __cnfn convert_uint4_rte(char4);\n"
30922"uint4 __ovld __cnfn convert_uint4_sat_rte(char4);\n"
30923"uint4 __ovld __cnfn convert_uint4_rtz(char4);\n"
30924"uint4 __ovld __cnfn convert_uint4_sat_rtz(char4);\n"
30925"uint4 __ovld __cnfn convert_uint4_rtp(char4);\n"
30926"uint4 __ovld __cnfn convert_uint4_sat_rtp(char4);\n"
30927"uint4 __ovld __cnfn convert_uint4_rtn(char4);\n"
30928"uint4 __ovld __cnfn convert_uint4_sat_rtn(char4);\n"
30929"uint4 __ovld __cnfn convert_uint4(char4);\n"
30930"uint4 __ovld __cnfn convert_uint4_sat(char4);\n"
30931"uint4 __ovld __cnfn convert_uint4_rte(uchar4);\n"
30932"uint4 __ovld __cnfn convert_uint4_sat_rte(uchar4);\n"
30933"uint4 __ovld __cnfn convert_uint4_rtz(uchar4);\n"
30934"uint4 __ovld __cnfn convert_uint4_sat_rtz(uchar4);\n"
30935"uint4 __ovld __cnfn convert_uint4_rtp(uchar4);\n"
30936"uint4 __ovld __cnfn convert_uint4_sat_rtp(uchar4);\n"
30937"uint4 __ovld __cnfn convert_uint4_rtn(uchar4);\n"
30938"uint4 __ovld __cnfn convert_uint4_sat_rtn(uchar4);\n"
30939"uint4 __ovld __cnfn convert_uint4(uchar4);\n"
30940"uint4 __ovld __cnfn convert_uint4_sat(uchar4);\n"
30941"uint4 __ovld __cnfn convert_uint4_rte(short4);\n"
30942"uint4 __ovld __cnfn convert_uint4_sat_rte(short4);\n"
30943"uint4 __ovld __cnfn convert_uint4_rtz(short4);\n"
30944"uint4 __ovld __cnfn convert_uint4_sat_rtz(short4);\n"
30945"uint4 __ovld __cnfn convert_uint4_rtp(short4);\n"
30946"uint4 __ovld __cnfn convert_uint4_sat_rtp(short4);\n"
30947"uint4 __ovld __cnfn convert_uint4_rtn(short4);\n"
30948"uint4 __ovld __cnfn convert_uint4_sat_rtn(short4);\n"
30949"uint4 __ovld __cnfn convert_uint4(short4);\n"
30950"uint4 __ovld __cnfn convert_uint4_sat(short4);\n"
30951"uint4 __ovld __cnfn convert_uint4_rte(ushort4);\n"
30952"uint4 __ovld __cnfn convert_uint4_sat_rte(ushort4);\n"
30953"uint4 __ovld __cnfn convert_uint4_rtz(ushort4);\n"
30954"uint4 __ovld __cnfn convert_uint4_sat_rtz(ushort4);\n"
30955"uint4 __ovld __cnfn convert_uint4_rtp(ushort4);\n"
30956"uint4 __ovld __cnfn convert_uint4_sat_rtp(ushort4);\n"
30957"uint4 __ovld __cnfn convert_uint4_rtn(ushort4);\n"
30958"uint4 __ovld __cnfn convert_uint4_sat_rtn(ushort4);\n"
30959"uint4 __ovld __cnfn convert_uint4(ushort4);\n"
30960"uint4 __ovld __cnfn convert_uint4_sat(ushort4);\n"
30961"uint4 __ovld __cnfn convert_uint4_rte(int4);\n"
30962"uint4 __ovld __cnfn convert_uint4_sat_rte(int4);\n"
30963"uint4 __ovld __cnfn convert_uint4_rtz(int4);\n"
30964"uint4 __ovld __cnfn convert_uint4_sat_rtz(int4);\n"
30965"uint4 __ovld __cnfn convert_uint4_rtp(int4);\n"
30966"uint4 __ovld __cnfn convert_uint4_sat_rtp(int4);\n"
30967"uint4 __ovld __cnfn convert_uint4_rtn(int4);\n"
30968"uint4 __ovld __cnfn convert_uint4_sat_rtn(int4);\n"
30969"uint4 __ovld __cnfn convert_uint4(int4);\n"
30970"uint4 __ovld __cnfn convert_uint4_sat(int4);\n"
30971"uint4 __ovld __cnfn convert_uint4_rte(uint4);\n"
30972"uint4 __ovld __cnfn convert_uint4_sat_rte(uint4);\n"
30973"uint4 __ovld __cnfn convert_uint4_rtz(uint4);\n"
30974"uint4 __ovld __cnfn convert_uint4_sat_rtz(uint4);\n"
30975"uint4 __ovld __cnfn convert_uint4_rtp(uint4);\n"
30976"uint4 __ovld __cnfn convert_uint4_sat_rtp(uint4);\n"
30977"uint4 __ovld __cnfn convert_uint4_rtn(uint4);\n"
30978"uint4 __ovld __cnfn convert_uint4_sat_rtn(uint4);\n"
30979"uint4 __ovld __cnfn convert_uint4(uint4);\n"
30980"uint4 __ovld __cnfn convert_uint4_sat(uint4);\n"
30981"uint4 __ovld __cnfn convert_uint4_rte(long4);\n"
30982"uint4 __ovld __cnfn convert_uint4_sat_rte(long4);\n"
30983"uint4 __ovld __cnfn convert_uint4_rtz(long4);\n"
30984"uint4 __ovld __cnfn convert_uint4_sat_rtz(long4);\n"
30985"uint4 __ovld __cnfn convert_uint4_rtp(long4);\n"
30986"uint4 __ovld __cnfn convert_uint4_sat_rtp(long4);\n"
30987"uint4 __ovld __cnfn convert_uint4_rtn(long4);\n"
30988"uint4 __ovld __cnfn convert_uint4_sat_rtn(long4);\n"
30989"uint4 __ovld __cnfn convert_uint4(long4);\n"
30990"uint4 __ovld __cnfn convert_uint4_sat(long4);\n"
30991"uint4 __ovld __cnfn convert_uint4_rte(ulong4);\n"
30992"uint4 __ovld __cnfn convert_uint4_sat_rte(ulong4);\n"
30993"uint4 __ovld __cnfn convert_uint4_rtz(ulong4);\n"
30994"uint4 __ovld __cnfn convert_uint4_sat_rtz(ulong4);\n"
30995"uint4 __ovld __cnfn convert_uint4_rtp(ulong4);\n"
30996"uint4 __ovld __cnfn convert_uint4_sat_rtp(ulong4);\n"
30997"uint4 __ovld __cnfn convert_uint4_rtn(ulong4);\n"
30998"uint4 __ovld __cnfn convert_uint4_sat_rtn(ulong4);\n"
30999"uint4 __ovld __cnfn convert_uint4(ulong4);\n"
31000"uint4 __ovld __cnfn convert_uint4_sat(ulong4);\n"
31001"uint4 __ovld __cnfn convert_uint4_rte(float4);\n"
31002"uint4 __ovld __cnfn convert_uint4_sat_rte(float4);\n"
31003"uint4 __ovld __cnfn convert_uint4_rtz(float4);\n"
31004"uint4 __ovld __cnfn convert_uint4_sat_rtz(float4);\n"
31005"uint4 __ovld __cnfn convert_uint4_rtp(float4);\n"
31006"uint4 __ovld __cnfn convert_uint4_sat_rtp(float4);\n"
31007"uint4 __ovld __cnfn convert_uint4_rtn(float4);\n"
31008"uint4 __ovld __cnfn convert_uint4_sat_rtn(float4);\n"
31009"uint4 __ovld __cnfn convert_uint4(float4);\n"
31010"uint4 __ovld __cnfn convert_uint4_sat(float4);\n"
31011"long4 __ovld __cnfn convert_long4_rte(char4);\n"
31012"long4 __ovld __cnfn convert_long4_sat_rte(char4);\n"
31013"long4 __ovld __cnfn convert_long4_rtz(char4);\n"
31014"long4 __ovld __cnfn convert_long4_sat_rtz(char4);\n"
31015"long4 __ovld __cnfn convert_long4_rtp(char4);\n"
31016"long4 __ovld __cnfn convert_long4_sat_rtp(char4);\n"
31017"long4 __ovld __cnfn convert_long4_rtn(char4);\n"
31018"long4 __ovld __cnfn convert_long4_sat_rtn(char4);\n"
31019"long4 __ovld __cnfn convert_long4(char4);\n"
31020"long4 __ovld __cnfn convert_long4_sat(char4);\n"
31021"long4 __ovld __cnfn convert_long4_rte(uchar4);\n"
31022"long4 __ovld __cnfn convert_long4_sat_rte(uchar4);\n"
31023"long4 __ovld __cnfn convert_long4_rtz(uchar4);\n"
31024"long4 __ovld __cnfn convert_long4_sat_rtz(uchar4);\n"
31025"long4 __ovld __cnfn convert_long4_rtp(uchar4);\n"
31026"long4 __ovld __cnfn convert_long4_sat_rtp(uchar4);\n"
31027"long4 __ovld __cnfn convert_long4_rtn(uchar4);\n"
31028"long4 __ovld __cnfn convert_long4_sat_rtn(uchar4);\n"
31029"long4 __ovld __cnfn convert_long4(uchar4);\n"
31030"long4 __ovld __cnfn convert_long4_sat(uchar4);\n"
31031"long4 __ovld __cnfn convert_long4_rte(short4);\n"
31032"long4 __ovld __cnfn convert_long4_sat_rte(short4);\n"
31033"long4 __ovld __cnfn convert_long4_rtz(short4);\n"
31034"long4 __ovld __cnfn convert_long4_sat_rtz(short4);\n"
31035"long4 __ovld __cnfn convert_long4_rtp(short4);\n"
31036"long4 __ovld __cnfn convert_long4_sat_rtp(short4);\n"
31037"long4 __ovld __cnfn convert_long4_rtn(short4);\n"
31038"long4 __ovld __cnfn convert_long4_sat_rtn(short4);\n"
31039"long4 __ovld __cnfn convert_long4(short4);\n"
31040"long4 __ovld __cnfn convert_long4_sat(short4);\n"
31041"long4 __ovld __cnfn convert_long4_rte(ushort4);\n"
31042"long4 __ovld __cnfn convert_long4_sat_rte(ushort4);\n"
31043"long4 __ovld __cnfn convert_long4_rtz(ushort4);\n"
31044"long4 __ovld __cnfn convert_long4_sat_rtz(ushort4);\n"
31045"long4 __ovld __cnfn convert_long4_rtp(ushort4);\n"
31046"long4 __ovld __cnfn convert_long4_sat_rtp(ushort4);\n"
31047"long4 __ovld __cnfn convert_long4_rtn(ushort4);\n"
31048"long4 __ovld __cnfn convert_long4_sat_rtn(ushort4);\n"
31049"long4 __ovld __cnfn convert_long4(ushort4);\n"
31050"long4 __ovld __cnfn convert_long4_sat(ushort4);\n"
31051"long4 __ovld __cnfn convert_long4_rte(int4);\n"
31052"long4 __ovld __cnfn convert_long4_sat_rte(int4);\n"
31053"long4 __ovld __cnfn convert_long4_rtz(int4);\n"
31054"long4 __ovld __cnfn convert_long4_sat_rtz(int4);\n"
31055"long4 __ovld __cnfn convert_long4_rtp(int4);\n"
31056"long4 __ovld __cnfn convert_long4_sat_rtp(int4);\n"
31057"long4 __ovld __cnfn convert_long4_rtn(int4);\n"
31058"long4 __ovld __cnfn convert_long4_sat_rtn(int4);\n"
31059"long4 __ovld __cnfn convert_long4(int4);\n"
31060"long4 __ovld __cnfn convert_long4_sat(int4);\n"
31061"long4 __ovld __cnfn convert_long4_rte(uint4);\n"
31062"long4 __ovld __cnfn convert_long4_sat_rte(uint4);\n"
31063"long4 __ovld __cnfn convert_long4_rtz(uint4);\n"
31064"long4 __ovld __cnfn convert_long4_sat_rtz(uint4);\n"
31065"long4 __ovld __cnfn convert_long4_rtp(uint4);\n"
31066"long4 __ovld __cnfn convert_long4_sat_rtp(uint4);\n"
31067"long4 __ovld __cnfn convert_long4_rtn(uint4);\n"
31068"long4 __ovld __cnfn convert_long4_sat_rtn(uint4);\n"
31069"long4 __ovld __cnfn convert_long4(uint4);\n"
31070"long4 __ovld __cnfn convert_long4_sat(uint4);\n"
31071"long4 __ovld __cnfn convert_long4_rte(long4);\n"
31072"long4 __ovld __cnfn convert_long4_sat_rte(long4);\n"
31073"long4 __ovld __cnfn convert_long4_rtz(long4);\n"
31074"long4 __ovld __cnfn convert_long4_sat_rtz(long4);\n"
31075"long4 __ovld __cnfn convert_long4_rtp(long4);\n"
31076"long4 __ovld __cnfn convert_long4_sat_rtp(long4);\n"
31077"long4 __ovld __cnfn convert_long4_rtn(long4);\n"
31078"long4 __ovld __cnfn convert_long4_sat_rtn(long4);\n"
31079"long4 __ovld __cnfn convert_long4(long4);\n"
31080"long4 __ovld __cnfn convert_long4_sat(long4);\n"
31081"long4 __ovld __cnfn convert_long4_rte(ulong4);\n"
31082"long4 __ovld __cnfn convert_long4_sat_rte(ulong4);\n"
31083"long4 __ovld __cnfn convert_long4_rtz(ulong4);\n"
31084"long4 __ovld __cnfn convert_long4_sat_rtz(ulong4);\n"
31085"long4 __ovld __cnfn convert_long4_rtp(ulong4);\n"
31086"long4 __ovld __cnfn convert_long4_sat_rtp(ulong4);\n"
31087"long4 __ovld __cnfn convert_long4_rtn(ulong4);\n"
31088"long4 __ovld __cnfn convert_long4_sat_rtn(ulong4);\n"
31089"long4 __ovld __cnfn convert_long4(ulong4);\n"
31090"long4 __ovld __cnfn convert_long4_sat(ulong4);\n"
31091"long4 __ovld __cnfn convert_long4_rte(float4);\n"
31092"long4 __ovld __cnfn convert_long4_sat_rte(float4);\n"
31093"long4 __ovld __cnfn convert_long4_rtz(float4);\n"
31094"long4 __ovld __cnfn convert_long4_sat_rtz(float4);\n"
31095"long4 __ovld __cnfn convert_long4_rtp(float4);\n"
31096"long4 __ovld __cnfn convert_long4_sat_rtp(float4);\n"
31097"long4 __ovld __cnfn convert_long4_rtn(float4);\n"
31098"long4 __ovld __cnfn convert_long4_sat_rtn(float4);\n"
31099"long4 __ovld __cnfn convert_long4(float4);\n"
31100"long4 __ovld __cnfn convert_long4_sat(float4);\n"
31101"ulong4 __ovld __cnfn convert_ulong4_rte(char4);\n"
31102"ulong4 __ovld __cnfn convert_ulong4_sat_rte(char4);\n"
31103"ulong4 __ovld __cnfn convert_ulong4_rtz(char4);\n"
31104"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(char4);\n"
31105"ulong4 __ovld __cnfn convert_ulong4_rtp(char4);\n"
31106"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(char4);\n"
31107"ulong4 __ovld __cnfn convert_ulong4_rtn(char4);\n"
31108"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(char4);\n"
31109"ulong4 __ovld __cnfn convert_ulong4(char4);\n"
31110"ulong4 __ovld __cnfn convert_ulong4_sat(char4);\n"
31111"ulong4 __ovld __cnfn convert_ulong4_rte(uchar4);\n"
31112"ulong4 __ovld __cnfn convert_ulong4_sat_rte(uchar4);\n"
31113"ulong4 __ovld __cnfn convert_ulong4_rtz(uchar4);\n"
31114"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uchar4);\n"
31115"ulong4 __ovld __cnfn convert_ulong4_rtp(uchar4);\n"
31116"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uchar4);\n"
31117"ulong4 __ovld __cnfn convert_ulong4_rtn(uchar4);\n"
31118"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uchar4);\n"
31119"ulong4 __ovld __cnfn convert_ulong4(uchar4);\n"
31120"ulong4 __ovld __cnfn convert_ulong4_sat(uchar4);\n"
31121"ulong4 __ovld __cnfn convert_ulong4_rte(short4);\n"
31122"ulong4 __ovld __cnfn convert_ulong4_sat_rte(short4);\n"
31123"ulong4 __ovld __cnfn convert_ulong4_rtz(short4);\n"
31124"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(short4);\n"
31125"ulong4 __ovld __cnfn convert_ulong4_rtp(short4);\n"
31126"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(short4);\n"
31127"ulong4 __ovld __cnfn convert_ulong4_rtn(short4);\n"
31128"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(short4);\n"
31129"ulong4 __ovld __cnfn convert_ulong4(short4);\n"
31130"ulong4 __ovld __cnfn convert_ulong4_sat(short4);\n"
31131"ulong4 __ovld __cnfn convert_ulong4_rte(ushort4);\n"
31132"ulong4 __ovld __cnfn convert_ulong4_sat_rte(ushort4);\n"
31133"ulong4 __ovld __cnfn convert_ulong4_rtz(ushort4);\n"
31134"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ushort4);\n"
31135"ulong4 __ovld __cnfn convert_ulong4_rtp(ushort4);\n"
31136"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(ushort4);\n"
31137"ulong4 __ovld __cnfn convert_ulong4_rtn(ushort4);\n"
31138"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ushort4);\n"
31139"ulong4 __ovld __cnfn convert_ulong4(ushort4);\n"
31140"ulong4 __ovld __cnfn convert_ulong4_sat(ushort4);\n"
31141"ulong4 __ovld __cnfn convert_ulong4_rte(int4);\n"
31142"ulong4 __ovld __cnfn convert_ulong4_sat_rte(int4);\n"
31143"ulong4 __ovld __cnfn convert_ulong4_rtz(int4);\n"
31144"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(int4);\n"
31145"ulong4 __ovld __cnfn convert_ulong4_rtp(int4);\n"
31146"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(int4);\n"
31147"ulong4 __ovld __cnfn convert_ulong4_rtn(int4);\n"
31148"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(int4);\n"
31149"ulong4 __ovld __cnfn convert_ulong4(int4);\n"
31150"ulong4 __ovld __cnfn convert_ulong4_sat(int4);\n"
31151"ulong4 __ovld __cnfn convert_ulong4_rte(uint4);\n"
31152"ulong4 __ovld __cnfn convert_ulong4_sat_rte(uint4);\n"
31153"ulong4 __ovld __cnfn convert_ulong4_rtz(uint4);\n"
31154"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uint4);\n"
31155"ulong4 __ovld __cnfn convert_ulong4_rtp(uint4);\n"
31156"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uint4);\n"
31157"ulong4 __ovld __cnfn convert_ulong4_rtn(uint4);\n"
31158"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uint4);\n"
31159"ulong4 __ovld __cnfn convert_ulong4(uint4);\n"
31160"ulong4 __ovld __cnfn convert_ulong4_sat(uint4);\n"
31161"ulong4 __ovld __cnfn convert_ulong4_rte(long4);\n"
31162"ulong4 __ovld __cnfn convert_ulong4_sat_rte(long4);\n"
31163"ulong4 __ovld __cnfn convert_ulong4_rtz(long4);\n"
31164"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(long4);\n"
31165"ulong4 __ovld __cnfn convert_ulong4_rtp(long4);\n"
31166"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(long4);\n"
31167"ulong4 __ovld __cnfn convert_ulong4_rtn(long4);\n"
31168"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(long4);\n"
31169"ulong4 __ovld __cnfn convert_ulong4(long4);\n"
31170"ulong4 __ovld __cnfn convert_ulong4_sat(long4);\n"
31171"ulong4 __ovld __cnfn convert_ulong4_rte(ulong4);\n"
31172"ulong4 __ovld __cnfn convert_ulong4_sat_rte(ulong4);\n"
31173"ulong4 __ovld __cnfn convert_ulong4_rtz(ulong4);\n"
31174"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ulong4);\n"
31175"ulong4 __ovld __cnfn convert_ulong4_rtp(ulong4);\n"
31176"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(ulong4);\n"
31177"ulong4 __ovld __cnfn convert_ulong4_rtn(ulong4);\n"
31178"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ulong4);\n"
31179"ulong4 __ovld __cnfn convert_ulong4(ulong4);\n"
31180"ulong4 __ovld __cnfn convert_ulong4_sat(ulong4);\n"
31181"ulong4 __ovld __cnfn convert_ulong4_rte(float4);\n"
31182"ulong4 __ovld __cnfn convert_ulong4_sat_rte(float4);\n"
31183"ulong4 __ovld __cnfn convert_ulong4_rtz(float4);\n"
31184"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(float4);\n"
31185"ulong4 __ovld __cnfn convert_ulong4_rtp(float4);\n"
31186"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(float4);\n"
31187"ulong4 __ovld __cnfn convert_ulong4_rtn(float4);\n"
31188"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(float4);\n"
31189"ulong4 __ovld __cnfn convert_ulong4(float4);\n"
31190"ulong4 __ovld __cnfn convert_ulong4_sat(float4);\n"
31191"float4 __ovld __cnfn convert_float4_rte(char4);\n"
31192"float4 __ovld __cnfn convert_float4_rtz(char4);\n"
31193"float4 __ovld __cnfn convert_float4_rtp(char4);\n"
31194"float4 __ovld __cnfn convert_float4_rtn(char4);\n"
31195"float4 __ovld __cnfn convert_float4(char4);\n"
31196"float4 __ovld __cnfn convert_float4_rte(uchar4);\n"
31197"float4 __ovld __cnfn convert_float4_rtz(uchar4);\n"
31198"float4 __ovld __cnfn convert_float4_rtp(uchar4);\n"
31199"float4 __ovld __cnfn convert_float4_rtn(uchar4);\n"
31200"float4 __ovld __cnfn convert_float4(uchar4);\n"
31201"float4 __ovld __cnfn convert_float4_rte(short4);\n"
31202"float4 __ovld __cnfn convert_float4_rtz(short4);\n"
31203"float4 __ovld __cnfn convert_float4_rtp(short4);\n"
31204"float4 __ovld __cnfn convert_float4_rtn(short4);\n"
31205"float4 __ovld __cnfn convert_float4(short4);\n"
31206"float4 __ovld __cnfn convert_float4_rte(ushort4);\n"
31207"float4 __ovld __cnfn convert_float4_rtz(ushort4);\n"
31208"float4 __ovld __cnfn convert_float4_rtp(ushort4);\n"
31209"float4 __ovld __cnfn convert_float4_rtn(ushort4);\n"
31210"float4 __ovld __cnfn convert_float4(ushort4);\n"
31211"float4 __ovld __cnfn convert_float4_rte(int4);\n"
31212"float4 __ovld __cnfn convert_float4_rtz(int4);\n"
31213"float4 __ovld __cnfn convert_float4_rtp(int4);\n"
31214"float4 __ovld __cnfn convert_float4_rtn(int4);\n"
31215"float4 __ovld __cnfn convert_float4(int4);\n"
31216"float4 __ovld __cnfn convert_float4_rte(uint4);\n"
31217"float4 __ovld __cnfn convert_float4_rtz(uint4);\n"
31218"float4 __ovld __cnfn convert_float4_rtp(uint4);\n"
31219"float4 __ovld __cnfn convert_float4_rtn(uint4);\n"
31220"float4 __ovld __cnfn convert_float4(uint4);\n"
31221"float4 __ovld __cnfn convert_float4_rte(long4);\n"
31222"float4 __ovld __cnfn convert_float4_rtz(long4);\n"
31223"float4 __ovld __cnfn convert_float4_rtp(long4);\n"
31224"float4 __ovld __cnfn convert_float4_rtn(long4);\n"
31225"float4 __ovld __cnfn convert_float4(long4);\n"
31226"float4 __ovld __cnfn convert_float4_rte(ulong4);\n"
31227"float4 __ovld __cnfn convert_float4_rtz(ulong4);\n"
31228"float4 __ovld __cnfn convert_float4_rtp(ulong4);\n"
31229"float4 __ovld __cnfn convert_float4_rtn(ulong4);\n"
31230"float4 __ovld __cnfn convert_float4(ulong4);\n"
31231"float4 __ovld __cnfn convert_float4_rte(float4);\n"
31232"float4 __ovld __cnfn convert_float4_rtz(float4);\n"
31233"float4 __ovld __cnfn convert_float4_rtp(float4);\n"
31234"float4 __ovld __cnfn convert_float4_rtn(float4);\n"
31235"float4 __ovld __cnfn convert_float4(float4);\n"
31236"char8 __ovld __cnfn convert_char8_rte(char8);\n"
31237"char8 __ovld __cnfn convert_char8_sat_rte(char8);\n"
31238"char8 __ovld __cnfn convert_char8_rtz(char8);\n"
31239"char8 __ovld __cnfn convert_char8_sat_rtz(char8);\n"
31240"char8 __ovld __cnfn convert_char8_rtp(char8);\n"
31241"char8 __ovld __cnfn convert_char8_sat_rtp(char8);\n"
31242"char8 __ovld __cnfn convert_char8_rtn(char8);\n"
31243"char8 __ovld __cnfn convert_char8_sat_rtn(char8);\n"
31244"char8 __ovld __cnfn convert_char8(char8);\n"
31245"char8 __ovld __cnfn convert_char8_sat(char8);\n"
31246"char8 __ovld __cnfn convert_char8_rte(uchar8);\n"
31247"char8 __ovld __cnfn convert_char8_sat_rte(uchar8);\n"
31248"char8 __ovld __cnfn convert_char8_rtz(uchar8);\n"
31249"char8 __ovld __cnfn convert_char8_sat_rtz(uchar8);\n"
31250"char8 __ovld __cnfn convert_char8_rtp(uchar8);\n"
31251"char8 __ovld __cnfn convert_char8_sat_rtp(uchar8);\n"
31252"char8 __ovld __cnfn convert_char8_rtn(uchar8);\n"
31253"char8 __ovld __cnfn convert_char8_sat_rtn(uchar8);\n"
31254"char8 __ovld __cnfn convert_char8(uchar8);\n"
31255"char8 __ovld __cnfn convert_char8_sat(uchar8);\n"
31256"char8 __ovld __cnfn convert_char8_rte(short8);\n"
31257"char8 __ovld __cnfn convert_char8_sat_rte(short8);\n"
31258"char8 __ovld __cnfn convert_char8_rtz(short8);\n"
31259"char8 __ovld __cnfn convert_char8_sat_rtz(short8);\n"
31260"char8 __ovld __cnfn convert_char8_rtp(short8);\n"
31261"char8 __ovld __cnfn convert_char8_sat_rtp(short8);\n"
31262"char8 __ovld __cnfn convert_char8_rtn(short8);\n"
31263"char8 __ovld __cnfn convert_char8_sat_rtn(short8);\n"
31264"char8 __ovld __cnfn convert_char8(short8);\n"
31265"char8 __ovld __cnfn convert_char8_sat(short8);\n"
31266"char8 __ovld __cnfn convert_char8_rte(ushort8);\n"
31267"char8 __ovld __cnfn convert_char8_sat_rte(ushort8);\n"
31268"char8 __ovld __cnfn convert_char8_rtz(ushort8);\n"
31269"char8 __ovld __cnfn convert_char8_sat_rtz(ushort8);\n"
31270"char8 __ovld __cnfn convert_char8_rtp(ushort8);\n"
31271"char8 __ovld __cnfn convert_char8_sat_rtp(ushort8);\n"
31272"char8 __ovld __cnfn convert_char8_rtn(ushort8);\n"
31273"char8 __ovld __cnfn convert_char8_sat_rtn(ushort8);\n"
31274"char8 __ovld __cnfn convert_char8(ushort8);\n"
31275"char8 __ovld __cnfn convert_char8_sat(ushort8);\n"
31276"char8 __ovld __cnfn convert_char8_rte(int8);\n"
31277"char8 __ovld __cnfn convert_char8_sat_rte(int8);\n"
31278"char8 __ovld __cnfn convert_char8_rtz(int8);\n"
31279"char8 __ovld __cnfn convert_char8_sat_rtz(int8);\n"
31280"char8 __ovld __cnfn convert_char8_rtp(int8);\n"
31281"char8 __ovld __cnfn convert_char8_sat_rtp(int8);\n"
31282"char8 __ovld __cnfn convert_char8_rtn(int8);\n"
31283"char8 __ovld __cnfn convert_char8_sat_rtn(int8);\n"
31284"char8 __ovld __cnfn convert_char8(int8);\n"
31285"char8 __ovld __cnfn convert_char8_sat(int8);\n"
31286"char8 __ovld __cnfn convert_char8_rte(uint8);\n"
31287"char8 __ovld __cnfn convert_char8_sat_rte(uint8);\n"
31288"char8 __ovld __cnfn convert_char8_rtz(uint8);\n"
31289"char8 __ovld __cnfn convert_char8_sat_rtz(uint8);\n"
31290"char8 __ovld __cnfn convert_char8_rtp(uint8);\n"
31291"char8 __ovld __cnfn convert_char8_sat_rtp(uint8);\n"
31292"char8 __ovld __cnfn convert_char8_rtn(uint8);\n"
31293"char8 __ovld __cnfn convert_char8_sat_rtn(uint8);\n"
31294"char8 __ovld __cnfn convert_char8(uint8);\n"
31295"char8 __ovld __cnfn convert_char8_sat(uint8);\n"
31296"char8 __ovld __cnfn convert_char8_rte(long8);\n"
31297"char8 __ovld __cnfn convert_char8_sat_rte(long8);\n"
31298"char8 __ovld __cnfn convert_char8_rtz(long8);\n"
31299"char8 __ovld __cnfn convert_char8_sat_rtz(long8);\n"
31300"char8 __ovld __cnfn convert_char8_rtp(long8);\n"
31301"char8 __ovld __cnfn convert_char8_sat_rtp(long8);\n"
31302"char8 __ovld __cnfn convert_char8_rtn(long8);\n"
31303"char8 __ovld __cnfn convert_char8_sat_rtn(long8);\n"
31304"char8 __ovld __cnfn convert_char8(long8);\n"
31305"char8 __ovld __cnfn convert_char8_sat(long8);\n"
31306"char8 __ovld __cnfn convert_char8_rte(ulong8);\n"
31307"char8 __ovld __cnfn convert_char8_sat_rte(ulong8);\n"
31308"char8 __ovld __cnfn convert_char8_rtz(ulong8);\n"
31309"char8 __ovld __cnfn convert_char8_sat_rtz(ulong8);\n"
31310"char8 __ovld __cnfn convert_char8_rtp(ulong8);\n"
31311"char8 __ovld __cnfn convert_char8_sat_rtp(ulong8);\n"
31312"char8 __ovld __cnfn convert_char8_rtn(ulong8);\n"
31313"char8 __ovld __cnfn convert_char8_sat_rtn(ulong8);\n"
31314"char8 __ovld __cnfn convert_char8(ulong8);\n"
31315"char8 __ovld __cnfn convert_char8_sat(ulong8);\n"
31316"char8 __ovld __cnfn convert_char8_rte(float8);\n"
31317"char8 __ovld __cnfn convert_char8_sat_rte(float8);\n"
31318"char8 __ovld __cnfn convert_char8_rtz(float8);\n"
31319"char8 __ovld __cnfn convert_char8_sat_rtz(float8);\n"
31320"char8 __ovld __cnfn convert_char8_rtp(float8);\n"
31321"char8 __ovld __cnfn convert_char8_sat_rtp(float8);\n"
31322"char8 __ovld __cnfn convert_char8_rtn(float8);\n"
31323"char8 __ovld __cnfn convert_char8_sat_rtn(float8);\n"
31324"char8 __ovld __cnfn convert_char8(float8);\n"
31325"char8 __ovld __cnfn convert_char8_sat(float8);\n"
31326"uchar8 __ovld __cnfn convert_uchar8_rte(char8);\n"
31327"uchar8 __ovld __cnfn convert_uchar8_sat_rte(char8);\n"
31328"uchar8 __ovld __cnfn convert_uchar8_rtz(char8);\n"
31329"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(char8);\n"
31330"uchar8 __ovld __cnfn convert_uchar8_rtp(char8);\n"
31331"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(char8);\n"
31332"uchar8 __ovld __cnfn convert_uchar8_rtn(char8);\n"
31333"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(char8);\n"
31334"uchar8 __ovld __cnfn convert_uchar8(char8);\n"
31335"uchar8 __ovld __cnfn convert_uchar8_sat(char8);\n"
31336"uchar8 __ovld __cnfn convert_uchar8_rte(uchar8);\n"
31337"uchar8 __ovld __cnfn convert_uchar8_sat_rte(uchar8);\n"
31338"uchar8 __ovld __cnfn convert_uchar8_rtz(uchar8);\n"
31339"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(uchar8);\n"
31340"uchar8 __ovld __cnfn convert_uchar8_rtp(uchar8);\n"
31341"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uchar8);\n"
31342"uchar8 __ovld __cnfn convert_uchar8_rtn(uchar8);\n"
31343"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uchar8);\n"
31344"uchar8 __ovld __cnfn convert_uchar8(uchar8);\n"
31345"uchar8 __ovld __cnfn convert_uchar8_sat(uchar8);\n"
31346"uchar8 __ovld __cnfn convert_uchar8_rte(short8);\n"
31347"uchar8 __ovld __cnfn convert_uchar8_sat_rte(short8);\n"
31348"uchar8 __ovld __cnfn convert_uchar8_rtz(short8);\n"
31349"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(short8);\n"
31350"uchar8 __ovld __cnfn convert_uchar8_rtp(short8);\n"
31351"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(short8);\n"
31352"uchar8 __ovld __cnfn convert_uchar8_rtn(short8);\n"
31353"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(short8);\n"
31354"uchar8 __ovld __cnfn convert_uchar8(short8);\n"
31355"uchar8 __ovld __cnfn convert_uchar8_sat(short8);\n"
31356"uchar8 __ovld __cnfn convert_uchar8_rte(ushort8);\n"
31357"uchar8 __ovld __cnfn convert_uchar8_sat_rte(ushort8);\n"
31358"uchar8 __ovld __cnfn convert_uchar8_rtz(ushort8);\n"
31359"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ushort8);\n"
31360"uchar8 __ovld __cnfn convert_uchar8_rtp(ushort8);\n"
31361"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ushort8);\n"
31362"uchar8 __ovld __cnfn convert_uchar8_rtn(ushort8);\n"
31363"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ushort8);\n"
31364"uchar8 __ovld __cnfn convert_uchar8(ushort8);\n"
31365"uchar8 __ovld __cnfn convert_uchar8_sat(ushort8);\n"
31366"uchar8 __ovld __cnfn convert_uchar8_rte(int8);\n"
31367"uchar8 __ovld __cnfn convert_uchar8_sat_rte(int8);\n"
31368"uchar8 __ovld __cnfn convert_uchar8_rtz(int8);\n"
31369"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(int8);\n"
31370"uchar8 __ovld __cnfn convert_uchar8_rtp(int8);\n"
31371"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(int8);\n"
31372"uchar8 __ovld __cnfn convert_uchar8_rtn(int8);\n"
31373"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(int8);\n"
31374"uchar8 __ovld __cnfn convert_uchar8(int8);\n"
31375"uchar8 __ovld __cnfn convert_uchar8_sat(int8);\n"
31376"uchar8 __ovld __cnfn convert_uchar8_rte(uint8);\n"
31377"uchar8 __ovld __cnfn convert_uchar8_sat_rte(uint8);\n"
31378"uchar8 __ovld __cnfn convert_uchar8_rtz(uint8);\n"
31379"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(uint8);\n"
31380"uchar8 __ovld __cnfn convert_uchar8_rtp(uint8);\n"
31381"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uint8);\n"
31382"uchar8 __ovld __cnfn convert_uchar8_rtn(uint8);\n"
31383"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uint8);\n"
31384"uchar8 __ovld __cnfn convert_uchar8(uint8);\n"
31385"uchar8 __ovld __cnfn convert_uchar8_sat(uint8);\n"
31386"uchar8 __ovld __cnfn convert_uchar8_rte(long8);\n"
31387"uchar8 __ovld __cnfn convert_uchar8_sat_rte(long8);\n"
31388"uchar8 __ovld __cnfn convert_uchar8_rtz(long8);\n"
31389"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(long8);\n"
31390"uchar8 __ovld __cnfn convert_uchar8_rtp(long8);\n"
31391"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(long8);\n"
31392"uchar8 __ovld __cnfn convert_uchar8_rtn(long8);\n"
31393"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(long8);\n"
31394"uchar8 __ovld __cnfn convert_uchar8(long8);\n"
31395"uchar8 __ovld __cnfn convert_uchar8_sat(long8);\n"
31396"uchar8 __ovld __cnfn convert_uchar8_rte(ulong8);\n"
31397"uchar8 __ovld __cnfn convert_uchar8_sat_rte(ulong8);\n"
31398"uchar8 __ovld __cnfn convert_uchar8_rtz(ulong8);\n"
31399"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ulong8);\n"
31400"uchar8 __ovld __cnfn convert_uchar8_rtp(ulong8);\n"
31401"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ulong8);\n"
31402"uchar8 __ovld __cnfn convert_uchar8_rtn(ulong8);\n"
31403"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ulong8);\n"
31404"uchar8 __ovld __cnfn convert_uchar8(ulong8);\n"
31405"uchar8 __ovld __cnfn convert_uchar8_sat(ulong8);\n"
31406"uchar8 __ovld __cnfn convert_uchar8_rte(float8);\n"
31407"uchar8 __ovld __cnfn convert_uchar8_sat_rte(float8);\n"
31408"uchar8 __ovld __cnfn convert_uchar8_rtz(float8);\n"
31409"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(float8);\n"
31410"uchar8 __ovld __cnfn convert_uchar8_rtp(float8);\n"
31411"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(float8);\n"
31412"uchar8 __ovld __cnfn convert_uchar8_rtn(float8);\n"
31413"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(float8);\n"
31414"uchar8 __ovld __cnfn convert_uchar8(float8);\n"
31415"uchar8 __ovld __cnfn convert_uchar8_sat(float8);\n"
31416"short8 __ovld __cnfn convert_short8_rte(char8);\n"
31417"short8 __ovld __cnfn convert_short8_sat_rte(char8);\n"
31418"short8 __ovld __cnfn convert_short8_rtz(char8);\n"
31419"short8 __ovld __cnfn convert_short8_sat_rtz(char8);\n"
31420"short8 __ovld __cnfn convert_short8_rtp(char8);\n"
31421"short8 __ovld __cnfn convert_short8_sat_rtp(char8);\n"
31422"short8 __ovld __cnfn convert_short8_rtn(char8);\n"
31423"short8 __ovld __cnfn convert_short8_sat_rtn(char8);\n"
31424"short8 __ovld __cnfn convert_short8(char8);\n"
31425"short8 __ovld __cnfn convert_short8_sat(char8);\n"
31426"short8 __ovld __cnfn convert_short8_rte(uchar8);\n"
31427"short8 __ovld __cnfn convert_short8_sat_rte(uchar8);\n"
31428"short8 __ovld __cnfn convert_short8_rtz(uchar8);\n"
31429"short8 __ovld __cnfn convert_short8_sat_rtz(uchar8);\n"
31430"short8 __ovld __cnfn convert_short8_rtp(uchar8);\n"
31431"short8 __ovld __cnfn convert_short8_sat_rtp(uchar8);\n"
31432"short8 __ovld __cnfn convert_short8_rtn(uchar8);\n"
31433"short8 __ovld __cnfn convert_short8_sat_rtn(uchar8);\n"
31434"short8 __ovld __cnfn convert_short8(uchar8);\n"
31435"short8 __ovld __cnfn convert_short8_sat(uchar8);\n"
31436"short8 __ovld __cnfn convert_short8_rte(short8);\n"
31437"short8 __ovld __cnfn convert_short8_sat_rte(short8);\n"
31438"short8 __ovld __cnfn convert_short8_rtz(short8);\n"
31439"short8 __ovld __cnfn convert_short8_sat_rtz(short8);\n"
31440"short8 __ovld __cnfn convert_short8_rtp(short8);\n"
31441"short8 __ovld __cnfn convert_short8_sat_rtp(short8);\n"
31442"short8 __ovld __cnfn convert_short8_rtn(short8);\n"
31443"short8 __ovld __cnfn convert_short8_sat_rtn(short8);\n"
31444"short8 __ovld __cnfn convert_short8(short8);\n"
31445"short8 __ovld __cnfn convert_short8_sat(short8);\n"
31446"short8 __ovld __cnfn convert_short8_rte(ushort8);\n"
31447"short8 __ovld __cnfn convert_short8_sat_rte(ushort8);\n"
31448"short8 __ovld __cnfn convert_short8_rtz(ushort8);\n"
31449"short8 __ovld __cnfn convert_short8_sat_rtz(ushort8);\n"
31450"short8 __ovld __cnfn convert_short8_rtp(ushort8);\n"
31451"short8 __ovld __cnfn convert_short8_sat_rtp(ushort8);\n"
31452"short8 __ovld __cnfn convert_short8_rtn(ushort8);\n"
31453"short8 __ovld __cnfn convert_short8_sat_rtn(ushort8);\n"
31454"short8 __ovld __cnfn convert_short8(ushort8);\n"
31455"short8 __ovld __cnfn convert_short8_sat(ushort8);\n"
31456"short8 __ovld __cnfn convert_short8_rte(int8);\n"
31457"short8 __ovld __cnfn convert_short8_sat_rte(int8);\n"
31458"short8 __ovld __cnfn convert_short8_rtz(int8);\n"
31459"short8 __ovld __cnfn convert_short8_sat_rtz(int8);\n"
31460"short8 __ovld __cnfn convert_short8_rtp(int8);\n"
31461"short8 __ovld __cnfn convert_short8_sat_rtp(int8);\n"
31462"short8 __ovld __cnfn convert_short8_rtn(int8);\n"
31463"short8 __ovld __cnfn convert_short8_sat_rtn(int8);\n"
31464"short8 __ovld __cnfn convert_short8(int8);\n"
31465"short8 __ovld __cnfn convert_short8_sat(int8);\n"
31466"short8 __ovld __cnfn convert_short8_rte(uint8);\n"
31467"short8 __ovld __cnfn convert_short8_sat_rte(uint8);\n"
31468"short8 __ovld __cnfn convert_short8_rtz(uint8);\n"
31469"short8 __ovld __cnfn convert_short8_sat_rtz(uint8);\n"
31470"short8 __ovld __cnfn convert_short8_rtp(uint8);\n"
31471"short8 __ovld __cnfn convert_short8_sat_rtp(uint8);\n"
31472"short8 __ovld __cnfn convert_short8_rtn(uint8);\n"
31473"short8 __ovld __cnfn convert_short8_sat_rtn(uint8);\n"
31474"short8 __ovld __cnfn convert_short8(uint8);\n"
31475"short8 __ovld __cnfn convert_short8_sat(uint8);\n"
31476"short8 __ovld __cnfn convert_short8_rte(long8);\n"
31477"short8 __ovld __cnfn convert_short8_sat_rte(long8);\n"
31478"short8 __ovld __cnfn convert_short8_rtz(long8);\n"
31479"short8 __ovld __cnfn convert_short8_sat_rtz(long8);\n"
31480"short8 __ovld __cnfn convert_short8_rtp(long8);\n"
31481"short8 __ovld __cnfn convert_short8_sat_rtp(long8);\n"
31482"short8 __ovld __cnfn convert_short8_rtn(long8);\n"
31483"short8 __ovld __cnfn convert_short8_sat_rtn(long8);\n"
31484"short8 __ovld __cnfn convert_short8(long8);\n"
31485"short8 __ovld __cnfn convert_short8_sat(long8);\n"
31486"short8 __ovld __cnfn convert_short8_rte(ulong8);\n"
31487"short8 __ovld __cnfn convert_short8_sat_rte(ulong8);\n"
31488"short8 __ovld __cnfn convert_short8_rtz(ulong8);\n"
31489"short8 __ovld __cnfn convert_short8_sat_rtz(ulong8);\n"
31490"short8 __ovld __cnfn convert_short8_rtp(ulong8);\n"
31491"short8 __ovld __cnfn convert_short8_sat_rtp(ulong8);\n"
31492"short8 __ovld __cnfn convert_short8_rtn(ulong8);\n"
31493"short8 __ovld __cnfn convert_short8_sat_rtn(ulong8);\n"
31494"short8 __ovld __cnfn convert_short8(ulong8);\n"
31495"short8 __ovld __cnfn convert_short8_sat(ulong8);\n"
31496"short8 __ovld __cnfn convert_short8_rte(float8);\n"
31497"short8 __ovld __cnfn convert_short8_sat_rte(float8);\n"
31498"short8 __ovld __cnfn convert_short8_rtz(float8);\n"
31499"short8 __ovld __cnfn convert_short8_sat_rtz(float8);\n"
31500"short8 __ovld __cnfn convert_short8_rtp(float8);\n"
31501"short8 __ovld __cnfn convert_short8_sat_rtp(float8);\n"
31502"short8 __ovld __cnfn convert_short8_rtn(float8);\n"
31503"short8 __ovld __cnfn convert_short8_sat_rtn(float8);\n"
31504"short8 __ovld __cnfn convert_short8(float8);\n"
31505"short8 __ovld __cnfn convert_short8_sat(float8);\n"
31506"ushort8 __ovld __cnfn convert_ushort8_rte(char8);\n"
31507"ushort8 __ovld __cnfn convert_ushort8_sat_rte(char8);\n"
31508"ushort8 __ovld __cnfn convert_ushort8_rtz(char8);\n"
31509"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(char8);\n"
31510"ushort8 __ovld __cnfn convert_ushort8_rtp(char8);\n"
31511"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(char8);\n"
31512"ushort8 __ovld __cnfn convert_ushort8_rtn(char8);\n"
31513"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(char8);\n"
31514"ushort8 __ovld __cnfn convert_ushort8(char8);\n"
31515"ushort8 __ovld __cnfn convert_ushort8_sat(char8);\n"
31516"ushort8 __ovld __cnfn convert_ushort8_rte(uchar8);\n"
31517"ushort8 __ovld __cnfn convert_ushort8_sat_rte(uchar8);\n"
31518"ushort8 __ovld __cnfn convert_ushort8_rtz(uchar8);\n"
31519"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uchar8);\n"
31520"ushort8 __ovld __cnfn convert_ushort8_rtp(uchar8);\n"
31521"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(uchar8);\n"
31522"ushort8 __ovld __cnfn convert_ushort8_rtn(uchar8);\n"
31523"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uchar8);\n"
31524"ushort8 __ovld __cnfn convert_ushort8(uchar8);\n"
31525"ushort8 __ovld __cnfn convert_ushort8_sat(uchar8);\n"
31526"ushort8 __ovld __cnfn convert_ushort8_rte(short8);\n"
31527"ushort8 __ovld __cnfn convert_ushort8_sat_rte(short8);\n"
31528"ushort8 __ovld __cnfn convert_ushort8_rtz(short8);\n"
31529"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(short8);\n"
31530"ushort8 __ovld __cnfn convert_ushort8_rtp(short8);\n"
31531"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(short8);\n"
31532"ushort8 __ovld __cnfn convert_ushort8_rtn(short8);\n"
31533"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(short8);\n"
31534"ushort8 __ovld __cnfn convert_ushort8(short8);\n"
31535"ushort8 __ovld __cnfn convert_ushort8_sat(short8);\n"
31536"ushort8 __ovld __cnfn convert_ushort8_rte(ushort8);\n"
31537"ushort8 __ovld __cnfn convert_ushort8_sat_rte(ushort8);\n"
31538"ushort8 __ovld __cnfn convert_ushort8_rtz(ushort8);\n"
31539"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(ushort8);\n"
31540"ushort8 __ovld __cnfn convert_ushort8_rtp(ushort8);\n"
31541"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ushort8);\n"
31542"ushort8 __ovld __cnfn convert_ushort8_rtn(ushort8);\n"
31543"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ushort8);\n"
31544"ushort8 __ovld __cnfn convert_ushort8(ushort8);\n"
31545"ushort8 __ovld __cnfn convert_ushort8_sat(ushort8);\n"
31546"ushort8 __ovld __cnfn convert_ushort8_rte(int8);\n"
31547"ushort8 __ovld __cnfn convert_ushort8_sat_rte(int8);\n"
31548"ushort8 __ovld __cnfn convert_ushort8_rtz(int8);\n"
31549"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(int8);\n"
31550"ushort8 __ovld __cnfn convert_ushort8_rtp(int8);\n"
31551"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(int8);\n"
31552"ushort8 __ovld __cnfn convert_ushort8_rtn(int8);\n"
31553"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(int8);\n"
31554"ushort8 __ovld __cnfn convert_ushort8(int8);\n"
31555"ushort8 __ovld __cnfn convert_ushort8_sat(int8);\n"
31556"ushort8 __ovld __cnfn convert_ushort8_rte(uint8);\n"
31557"ushort8 __ovld __cnfn convert_ushort8_sat_rte(uint8);\n"
31558"ushort8 __ovld __cnfn convert_ushort8_rtz(uint8);\n"
31559"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uint8);\n"
31560"ushort8 __ovld __cnfn convert_ushort8_rtp(uint8);\n"
31561"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(uint8);\n"
31562"ushort8 __ovld __cnfn convert_ushort8_rtn(uint8);\n"
31563"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uint8);\n"
31564"ushort8 __ovld __cnfn convert_ushort8(uint8);\n"
31565"ushort8 __ovld __cnfn convert_ushort8_sat(uint8);\n"
31566"ushort8 __ovld __cnfn convert_ushort8_rte(long8);\n"
31567"ushort8 __ovld __cnfn convert_ushort8_sat_rte(long8);\n"
31568"ushort8 __ovld __cnfn convert_ushort8_rtz(long8);\n"
31569"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(long8);\n"
31570"ushort8 __ovld __cnfn convert_ushort8_rtp(long8);\n"
31571"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(long8);\n"
31572"ushort8 __ovld __cnfn convert_ushort8_rtn(long8);\n"
31573"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(long8);\n"
31574"ushort8 __ovld __cnfn convert_ushort8(long8);\n"
31575"ushort8 __ovld __cnfn convert_ushort8_sat(long8);\n"
31576"ushort8 __ovld __cnfn convert_ushort8_rte(ulong8);\n"
31577"ushort8 __ovld __cnfn convert_ushort8_sat_rte(ulong8);\n"
31578"ushort8 __ovld __cnfn convert_ushort8_rtz(ulong8);\n"
31579"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(ulong8);\n"
31580"ushort8 __ovld __cnfn convert_ushort8_rtp(ulong8);\n"
31581"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ulong8);\n"
31582"ushort8 __ovld __cnfn convert_ushort8_rtn(ulong8);\n"
31583"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ulong8);\n"
31584"ushort8 __ovld __cnfn convert_ushort8(ulong8);\n"
31585"ushort8 __ovld __cnfn convert_ushort8_sat(ulong8);\n"
31586"ushort8 __ovld __cnfn convert_ushort8_rte(float8);\n"
31587"ushort8 __ovld __cnfn convert_ushort8_sat_rte(float8);\n"
31588"ushort8 __ovld __cnfn convert_ushort8_rtz(float8);\n"
31589"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(float8);\n"
31590"ushort8 __ovld __cnfn convert_ushort8_rtp(float8);\n"
31591"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(float8);\n"
31592"ushort8 __ovld __cnfn convert_ushort8_rtn(float8);\n"
31593"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(float8);\n"
31594"ushort8 __ovld __cnfn convert_ushort8(float8);\n"
31595"ushort8 __ovld __cnfn convert_ushort8_sat(float8);\n"
31596"int8 __ovld __cnfn convert_int8_rte(char8);\n"
31597"int8 __ovld __cnfn convert_int8_sat_rte(char8);\n"
31598"int8 __ovld __cnfn convert_int8_rtz(char8);\n"
31599"int8 __ovld __cnfn convert_int8_sat_rtz(char8);\n"
31600"int8 __ovld __cnfn convert_int8_rtp(char8);\n"
31601"int8 __ovld __cnfn convert_int8_sat_rtp(char8);\n"
31602"int8 __ovld __cnfn convert_int8_rtn(char8);\n"
31603"int8 __ovld __cnfn convert_int8_sat_rtn(char8);\n"
31604"int8 __ovld __cnfn convert_int8(char8);\n"
31605"int8 __ovld __cnfn convert_int8_sat(char8);\n"
31606"int8 __ovld __cnfn convert_int8_rte(uchar8);\n"
31607"int8 __ovld __cnfn convert_int8_sat_rte(uchar8);\n"
31608"int8 __ovld __cnfn convert_int8_rtz(uchar8);\n"
31609"int8 __ovld __cnfn convert_int8_sat_rtz(uchar8);\n"
31610"int8 __ovld __cnfn convert_int8_rtp(uchar8);\n"
31611"int8 __ovld __cnfn convert_int8_sat_rtp(uchar8);\n"
31612"int8 __ovld __cnfn convert_int8_rtn(uchar8);\n"
31613"int8 __ovld __cnfn convert_int8_sat_rtn(uchar8);\n"
31614"int8 __ovld __cnfn convert_int8(uchar8);\n"
31615"int8 __ovld __cnfn convert_int8_sat(uchar8);\n"
31616"int8 __ovld __cnfn convert_int8_rte(short8);\n"
31617"int8 __ovld __cnfn convert_int8_sat_rte(short8);\n"
31618"int8 __ovld __cnfn convert_int8_rtz(short8);\n"
31619"int8 __ovld __cnfn convert_int8_sat_rtz(short8);\n"
31620"int8 __ovld __cnfn convert_int8_rtp(short8);\n"
31621"int8 __ovld __cnfn convert_int8_sat_rtp(short8);\n"
31622"int8 __ovld __cnfn convert_int8_rtn(short8);\n"
31623"int8 __ovld __cnfn convert_int8_sat_rtn(short8);\n"
31624"int8 __ovld __cnfn convert_int8(short8);\n"
31625"int8 __ovld __cnfn convert_int8_sat(short8);\n"
31626"int8 __ovld __cnfn convert_int8_rte(ushort8);\n"
31627"int8 __ovld __cnfn convert_int8_sat_rte(ushort8);\n"
31628"int8 __ovld __cnfn convert_int8_rtz(ushort8);\n"
31629"int8 __ovld __cnfn convert_int8_sat_rtz(ushort8);\n"
31630"int8 __ovld __cnfn convert_int8_rtp(ushort8);\n"
31631"int8 __ovld __cnfn convert_int8_sat_rtp(ushort8);\n"
31632"int8 __ovld __cnfn convert_int8_rtn(ushort8);\n"
31633"int8 __ovld __cnfn convert_int8_sat_rtn(ushort8);\n"
31634"int8 __ovld __cnfn convert_int8(ushort8);\n"
31635"int8 __ovld __cnfn convert_int8_sat(ushort8);\n"
31636"int8 __ovld __cnfn convert_int8_rte(int8);\n"
31637"int8 __ovld __cnfn convert_int8_sat_rte(int8);\n"
31638"int8 __ovld __cnfn convert_int8_rtz(int8);\n"
31639"int8 __ovld __cnfn convert_int8_sat_rtz(int8);\n"
31640"int8 __ovld __cnfn convert_int8_rtp(int8);\n"
31641"int8 __ovld __cnfn convert_int8_sat_rtp(int8);\n"
31642"int8 __ovld __cnfn convert_int8_rtn(int8);\n"
31643"int8 __ovld __cnfn convert_int8_sat_rtn(int8);\n"
31644"int8 __ovld __cnfn convert_int8(int8);\n"
31645"int8 __ovld __cnfn convert_int8_sat(int8);\n"
31646"int8 __ovld __cnfn convert_int8_rte(uint8);\n"
31647"int8 __ovld __cnfn convert_int8_sat_rte(uint8);\n"
31648"int8 __ovld __cnfn convert_int8_rtz(uint8);\n"
31649"int8 __ovld __cnfn convert_int8_sat_rtz(uint8);\n"
31650"int8 __ovld __cnfn convert_int8_rtp(uint8);\n"
31651"int8 __ovld __cnfn convert_int8_sat_rtp(uint8);\n"
31652"int8 __ovld __cnfn convert_int8_rtn(uint8);\n"
31653"int8 __ovld __cnfn convert_int8_sat_rtn(uint8);\n"
31654"int8 __ovld __cnfn convert_int8(uint8);\n"
31655"int8 __ovld __cnfn convert_int8_sat(uint8);\n"
31656"int8 __ovld __cnfn convert_int8_rte(long8);\n"
31657"int8 __ovld __cnfn convert_int8_sat_rte(long8);\n"
31658"int8 __ovld __cnfn convert_int8_rtz(long8);\n"
31659"int8 __ovld __cnfn convert_int8_sat_rtz(long8);\n"
31660"int8 __ovld __cnfn convert_int8_rtp(long8);\n"
31661"int8 __ovld __cnfn convert_int8_sat_rtp(long8);\n"
31662"int8 __ovld __cnfn convert_int8_rtn(long8);\n"
31663"int8 __ovld __cnfn convert_int8_sat_rtn(long8);\n"
31664"int8 __ovld __cnfn convert_int8(long8);\n"
31665"int8 __ovld __cnfn convert_int8_sat(long8);\n"
31666"int8 __ovld __cnfn convert_int8_rte(ulong8);\n"
31667"int8 __ovld __cnfn convert_int8_sat_rte(ulong8);\n"
31668"int8 __ovld __cnfn convert_int8_rtz(ulong8);\n"
31669"int8 __ovld __cnfn convert_int8_sat_rtz(ulong8);\n"
31670"int8 __ovld __cnfn convert_int8_rtp(ulong8);\n"
31671"int8 __ovld __cnfn convert_int8_sat_rtp(ulong8);\n"
31672"int8 __ovld __cnfn convert_int8_rtn(ulong8);\n"
31673"int8 __ovld __cnfn convert_int8_sat_rtn(ulong8);\n"
31674"int8 __ovld __cnfn convert_int8(ulong8);\n"
31675"int8 __ovld __cnfn convert_int8_sat(ulong8);\n"
31676"int8 __ovld __cnfn convert_int8_rte(float8);\n"
31677"int8 __ovld __cnfn convert_int8_sat_rte(float8);\n"
31678"int8 __ovld __cnfn convert_int8_rtz(float8);\n"
31679"int8 __ovld __cnfn convert_int8_sat_rtz(float8);\n"
31680"int8 __ovld __cnfn convert_int8_rtp(float8);\n"
31681"int8 __ovld __cnfn convert_int8_sat_rtp(float8);\n"
31682"int8 __ovld __cnfn convert_int8_rtn(float8);\n"
31683"int8 __ovld __cnfn convert_int8_sat_rtn(float8);\n"
31684"int8 __ovld __cnfn convert_int8(float8);\n"
31685"int8 __ovld __cnfn convert_int8_sat(float8);\n"
31686"uint8 __ovld __cnfn convert_uint8_rte(char8);\n"
31687"uint8 __ovld __cnfn convert_uint8_sat_rte(char8);\n"
31688"uint8 __ovld __cnfn convert_uint8_rtz(char8);\n"
31689"uint8 __ovld __cnfn convert_uint8_sat_rtz(char8);\n"
31690"uint8 __ovld __cnfn convert_uint8_rtp(char8);\n"
31691"uint8 __ovld __cnfn convert_uint8_sat_rtp(char8);\n"
31692"uint8 __ovld __cnfn convert_uint8_rtn(char8);\n"
31693"uint8 __ovld __cnfn convert_uint8_sat_rtn(char8);\n"
31694"uint8 __ovld __cnfn convert_uint8(char8);\n"
31695"uint8 __ovld __cnfn convert_uint8_sat(char8);\n"
31696"uint8 __ovld __cnfn convert_uint8_rte(uchar8);\n"
31697"uint8 __ovld __cnfn convert_uint8_sat_rte(uchar8);\n"
31698"uint8 __ovld __cnfn convert_uint8_rtz(uchar8);\n"
31699"uint8 __ovld __cnfn convert_uint8_sat_rtz(uchar8);\n"
31700"uint8 __ovld __cnfn convert_uint8_rtp(uchar8);\n"
31701"uint8 __ovld __cnfn convert_uint8_sat_rtp(uchar8);\n"
31702"uint8 __ovld __cnfn convert_uint8_rtn(uchar8);\n"
31703"uint8 __ovld __cnfn convert_uint8_sat_rtn(uchar8);\n"
31704"uint8 __ovld __cnfn convert_uint8(uchar8);\n"
31705"uint8 __ovld __cnfn convert_uint8_sat(uchar8);\n"
31706"uint8 __ovld __cnfn convert_uint8_rte(short8);\n"
31707"uint8 __ovld __cnfn convert_uint8_sat_rte(short8);\n"
31708"uint8 __ovld __cnfn convert_uint8_rtz(short8);\n"
31709"uint8 __ovld __cnfn convert_uint8_sat_rtz(short8);\n"
31710"uint8 __ovld __cnfn convert_uint8_rtp(short8);\n"
31711"uint8 __ovld __cnfn convert_uint8_sat_rtp(short8);\n"
31712"uint8 __ovld __cnfn convert_uint8_rtn(short8);\n"
31713"uint8 __ovld __cnfn convert_uint8_sat_rtn(short8);\n"
31714"uint8 __ovld __cnfn convert_uint8(short8);\n"
31715"uint8 __ovld __cnfn convert_uint8_sat(short8);\n"
31716"uint8 __ovld __cnfn convert_uint8_rte(ushort8);\n"
31717"uint8 __ovld __cnfn convert_uint8_sat_rte(ushort8);\n"
31718"uint8 __ovld __cnfn convert_uint8_rtz(ushort8);\n"
31719"uint8 __ovld __cnfn convert_uint8_sat_rtz(ushort8);\n"
31720"uint8 __ovld __cnfn convert_uint8_rtp(ushort8);\n"
31721"uint8 __ovld __cnfn convert_uint8_sat_rtp(ushort8);\n"
31722"uint8 __ovld __cnfn convert_uint8_rtn(ushort8);\n"
31723"uint8 __ovld __cnfn convert_uint8_sat_rtn(ushort8);\n"
31724"uint8 __ovld __cnfn convert_uint8(ushort8);\n"
31725"uint8 __ovld __cnfn convert_uint8_sat(ushort8);\n"
31726"uint8 __ovld __cnfn convert_uint8_rte(int8);\n"
31727"uint8 __ovld __cnfn convert_uint8_sat_rte(int8);\n"
31728"uint8 __ovld __cnfn convert_uint8_rtz(int8);\n"
31729"uint8 __ovld __cnfn convert_uint8_sat_rtz(int8);\n"
31730"uint8 __ovld __cnfn convert_uint8_rtp(int8);\n"
31731"uint8 __ovld __cnfn convert_uint8_sat_rtp(int8);\n"
31732"uint8 __ovld __cnfn convert_uint8_rtn(int8);\n"
31733"uint8 __ovld __cnfn convert_uint8_sat_rtn(int8);\n"
31734"uint8 __ovld __cnfn convert_uint8(int8);\n"
31735"uint8 __ovld __cnfn convert_uint8_sat(int8);\n"
31736"uint8 __ovld __cnfn convert_uint8_rte(uint8);\n"
31737"uint8 __ovld __cnfn convert_uint8_sat_rte(uint8);\n"
31738"uint8 __ovld __cnfn convert_uint8_rtz(uint8);\n"
31739"uint8 __ovld __cnfn convert_uint8_sat_rtz(uint8);\n"
31740"uint8 __ovld __cnfn convert_uint8_rtp(uint8);\n"
31741"uint8 __ovld __cnfn convert_uint8_sat_rtp(uint8);\n"
31742"uint8 __ovld __cnfn convert_uint8_rtn(uint8);\n"
31743"uint8 __ovld __cnfn convert_uint8_sat_rtn(uint8);\n"
31744"uint8 __ovld __cnfn convert_uint8(uint8);\n"
31745"uint8 __ovld __cnfn convert_uint8_sat(uint8);\n"
31746"uint8 __ovld __cnfn convert_uint8_rte(long8);\n"
31747"uint8 __ovld __cnfn convert_uint8_sat_rte(long8);\n"
31748"uint8 __ovld __cnfn convert_uint8_rtz(long8);\n"
31749"uint8 __ovld __cnfn convert_uint8_sat_rtz(long8);\n"
31750"uint8 __ovld __cnfn convert_uint8_rtp(long8);\n"
31751"uint8 __ovld __cnfn convert_uint8_sat_rtp(long8);\n"
31752"uint8 __ovld __cnfn convert_uint8_rtn(long8);\n"
31753"uint8 __ovld __cnfn convert_uint8_sat_rtn(long8);\n"
31754"uint8 __ovld __cnfn convert_uint8(long8);\n"
31755"uint8 __ovld __cnfn convert_uint8_sat(long8);\n"
31756"uint8 __ovld __cnfn convert_uint8_rte(ulong8);\n"
31757"uint8 __ovld __cnfn convert_uint8_sat_rte(ulong8);\n"
31758"uint8 __ovld __cnfn convert_uint8_rtz(ulong8);\n"
31759"uint8 __ovld __cnfn convert_uint8_sat_rtz(ulong8);\n"
31760"uint8 __ovld __cnfn convert_uint8_rtp(ulong8);\n"
31761"uint8 __ovld __cnfn convert_uint8_sat_rtp(ulong8);\n"
31762"uint8 __ovld __cnfn convert_uint8_rtn(ulong8);\n"
31763"uint8 __ovld __cnfn convert_uint8_sat_rtn(ulong8);\n"
31764"uint8 __ovld __cnfn convert_uint8(ulong8);\n"
31765"uint8 __ovld __cnfn convert_uint8_sat(ulong8);\n"
31766"uint8 __ovld __cnfn convert_uint8_rte(float8);\n"
31767"uint8 __ovld __cnfn convert_uint8_sat_rte(float8);\n"
31768"uint8 __ovld __cnfn convert_uint8_rtz(float8);\n"
31769"uint8 __ovld __cnfn convert_uint8_sat_rtz(float8);\n"
31770"uint8 __ovld __cnfn convert_uint8_rtp(float8);\n"
31771"uint8 __ovld __cnfn convert_uint8_sat_rtp(float8);\n"
31772"uint8 __ovld __cnfn convert_uint8_rtn(float8);\n"
31773"uint8 __ovld __cnfn convert_uint8_sat_rtn(float8);\n"
31774"uint8 __ovld __cnfn convert_uint8(float8);\n"
31775"uint8 __ovld __cnfn convert_uint8_sat(float8);\n"
31776"long8 __ovld __cnfn convert_long8_rte(char8);\n"
31777"long8 __ovld __cnfn convert_long8_sat_rte(char8);\n"
31778"long8 __ovld __cnfn convert_long8_rtz(char8);\n"
31779"long8 __ovld __cnfn convert_long8_sat_rtz(char8);\n"
31780"long8 __ovld __cnfn convert_long8_rtp(char8);\n"
31781"long8 __ovld __cnfn convert_long8_sat_rtp(char8);\n"
31782"long8 __ovld __cnfn convert_long8_rtn(char8);\n"
31783"long8 __ovld __cnfn convert_long8_sat_rtn(char8);\n"
31784"long8 __ovld __cnfn convert_long8(char8);\n"
31785"long8 __ovld __cnfn convert_long8_sat(char8);\n"
31786"long8 __ovld __cnfn convert_long8_rte(uchar8);\n"
31787"long8 __ovld __cnfn convert_long8_sat_rte(uchar8);\n"
31788"long8 __ovld __cnfn convert_long8_rtz(uchar8);\n"
31789"long8 __ovld __cnfn convert_long8_sat_rtz(uchar8);\n"
31790"long8 __ovld __cnfn convert_long8_rtp(uchar8);\n"
31791"long8 __ovld __cnfn convert_long8_sat_rtp(uchar8);\n"
31792"long8 __ovld __cnfn convert_long8_rtn(uchar8);\n"
31793"long8 __ovld __cnfn convert_long8_sat_rtn(uchar8);\n"
31794"long8 __ovld __cnfn convert_long8(uchar8);\n"
31795"long8 __ovld __cnfn convert_long8_sat(uchar8);\n"
31796"long8 __ovld __cnfn convert_long8_rte(short8);\n"
31797"long8 __ovld __cnfn convert_long8_sat_rte(short8);\n"
31798"long8 __ovld __cnfn convert_long8_rtz(short8);\n"
31799"long8 __ovld __cnfn convert_long8_sat_rtz(short8);\n"
31800"long8 __ovld __cnfn convert_long8_rtp(short8);\n"
31801"long8 __ovld __cnfn convert_long8_sat_rtp(short8);\n"
31802"long8 __ovld __cnfn convert_long8_rtn(short8);\n"
31803"long8 __ovld __cnfn convert_long8_sat_rtn(short8);\n"
31804"long8 __ovld __cnfn convert_long8(short8);\n"
31805"long8 __ovld __cnfn convert_long8_sat(short8);\n"
31806"long8 __ovld __cnfn convert_long8_rte(ushort8);\n"
31807"long8 __ovld __cnfn convert_long8_sat_rte(ushort8);\n"
31808"long8 __ovld __cnfn convert_long8_rtz(ushort8);\n"
31809"long8 __ovld __cnfn convert_long8_sat_rtz(ushort8);\n"
31810"long8 __ovld __cnfn convert_long8_rtp(ushort8);\n"
31811"long8 __ovld __cnfn convert_long8_sat_rtp(ushort8);\n"
31812"long8 __ovld __cnfn convert_long8_rtn(ushort8);\n"
31813"long8 __ovld __cnfn convert_long8_sat_rtn(ushort8);\n"
31814"long8 __ovld __cnfn convert_long8(ushort8);\n"
31815"long8 __ovld __cnfn convert_long8_sat(ushort8);\n"
31816"long8 __ovld __cnfn convert_long8_rte(int8);\n"
31817"long8 __ovld __cnfn convert_long8_sat_rte(int8);\n"
31818"long8 __ovld __cnfn convert_long8_rtz(int8);\n"
31819"long8 __ovld __cnfn convert_long8_sat_rtz(int8);\n"
31820"long8 __ovld __cnfn convert_long8_rtp(int8);\n"
31821"long8 __ovld __cnfn convert_long8_sat_rtp(int8);\n"
31822"long8 __ovld __cnfn convert_long8_rtn(int8);\n"
31823"long8 __ovld __cnfn convert_long8_sat_rtn(int8);\n"
31824"long8 __ovld __cnfn convert_long8(int8);\n"
31825"long8 __ovld __cnfn convert_long8_sat(int8);\n"
31826"long8 __ovld __cnfn convert_long8_rte(uint8);\n"
31827"long8 __ovld __cnfn convert_long8_sat_rte(uint8);\n"
31828"long8 __ovld __cnfn convert_long8_rtz(uint8);\n"
31829"long8 __ovld __cnfn convert_long8_sat_rtz(uint8);\n"
31830"long8 __ovld __cnfn convert_long8_rtp(uint8);\n"
31831"long8 __ovld __cnfn convert_long8_sat_rtp(uint8);\n"
31832"long8 __ovld __cnfn convert_long8_rtn(uint8);\n"
31833"long8 __ovld __cnfn convert_long8_sat_rtn(uint8);\n"
31834"long8 __ovld __cnfn convert_long8(uint8);\n"
31835"long8 __ovld __cnfn convert_long8_sat(uint8);\n"
31836"long8 __ovld __cnfn convert_long8_rte(long8);\n"
31837"long8 __ovld __cnfn convert_long8_sat_rte(long8);\n"
31838"long8 __ovld __cnfn convert_long8_rtz(long8);\n"
31839"long8 __ovld __cnfn convert_long8_sat_rtz(long8);\n"
31840"long8 __ovld __cnfn convert_long8_rtp(long8);\n"
31841"long8 __ovld __cnfn convert_long8_sat_rtp(long8);\n"
31842"long8 __ovld __cnfn convert_long8_rtn(long8);\n"
31843"long8 __ovld __cnfn convert_long8_sat_rtn(long8);\n"
31844"long8 __ovld __cnfn convert_long8(long8);\n"
31845"long8 __ovld __cnfn convert_long8_sat(long8);\n"
31846"long8 __ovld __cnfn convert_long8_rte(ulong8);\n"
31847"long8 __ovld __cnfn convert_long8_sat_rte(ulong8);\n"
31848"long8 __ovld __cnfn convert_long8_rtz(ulong8);\n"
31849"long8 __ovld __cnfn convert_long8_sat_rtz(ulong8);\n"
31850"long8 __ovld __cnfn convert_long8_rtp(ulong8);\n"
31851"long8 __ovld __cnfn convert_long8_sat_rtp(ulong8);\n"
31852"long8 __ovld __cnfn convert_long8_rtn(ulong8);\n"
31853"long8 __ovld __cnfn convert_long8_sat_rtn(ulong8);\n"
31854"long8 __ovld __cnfn convert_long8(ulong8);\n"
31855"long8 __ovld __cnfn convert_long8_sat(ulong8);\n"
31856"long8 __ovld __cnfn convert_long8_rte(float8);\n"
31857"long8 __ovld __cnfn convert_long8_sat_rte(float8);\n"
31858"long8 __ovld __cnfn convert_long8_rtz(float8);\n"
31859"long8 __ovld __cnfn convert_long8_sat_rtz(float8);\n"
31860"long8 __ovld __cnfn convert_long8_rtp(float8);\n"
31861"long8 __ovld __cnfn convert_long8_sat_rtp(float8);\n"
31862"long8 __ovld __cnfn convert_long8_rtn(float8);\n"
31863"long8 __ovld __cnfn convert_long8_sat_rtn(float8);\n"
31864"long8 __ovld __cnfn convert_long8(float8);\n"
31865"long8 __ovld __cnfn convert_long8_sat(float8);\n"
31866"ulong8 __ovld __cnfn convert_ulong8_rte(char8);\n"
31867"ulong8 __ovld __cnfn convert_ulong8_sat_rte(char8);\n"
31868"ulong8 __ovld __cnfn convert_ulong8_rtz(char8);\n"
31869"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(char8);\n"
31870"ulong8 __ovld __cnfn convert_ulong8_rtp(char8);\n"
31871"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(char8);\n"
31872"ulong8 __ovld __cnfn convert_ulong8_rtn(char8);\n"
31873"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(char8);\n"
31874"ulong8 __ovld __cnfn convert_ulong8(char8);\n"
31875"ulong8 __ovld __cnfn convert_ulong8_sat(char8);\n"
31876"ulong8 __ovld __cnfn convert_ulong8_rte(uchar8);\n"
31877"ulong8 __ovld __cnfn convert_ulong8_sat_rte(uchar8);\n"
31878"ulong8 __ovld __cnfn convert_ulong8_rtz(uchar8);\n"
31879"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uchar8);\n"
31880"ulong8 __ovld __cnfn convert_ulong8_rtp(uchar8);\n"
31881"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uchar8);\n"
31882"ulong8 __ovld __cnfn convert_ulong8_rtn(uchar8);\n"
31883"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uchar8);\n"
31884"ulong8 __ovld __cnfn convert_ulong8(uchar8);\n"
31885"ulong8 __ovld __cnfn convert_ulong8_sat(uchar8);\n"
31886"ulong8 __ovld __cnfn convert_ulong8_rte(short8);\n"
31887"ulong8 __ovld __cnfn convert_ulong8_sat_rte(short8);\n"
31888"ulong8 __ovld __cnfn convert_ulong8_rtz(short8);\n"
31889"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(short8);\n"
31890"ulong8 __ovld __cnfn convert_ulong8_rtp(short8);\n"
31891"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(short8);\n"
31892"ulong8 __ovld __cnfn convert_ulong8_rtn(short8);\n"
31893"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(short8);\n"
31894"ulong8 __ovld __cnfn convert_ulong8(short8);\n"
31895"ulong8 __ovld __cnfn convert_ulong8_sat(short8);\n"
31896"ulong8 __ovld __cnfn convert_ulong8_rte(ushort8);\n"
31897"ulong8 __ovld __cnfn convert_ulong8_sat_rte(ushort8);\n"
31898"ulong8 __ovld __cnfn convert_ulong8_rtz(ushort8);\n"
31899"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ushort8);\n"
31900"ulong8 __ovld __cnfn convert_ulong8_rtp(ushort8);\n"
31901"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ushort8);\n"
31902"ulong8 __ovld __cnfn convert_ulong8_rtn(ushort8);\n"
31903"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ushort8);\n"
31904"ulong8 __ovld __cnfn convert_ulong8(ushort8);\n"
31905"ulong8 __ovld __cnfn convert_ulong8_sat(ushort8);\n"
31906"ulong8 __ovld __cnfn convert_ulong8_rte(int8);\n"
31907"ulong8 __ovld __cnfn convert_ulong8_sat_rte(int8);\n"
31908"ulong8 __ovld __cnfn convert_ulong8_rtz(int8);\n"
31909"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(int8);\n"
31910"ulong8 __ovld __cnfn convert_ulong8_rtp(int8);\n"
31911"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(int8);\n"
31912"ulong8 __ovld __cnfn convert_ulong8_rtn(int8);\n"
31913"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(int8);\n"
31914"ulong8 __ovld __cnfn convert_ulong8(int8);\n"
31915"ulong8 __ovld __cnfn convert_ulong8_sat(int8);\n"
31916"ulong8 __ovld __cnfn convert_ulong8_rte(uint8);\n"
31917"ulong8 __ovld __cnfn convert_ulong8_sat_rte(uint8);\n"
31918"ulong8 __ovld __cnfn convert_ulong8_rtz(uint8);\n"
31919"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uint8);\n"
31920"ulong8 __ovld __cnfn convert_ulong8_rtp(uint8);\n"
31921"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uint8);\n"
31922"ulong8 __ovld __cnfn convert_ulong8_rtn(uint8);\n"
31923"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uint8);\n"
31924"ulong8 __ovld __cnfn convert_ulong8(uint8);\n"
31925"ulong8 __ovld __cnfn convert_ulong8_sat(uint8);\n"
31926"ulong8 __ovld __cnfn convert_ulong8_rte(long8);\n"
31927"ulong8 __ovld __cnfn convert_ulong8_sat_rte(long8);\n"
31928"ulong8 __ovld __cnfn convert_ulong8_rtz(long8);\n"
31929"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(long8);\n"
31930"ulong8 __ovld __cnfn convert_ulong8_rtp(long8);\n"
31931"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(long8);\n"
31932"ulong8 __ovld __cnfn convert_ulong8_rtn(long8);\n"
31933"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(long8);\n"
31934"ulong8 __ovld __cnfn convert_ulong8(long8);\n"
31935"ulong8 __ovld __cnfn convert_ulong8_sat(long8);\n"
31936"ulong8 __ovld __cnfn convert_ulong8_rte(ulong8);\n"
31937"ulong8 __ovld __cnfn convert_ulong8_sat_rte(ulong8);\n"
31938"ulong8 __ovld __cnfn convert_ulong8_rtz(ulong8);\n"
31939"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ulong8);\n"
31940"ulong8 __ovld __cnfn convert_ulong8_rtp(ulong8);\n"
31941"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ulong8);\n"
31942"ulong8 __ovld __cnfn convert_ulong8_rtn(ulong8);\n"
31943"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ulong8);\n"
31944"ulong8 __ovld __cnfn convert_ulong8(ulong8);\n"
31945"ulong8 __ovld __cnfn convert_ulong8_sat(ulong8);\n"
31946"ulong8 __ovld __cnfn convert_ulong8_rte(float8);\n"
31947"ulong8 __ovld __cnfn convert_ulong8_sat_rte(float8);\n"
31948"ulong8 __ovld __cnfn convert_ulong8_rtz(float8);\n"
31949"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(float8);\n"
31950"ulong8 __ovld __cnfn convert_ulong8_rtp(float8);\n"
31951"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(float8);\n"
31952"ulong8 __ovld __cnfn convert_ulong8_rtn(float8);\n"
31953"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(float8);\n"
31954"ulong8 __ovld __cnfn convert_ulong8(float8);\n"
31955"ulong8 __ovld __cnfn convert_ulong8_sat(float8);\n"
31956"float8 __ovld __cnfn convert_float8_rte(char8);\n"
31957"float8 __ovld __cnfn convert_float8_rtz(char8);\n"
31958"float8 __ovld __cnfn convert_float8_rtp(char8);\n"
31959"float8 __ovld __cnfn convert_float8_rtn(char8);\n"
31960"float8 __ovld __cnfn convert_float8(char8);\n"
31961"float8 __ovld __cnfn convert_float8_rte(uchar8);\n"
31962"float8 __ovld __cnfn convert_float8_rtz(uchar8);\n"
31963"float8 __ovld __cnfn convert_float8_rtp(uchar8);\n"
31964"float8 __ovld __cnfn convert_float8_rtn(uchar8);\n"
31965"float8 __ovld __cnfn convert_float8(uchar8);\n"
31966"float8 __ovld __cnfn convert_float8_rte(short8);\n"
31967"float8 __ovld __cnfn convert_float8_rtz(short8);\n"
31968"float8 __ovld __cnfn convert_float8_rtp(short8);\n"
31969"float8 __ovld __cnfn convert_float8_rtn(short8);\n"
31970"float8 __ovld __cnfn convert_float8(short8);\n"
31971"float8 __ovld __cnfn convert_float8_rte(ushort8);\n"
31972"float8 __ovld __cnfn convert_float8_rtz(ushort8);\n"
31973"float8 __ovld __cnfn convert_float8_rtp(ushort8);\n"
31974"float8 __ovld __cnfn convert_float8_rtn(ushort8);\n"
31975"float8 __ovld __cnfn convert_float8(ushort8);\n"
31976"float8 __ovld __cnfn convert_float8_rte(int8);\n"
31977"float8 __ovld __cnfn convert_float8_rtz(int8);\n"
31978"float8 __ovld __cnfn convert_float8_rtp(int8);\n"
31979"float8 __ovld __cnfn convert_float8_rtn(int8);\n"
31980"float8 __ovld __cnfn convert_float8(int8);\n"
31981"float8 __ovld __cnfn convert_float8_rte(uint8);\n"
31982"float8 __ovld __cnfn convert_float8_rtz(uint8);\n"
31983"float8 __ovld __cnfn convert_float8_rtp(uint8);\n"
31984"float8 __ovld __cnfn convert_float8_rtn(uint8);\n"
31985"float8 __ovld __cnfn convert_float8(uint8);\n"
31986"float8 __ovld __cnfn convert_float8_rte(long8);\n"
31987"float8 __ovld __cnfn convert_float8_rtz(long8);\n"
31988"float8 __ovld __cnfn convert_float8_rtp(long8);\n"
31989"float8 __ovld __cnfn convert_float8_rtn(long8);\n"
31990"float8 __ovld __cnfn convert_float8(long8);\n"
31991"float8 __ovld __cnfn convert_float8_rte(ulong8);\n"
31992"float8 __ovld __cnfn convert_float8_rtz(ulong8);\n"
31993"float8 __ovld __cnfn convert_float8_rtp(ulong8);\n"
31994"float8 __ovld __cnfn convert_float8_rtn(ulong8);\n"
31995"float8 __ovld __cnfn convert_float8(ulong8);\n"
31996"float8 __ovld __cnfn convert_float8_rte(float8);\n"
31997"float8 __ovld __cnfn convert_float8_rtz(float8);\n"
31998"float8 __ovld __cnfn convert_float8_rtp(float8);\n"
31999"float8 __ovld __cnfn convert_float8_rtn(float8);\n"
32000"float8 __ovld __cnfn convert_float8(float8);\n"
32001"char16 __ovld __cnfn convert_char16_rte(char16);\n"
32002"char16 __ovld __cnfn convert_char16_sat_rte(char16);\n"
32003"char16 __ovld __cnfn convert_char16_rtz(char16);\n"
32004"char16 __ovld __cnfn convert_char16_sat_rtz(char16);\n"
32005"char16 __ovld __cnfn convert_char16_rtp(char16);\n"
32006"char16 __ovld __cnfn convert_char16_sat_rtp(char16);\n"
32007"char16 __ovld __cnfn convert_char16_rtn(char16);\n"
32008"char16 __ovld __cnfn convert_char16_sat_rtn(char16);\n"
32009"char16 __ovld __cnfn convert_char16(char16);\n"
32010"char16 __ovld __cnfn convert_char16_sat(char16);\n"
32011"char16 __ovld __cnfn convert_char16_rte(uchar16);\n"
32012"char16 __ovld __cnfn convert_char16_sat_rte(uchar16);\n"
32013"char16 __ovld __cnfn convert_char16_rtz(uchar16);\n"
32014"char16 __ovld __cnfn convert_char16_sat_rtz(uchar16);\n"
32015"char16 __ovld __cnfn convert_char16_rtp(uchar16);\n"
32016"char16 __ovld __cnfn convert_char16_sat_rtp(uchar16);\n"
32017"char16 __ovld __cnfn convert_char16_rtn(uchar16);\n"
32018"char16 __ovld __cnfn convert_char16_sat_rtn(uchar16);\n"
32019"char16 __ovld __cnfn convert_char16(uchar16);\n"
32020"char16 __ovld __cnfn convert_char16_sat(uchar16);\n"
32021"char16 __ovld __cnfn convert_char16_rte(short16);\n"
32022"char16 __ovld __cnfn convert_char16_sat_rte(short16);\n"
32023"char16 __ovld __cnfn convert_char16_rtz(short16);\n"
32024"char16 __ovld __cnfn convert_char16_sat_rtz(short16);\n"
32025"char16 __ovld __cnfn convert_char16_rtp(short16);\n"
32026"char16 __ovld __cnfn convert_char16_sat_rtp(short16);\n"
32027"char16 __ovld __cnfn convert_char16_rtn(short16);\n"
32028"char16 __ovld __cnfn convert_char16_sat_rtn(short16);\n"
32029"char16 __ovld __cnfn convert_char16(short16);\n"
32030"char16 __ovld __cnfn convert_char16_sat(short16);\n"
32031"char16 __ovld __cnfn convert_char16_rte(ushort16);\n"
32032"char16 __ovld __cnfn convert_char16_sat_rte(ushort16);\n"
32033"char16 __ovld __cnfn convert_char16_rtz(ushort16);\n"
32034"char16 __ovld __cnfn convert_char16_sat_rtz(ushort16);\n"
32035"char16 __ovld __cnfn convert_char16_rtp(ushort16);\n"
32036"char16 __ovld __cnfn convert_char16_sat_rtp(ushort16);\n"
32037"char16 __ovld __cnfn convert_char16_rtn(ushort16);\n"
32038"char16 __ovld __cnfn convert_char16_sat_rtn(ushort16);\n"
32039"char16 __ovld __cnfn convert_char16(ushort16);\n"
32040"char16 __ovld __cnfn convert_char16_sat(ushort16);\n"
32041"char16 __ovld __cnfn convert_char16_rte(int16);\n"
32042"char16 __ovld __cnfn convert_char16_sat_rte(int16);\n"
32043"char16 __ovld __cnfn convert_char16_rtz(int16);\n"
32044"char16 __ovld __cnfn convert_char16_sat_rtz(int16);\n"
32045"char16 __ovld __cnfn convert_char16_rtp(int16);\n"
32046"char16 __ovld __cnfn convert_char16_sat_rtp(int16);\n"
32047"char16 __ovld __cnfn convert_char16_rtn(int16);\n"
32048"char16 __ovld __cnfn convert_char16_sat_rtn(int16);\n"
32049"char16 __ovld __cnfn convert_char16(int16);\n"
32050"char16 __ovld __cnfn convert_char16_sat(int16);\n"
32051"char16 __ovld __cnfn convert_char16_rte(uint16);\n"
32052"char16 __ovld __cnfn convert_char16_sat_rte(uint16);\n"
32053"char16 __ovld __cnfn convert_char16_rtz(uint16);\n"
32054"char16 __ovld __cnfn convert_char16_sat_rtz(uint16);\n"
32055"char16 __ovld __cnfn convert_char16_rtp(uint16);\n"
32056"char16 __ovld __cnfn convert_char16_sat_rtp(uint16);\n"
32057"char16 __ovld __cnfn convert_char16_rtn(uint16);\n"
32058"char16 __ovld __cnfn convert_char16_sat_rtn(uint16);\n"
32059"char16 __ovld __cnfn convert_char16(uint16);\n"
32060"char16 __ovld __cnfn convert_char16_sat(uint16);\n"
32061"char16 __ovld __cnfn convert_char16_rte(long16);\n"
32062"char16 __ovld __cnfn convert_char16_sat_rte(long16);\n"
32063"char16 __ovld __cnfn convert_char16_rtz(long16);\n"
32064"char16 __ovld __cnfn convert_char16_sat_rtz(long16);\n"
32065"char16 __ovld __cnfn convert_char16_rtp(long16);\n"
32066"char16 __ovld __cnfn convert_char16_sat_rtp(long16);\n"
32067"char16 __ovld __cnfn convert_char16_rtn(long16);\n"
32068"char16 __ovld __cnfn convert_char16_sat_rtn(long16);\n"
32069"char16 __ovld __cnfn convert_char16(long16);\n"
32070"char16 __ovld __cnfn convert_char16_sat(long16);\n"
32071"char16 __ovld __cnfn convert_char16_rte(ulong16);\n"
32072"char16 __ovld __cnfn convert_char16_sat_rte(ulong16);\n"
32073"char16 __ovld __cnfn convert_char16_rtz(ulong16);\n"
32074"char16 __ovld __cnfn convert_char16_sat_rtz(ulong16);\n"
32075"char16 __ovld __cnfn convert_char16_rtp(ulong16);\n"
32076"char16 __ovld __cnfn convert_char16_sat_rtp(ulong16);\n"
32077"char16 __ovld __cnfn convert_char16_rtn(ulong16);\n"
32078"char16 __ovld __cnfn convert_char16_sat_rtn(ulong16);\n"
32079"char16 __ovld __cnfn convert_char16(ulong16);\n"
32080"char16 __ovld __cnfn convert_char16_sat(ulong16);\n"
32081"char16 __ovld __cnfn convert_char16_rte(float16);\n"
32082"char16 __ovld __cnfn convert_char16_sat_rte(float16);\n"
32083"char16 __ovld __cnfn convert_char16_rtz(float16);\n"
32084"char16 __ovld __cnfn convert_char16_sat_rtz(float16);\n"
32085"char16 __ovld __cnfn convert_char16_rtp(float16);\n"
32086"char16 __ovld __cnfn convert_char16_sat_rtp(float16);\n"
32087"char16 __ovld __cnfn convert_char16_rtn(float16);\n"
32088"char16 __ovld __cnfn convert_char16_sat_rtn(float16);\n"
32089"char16 __ovld __cnfn convert_char16(float16);\n"
32090"char16 __ovld __cnfn convert_char16_sat(float16);\n"
32091"uchar16 __ovld __cnfn convert_uchar16_rte(char16);\n"
32092"uchar16 __ovld __cnfn convert_uchar16_sat_rte(char16);\n"
32093"uchar16 __ovld __cnfn convert_uchar16_rtz(char16);\n"
32094"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(char16);\n"
32095"uchar16 __ovld __cnfn convert_uchar16_rtp(char16);\n"
32096"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(char16);\n"
32097"uchar16 __ovld __cnfn convert_uchar16_rtn(char16);\n"
32098"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(char16);\n"
32099"uchar16 __ovld __cnfn convert_uchar16(char16);\n"
32100"uchar16 __ovld __cnfn convert_uchar16_sat(char16);\n"
32101"uchar16 __ovld __cnfn convert_uchar16_rte(uchar16);\n"
32102"uchar16 __ovld __cnfn convert_uchar16_sat_rte(uchar16);\n"
32103"uchar16 __ovld __cnfn convert_uchar16_rtz(uchar16);\n"
32104"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uchar16);\n"
32105"uchar16 __ovld __cnfn convert_uchar16_rtp(uchar16);\n"
32106"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uchar16);\n"
32107"uchar16 __ovld __cnfn convert_uchar16_rtn(uchar16);\n"
32108"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uchar16);\n"
32109"uchar16 __ovld __cnfn convert_uchar16(uchar16);\n"
32110"uchar16 __ovld __cnfn convert_uchar16_sat(uchar16);\n"
32111"uchar16 __ovld __cnfn convert_uchar16_rte(short16);\n"
32112"uchar16 __ovld __cnfn convert_uchar16_sat_rte(short16);\n"
32113"uchar16 __ovld __cnfn convert_uchar16_rtz(short16);\n"
32114"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(short16);\n"
32115"uchar16 __ovld __cnfn convert_uchar16_rtp(short16);\n"
32116"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(short16);\n"
32117"uchar16 __ovld __cnfn convert_uchar16_rtn(short16);\n"
32118"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(short16);\n"
32119"uchar16 __ovld __cnfn convert_uchar16(short16);\n"
32120"uchar16 __ovld __cnfn convert_uchar16_sat(short16);\n"
32121"uchar16 __ovld __cnfn convert_uchar16_rte(ushort16);\n"
32122"uchar16 __ovld __cnfn convert_uchar16_sat_rte(ushort16);\n"
32123"uchar16 __ovld __cnfn convert_uchar16_rtz(ushort16);\n"
32124"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ushort16);\n"
32125"uchar16 __ovld __cnfn convert_uchar16_rtp(ushort16);\n"
32126"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ushort16);\n"
32127"uchar16 __ovld __cnfn convert_uchar16_rtn(ushort16);\n"
32128"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ushort16);\n"
32129"uchar16 __ovld __cnfn convert_uchar16(ushort16);\n"
32130"uchar16 __ovld __cnfn convert_uchar16_sat(ushort16);\n"
32131"uchar16 __ovld __cnfn convert_uchar16_rte(int16);\n"
32132"uchar16 __ovld __cnfn convert_uchar16_sat_rte(int16);\n"
32133"uchar16 __ovld __cnfn convert_uchar16_rtz(int16);\n"
32134"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(int16);\n"
32135"uchar16 __ovld __cnfn convert_uchar16_rtp(int16);\n"
32136"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(int16);\n"
32137"uchar16 __ovld __cnfn convert_uchar16_rtn(int16);\n"
32138"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(int16);\n"
32139"uchar16 __ovld __cnfn convert_uchar16(int16);\n"
32140"uchar16 __ovld __cnfn convert_uchar16_sat(int16);\n"
32141"uchar16 __ovld __cnfn convert_uchar16_rte(uint16);\n"
32142"uchar16 __ovld __cnfn convert_uchar16_sat_rte(uint16);\n"
32143"uchar16 __ovld __cnfn convert_uchar16_rtz(uint16);\n"
32144"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uint16);\n"
32145"uchar16 __ovld __cnfn convert_uchar16_rtp(uint16);\n"
32146"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uint16);\n"
32147"uchar16 __ovld __cnfn convert_uchar16_rtn(uint16);\n"
32148"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uint16);\n"
32149"uchar16 __ovld __cnfn convert_uchar16(uint16);\n"
32150"uchar16 __ovld __cnfn convert_uchar16_sat(uint16);\n"
32151"uchar16 __ovld __cnfn convert_uchar16_rte(long16);\n"
32152"uchar16 __ovld __cnfn convert_uchar16_sat_rte(long16);\n"
32153"uchar16 __ovld __cnfn convert_uchar16_rtz(long16);\n"
32154"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(long16);\n"
32155"uchar16 __ovld __cnfn convert_uchar16_rtp(long16);\n"
32156"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(long16);\n"
32157"uchar16 __ovld __cnfn convert_uchar16_rtn(long16);\n"
32158"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(long16);\n"
32159"uchar16 __ovld __cnfn convert_uchar16(long16);\n"
32160"uchar16 __ovld __cnfn convert_uchar16_sat(long16);\n"
32161"uchar16 __ovld __cnfn convert_uchar16_rte(ulong16);\n"
32162"uchar16 __ovld __cnfn convert_uchar16_sat_rte(ulong16);\n"
32163"uchar16 __ovld __cnfn convert_uchar16_rtz(ulong16);\n"
32164"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ulong16);\n"
32165"uchar16 __ovld __cnfn convert_uchar16_rtp(ulong16);\n"
32166"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ulong16);\n"
32167"uchar16 __ovld __cnfn convert_uchar16_rtn(ulong16);\n"
32168"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ulong16);\n"
32169"uchar16 __ovld __cnfn convert_uchar16(ulong16);\n"
32170"uchar16 __ovld __cnfn convert_uchar16_sat(ulong16);\n"
32171"uchar16 __ovld __cnfn convert_uchar16_rte(float16);\n"
32172"uchar16 __ovld __cnfn convert_uchar16_sat_rte(float16);\n"
32173"uchar16 __ovld __cnfn convert_uchar16_rtz(float16);\n"
32174"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(float16);\n"
32175"uchar16 __ovld __cnfn convert_uchar16_rtp(float16);\n"
32176"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(float16);\n"
32177"uchar16 __ovld __cnfn convert_uchar16_rtn(float16);\n"
32178"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(float16);\n"
32179"uchar16 __ovld __cnfn convert_uchar16(float16);\n"
32180"uchar16 __ovld __cnfn convert_uchar16_sat(float16);\n"
32181"short16 __ovld __cnfn convert_short16_rte(char16);\n"
32182"short16 __ovld __cnfn convert_short16_sat_rte(char16);\n"
32183"short16 __ovld __cnfn convert_short16_rtz(char16);\n"
32184"short16 __ovld __cnfn convert_short16_sat_rtz(char16);\n"
32185"short16 __ovld __cnfn convert_short16_rtp(char16);\n"
32186"short16 __ovld __cnfn convert_short16_sat_rtp(char16);\n"
32187"short16 __ovld __cnfn convert_short16_rtn(char16);\n"
32188"short16 __ovld __cnfn convert_short16_sat_rtn(char16);\n"
32189"short16 __ovld __cnfn convert_short16(char16);\n"
32190"short16 __ovld __cnfn convert_short16_sat(char16);\n"
32191"short16 __ovld __cnfn convert_short16_rte(uchar16);\n"
32192"short16 __ovld __cnfn convert_short16_sat_rte(uchar16);\n"
32193"short16 __ovld __cnfn convert_short16_rtz(uchar16);\n"
32194"short16 __ovld __cnfn convert_short16_sat_rtz(uchar16);\n"
32195"short16 __ovld __cnfn convert_short16_rtp(uchar16);\n"
32196"short16 __ovld __cnfn convert_short16_sat_rtp(uchar16);\n"
32197"short16 __ovld __cnfn convert_short16_rtn(uchar16);\n"
32198"short16 __ovld __cnfn convert_short16_sat_rtn(uchar16);\n"
32199"short16 __ovld __cnfn convert_short16(uchar16);\n"
32200"short16 __ovld __cnfn convert_short16_sat(uchar16);\n"
32201"short16 __ovld __cnfn convert_short16_rte(short16);\n"
32202"short16 __ovld __cnfn convert_short16_sat_rte(short16);\n"
32203"short16 __ovld __cnfn convert_short16_rtz(short16);\n"
32204"short16 __ovld __cnfn convert_short16_sat_rtz(short16);\n"
32205"short16 __ovld __cnfn convert_short16_rtp(short16);\n"
32206"short16 __ovld __cnfn convert_short16_sat_rtp(short16);\n"
32207"short16 __ovld __cnfn convert_short16_rtn(short16);\n"
32208"short16 __ovld __cnfn convert_short16_sat_rtn(short16);\n"
32209"short16 __ovld __cnfn convert_short16(short16);\n"
32210"short16 __ovld __cnfn convert_short16_sat(short16);\n"
32211"short16 __ovld __cnfn convert_short16_rte(ushort16);\n"
32212"short16 __ovld __cnfn convert_short16_sat_rte(ushort16);\n"
32213"short16 __ovld __cnfn convert_short16_rtz(ushort16);\n"
32214"short16 __ovld __cnfn convert_short16_sat_rtz(ushort16);\n"
32215"short16 __ovld __cnfn convert_short16_rtp(ushort16);\n"
32216"short16 __ovld __cnfn convert_short16_sat_rtp(ushort16);\n"
32217"short16 __ovld __cnfn convert_short16_rtn(ushort16);\n"
32218"short16 __ovld __cnfn convert_short16_sat_rtn(ushort16);\n"
32219"short16 __ovld __cnfn convert_short16(ushort16);\n"
32220"short16 __ovld __cnfn convert_short16_sat(ushort16);\n"
32221"short16 __ovld __cnfn convert_short16_rte(int16);\n"
32222"short16 __ovld __cnfn convert_short16_sat_rte(int16);\n"
32223"short16 __ovld __cnfn convert_short16_rtz(int16);\n"
32224"short16 __ovld __cnfn convert_short16_sat_rtz(int16);\n"
32225"short16 __ovld __cnfn convert_short16_rtp(int16);\n"
32226"short16 __ovld __cnfn convert_short16_sat_rtp(int16);\n"
32227"short16 __ovld __cnfn convert_short16_rtn(int16);\n"
32228"short16 __ovld __cnfn convert_short16_sat_rtn(int16);\n"
32229"short16 __ovld __cnfn convert_short16(int16);\n"
32230"short16 __ovld __cnfn convert_short16_sat(int16);\n"
32231"short16 __ovld __cnfn convert_short16_rte(uint16);\n"
32232"short16 __ovld __cnfn convert_short16_sat_rte(uint16);\n"
32233"short16 __ovld __cnfn convert_short16_rtz(uint16);\n"
32234"short16 __ovld __cnfn convert_short16_sat_rtz(uint16);\n"
32235"short16 __ovld __cnfn convert_short16_rtp(uint16);\n"
32236"short16 __ovld __cnfn convert_short16_sat_rtp(uint16);\n"
32237"short16 __ovld __cnfn convert_short16_rtn(uint16);\n"
32238"short16 __ovld __cnfn convert_short16_sat_rtn(uint16);\n"
32239"short16 __ovld __cnfn convert_short16(uint16);\n"
32240"short16 __ovld __cnfn convert_short16_sat(uint16);\n"
32241"short16 __ovld __cnfn convert_short16_rte(long16);\n"
32242"short16 __ovld __cnfn convert_short16_sat_rte(long16);\n"
32243"short16 __ovld __cnfn convert_short16_rtz(long16);\n"
32244"short16 __ovld __cnfn convert_short16_sat_rtz(long16);\n"
32245"short16 __ovld __cnfn convert_short16_rtp(long16);\n"
32246"short16 __ovld __cnfn convert_short16_sat_rtp(long16);\n"
32247"short16 __ovld __cnfn convert_short16_rtn(long16);\n"
32248"short16 __ovld __cnfn convert_short16_sat_rtn(long16);\n"
32249"short16 __ovld __cnfn convert_short16(long16);\n"
32250"short16 __ovld __cnfn convert_short16_sat(long16);\n"
32251"short16 __ovld __cnfn convert_short16_rte(ulong16);\n"
32252"short16 __ovld __cnfn convert_short16_sat_rte(ulong16);\n"
32253"short16 __ovld __cnfn convert_short16_rtz(ulong16);\n"
32254"short16 __ovld __cnfn convert_short16_sat_rtz(ulong16);\n"
32255"short16 __ovld __cnfn convert_short16_rtp(ulong16);\n"
32256"short16 __ovld __cnfn convert_short16_sat_rtp(ulong16);\n"
32257"short16 __ovld __cnfn convert_short16_rtn(ulong16);\n"
32258"short16 __ovld __cnfn convert_short16_sat_rtn(ulong16);\n"
32259"short16 __ovld __cnfn convert_short16(ulong16);\n"
32260"short16 __ovld __cnfn convert_short16_sat(ulong16);\n"
32261"short16 __ovld __cnfn convert_short16_rte(float16);\n"
32262"short16 __ovld __cnfn convert_short16_sat_rte(float16);\n"
32263"short16 __ovld __cnfn convert_short16_rtz(float16);\n"
32264"short16 __ovld __cnfn convert_short16_sat_rtz(float16);\n"
32265"short16 __ovld __cnfn convert_short16_rtp(float16);\n"
32266"short16 __ovld __cnfn convert_short16_sat_rtp(float16);\n"
32267"short16 __ovld __cnfn convert_short16_rtn(float16);\n"
32268"short16 __ovld __cnfn convert_short16_sat_rtn(float16);\n"
32269"short16 __ovld __cnfn convert_short16(float16);\n"
32270"short16 __ovld __cnfn convert_short16_sat(float16);\n"
32271"ushort16 __ovld __cnfn convert_ushort16_rte(char16);\n"
32272"ushort16 __ovld __cnfn convert_ushort16_sat_rte(char16);\n"
32273"ushort16 __ovld __cnfn convert_ushort16_rtz(char16);\n"
32274"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(char16);\n"
32275"ushort16 __ovld __cnfn convert_ushort16_rtp(char16);\n"
32276"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(char16);\n"
32277"ushort16 __ovld __cnfn convert_ushort16_rtn(char16);\n"
32278"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(char16);\n"
32279"ushort16 __ovld __cnfn convert_ushort16(char16);\n"
32280"ushort16 __ovld __cnfn convert_ushort16_sat(char16);\n"
32281"ushort16 __ovld __cnfn convert_ushort16_rte(uchar16);\n"
32282"ushort16 __ovld __cnfn convert_ushort16_sat_rte(uchar16);\n"
32283"ushort16 __ovld __cnfn convert_ushort16_rtz(uchar16);\n"
32284"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uchar16);\n"
32285"ushort16 __ovld __cnfn convert_ushort16_rtp(uchar16);\n"
32286"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uchar16);\n"
32287"ushort16 __ovld __cnfn convert_ushort16_rtn(uchar16);\n"
32288"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uchar16);\n"
32289"ushort16 __ovld __cnfn convert_ushort16(uchar16);\n"
32290"ushort16 __ovld __cnfn convert_ushort16_sat(uchar16);\n"
32291"ushort16 __ovld __cnfn convert_ushort16_rte(short16);\n"
32292"ushort16 __ovld __cnfn convert_ushort16_sat_rte(short16);\n"
32293"ushort16 __ovld __cnfn convert_ushort16_rtz(short16);\n"
32294"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(short16);\n"
32295"ushort16 __ovld __cnfn convert_ushort16_rtp(short16);\n"
32296"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(short16);\n"
32297"ushort16 __ovld __cnfn convert_ushort16_rtn(short16);\n"
32298"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(short16);\n"
32299"ushort16 __ovld __cnfn convert_ushort16(short16);\n"
32300"ushort16 __ovld __cnfn convert_ushort16_sat(short16);\n"
32301"ushort16 __ovld __cnfn convert_ushort16_rte(ushort16);\n"
32302"ushort16 __ovld __cnfn convert_ushort16_sat_rte(ushort16);\n"
32303"ushort16 __ovld __cnfn convert_ushort16_rtz(ushort16);\n"
32304"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ushort16);\n"
32305"ushort16 __ovld __cnfn convert_ushort16_rtp(ushort16);\n"
32306"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ushort16);\n"
32307"ushort16 __ovld __cnfn convert_ushort16_rtn(ushort16);\n"
32308"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ushort16);\n"
32309"ushort16 __ovld __cnfn convert_ushort16(ushort16);\n"
32310"ushort16 __ovld __cnfn convert_ushort16_sat(ushort16);\n"
32311"ushort16 __ovld __cnfn convert_ushort16_rte(int16);\n"
32312"ushort16 __ovld __cnfn convert_ushort16_sat_rte(int16);\n"
32313"ushort16 __ovld __cnfn convert_ushort16_rtz(int16);\n"
32314"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(int16);\n"
32315"ushort16 __ovld __cnfn convert_ushort16_rtp(int16);\n"
32316"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(int16);\n"
32317"ushort16 __ovld __cnfn convert_ushort16_rtn(int16);\n"
32318"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(int16);\n"
32319"ushort16 __ovld __cnfn convert_ushort16(int16);\n"
32320"ushort16 __ovld __cnfn convert_ushort16_sat(int16);\n"
32321"ushort16 __ovld __cnfn convert_ushort16_rte(uint16);\n"
32322"ushort16 __ovld __cnfn convert_ushort16_sat_rte(uint16);\n"
32323"ushort16 __ovld __cnfn convert_ushort16_rtz(uint16);\n"
32324"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uint16);\n"
32325"ushort16 __ovld __cnfn convert_ushort16_rtp(uint16);\n"
32326"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uint16);\n"
32327"ushort16 __ovld __cnfn convert_ushort16_rtn(uint16);\n"
32328"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uint16);\n"
32329"ushort16 __ovld __cnfn convert_ushort16(uint16);\n"
32330"ushort16 __ovld __cnfn convert_ushort16_sat(uint16);\n"
32331"ushort16 __ovld __cnfn convert_ushort16_rte(long16);\n"
32332"ushort16 __ovld __cnfn convert_ushort16_sat_rte(long16);\n"
32333"ushort16 __ovld __cnfn convert_ushort16_rtz(long16);\n"
32334"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(long16);\n"
32335"ushort16 __ovld __cnfn convert_ushort16_rtp(long16);\n"
32336"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(long16);\n"
32337"ushort16 __ovld __cnfn convert_ushort16_rtn(long16);\n"
32338"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(long16);\n"
32339"ushort16 __ovld __cnfn convert_ushort16(long16);\n"
32340"ushort16 __ovld __cnfn convert_ushort16_sat(long16);\n"
32341"ushort16 __ovld __cnfn convert_ushort16_rte(ulong16);\n"
32342"ushort16 __ovld __cnfn convert_ushort16_sat_rte(ulong16);\n"
32343"ushort16 __ovld __cnfn convert_ushort16_rtz(ulong16);\n"
32344"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ulong16);\n"
32345"ushort16 __ovld __cnfn convert_ushort16_rtp(ulong16);\n"
32346"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ulong16);\n"
32347"ushort16 __ovld __cnfn convert_ushort16_rtn(ulong16);\n"
32348"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ulong16);\n"
32349"ushort16 __ovld __cnfn convert_ushort16(ulong16);\n"
32350"ushort16 __ovld __cnfn convert_ushort16_sat(ulong16);\n"
32351"ushort16 __ovld __cnfn convert_ushort16_rte(float16);\n"
32352"ushort16 __ovld __cnfn convert_ushort16_sat_rte(float16);\n"
32353"ushort16 __ovld __cnfn convert_ushort16_rtz(float16);\n"
32354"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(float16);\n"
32355"ushort16 __ovld __cnfn convert_ushort16_rtp(float16);\n"
32356"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(float16);\n"
32357"ushort16 __ovld __cnfn convert_ushort16_rtn(float16);\n"
32358"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(float16);\n"
32359"ushort16 __ovld __cnfn convert_ushort16(float16);\n"
32360"ushort16 __ovld __cnfn convert_ushort16_sat(float16);\n"
32361"int16 __ovld __cnfn convert_int16_rte(char16);\n"
32362"int16 __ovld __cnfn convert_int16_sat_rte(char16);\n"
32363"int16 __ovld __cnfn convert_int16_rtz(char16);\n"
32364"int16 __ovld __cnfn convert_int16_sat_rtz(char16);\n"
32365"int16 __ovld __cnfn convert_int16_rtp(char16);\n"
32366"int16 __ovld __cnfn convert_int16_sat_rtp(char16);\n"
32367"int16 __ovld __cnfn convert_int16_rtn(char16);\n"
32368"int16 __ovld __cnfn convert_int16_sat_rtn(char16);\n"
32369"int16 __ovld __cnfn convert_int16(char16);\n"
32370"int16 __ovld __cnfn convert_int16_sat(char16);\n"
32371"int16 __ovld __cnfn convert_int16_rte(uchar16);\n"
32372"int16 __ovld __cnfn convert_int16_sat_rte(uchar16);\n"
32373"int16 __ovld __cnfn convert_int16_rtz(uchar16);\n"
32374"int16 __ovld __cnfn convert_int16_sat_rtz(uchar16);\n"
32375"int16 __ovld __cnfn convert_int16_rtp(uchar16);\n"
32376"int16 __ovld __cnfn convert_int16_sat_rtp(uchar16);\n"
32377"int16 __ovld __cnfn convert_int16_rtn(uchar16);\n"
32378"int16 __ovld __cnfn convert_int16_sat_rtn(uchar16);\n"
32379"int16 __ovld __cnfn convert_int16(uchar16);\n"
32380"int16 __ovld __cnfn convert_int16_sat(uchar16);\n"
32381"int16 __ovld __cnfn convert_int16_rte(short16);\n"
32382"int16 __ovld __cnfn convert_int16_sat_rte(short16);\n"
32383"int16 __ovld __cnfn convert_int16_rtz(short16);\n"
32384"int16 __ovld __cnfn convert_int16_sat_rtz(short16);\n"
32385"int16 __ovld __cnfn convert_int16_rtp(short16);\n"
32386"int16 __ovld __cnfn convert_int16_sat_rtp(short16);\n"
32387"int16 __ovld __cnfn convert_int16_rtn(short16);\n"
32388"int16 __ovld __cnfn convert_int16_sat_rtn(short16);\n"
32389"int16 __ovld __cnfn convert_int16(short16);\n"
32390"int16 __ovld __cnfn convert_int16_sat(short16);\n"
32391"int16 __ovld __cnfn convert_int16_rte(ushort16);\n"
32392"int16 __ovld __cnfn convert_int16_sat_rte(ushort16);\n"
32393"int16 __ovld __cnfn convert_int16_rtz(ushort16);\n"
32394"int16 __ovld __cnfn convert_int16_sat_rtz(ushort16);\n"
32395"int16 __ovld __cnfn convert_int16_rtp(ushort16);\n"
32396"int16 __ovld __cnfn convert_int16_sat_rtp(ushort16);\n"
32397"int16 __ovld __cnfn convert_int16_rtn(ushort16);\n"
32398"int16 __ovld __cnfn convert_int16_sat_rtn(ushort16);\n"
32399"int16 __ovld __cnfn convert_int16(ushort16);\n"
32400"int16 __ovld __cnfn convert_int16_sat(ushort16);\n"
32401"int16 __ovld __cnfn convert_int16_rte(int16);\n"
32402"int16 __ovld __cnfn convert_int16_sat_rte(int16);\n"
32403"int16 __ovld __cnfn convert_int16_rtz(int16);\n"
32404"int16 __ovld __cnfn convert_int16_sat_rtz(int16);\n"
32405"int16 __ovld __cnfn convert_int16_rtp(int16);\n"
32406"int16 __ovld __cnfn convert_int16_sat_rtp(int16);\n"
32407"int16 __ovld __cnfn convert_int16_rtn(int16);\n"
32408"int16 __ovld __cnfn convert_int16_sat_rtn(int16);\n"
32409"int16 __ovld __cnfn convert_int16(int16);\n"
32410"int16 __ovld __cnfn convert_int16_sat(int16);\n"
32411"int16 __ovld __cnfn convert_int16_rte(uint16);\n"
32412"int16 __ovld __cnfn convert_int16_sat_rte(uint16);\n"
32413"int16 __ovld __cnfn convert_int16_rtz(uint16);\n"
32414"int16 __ovld __cnfn convert_int16_sat_rtz(uint16);\n"
32415"int16 __ovld __cnfn convert_int16_rtp(uint16);\n"
32416"int16 __ovld __cnfn convert_int16_sat_rtp(uint16);\n"
32417"int16 __ovld __cnfn convert_int16_rtn(uint16);\n"
32418"int16 __ovld __cnfn convert_int16_sat_rtn(uint16);\n"
32419"int16 __ovld __cnfn convert_int16(uint16);\n"
32420"int16 __ovld __cnfn convert_int16_sat(uint16);\n"
32421"int16 __ovld __cnfn convert_int16_rte(long16);\n"
32422"int16 __ovld __cnfn convert_int16_sat_rte(long16);\n"
32423"int16 __ovld __cnfn convert_int16_rtz(long16);\n"
32424"int16 __ovld __cnfn convert_int16_sat_rtz(long16);\n"
32425"int16 __ovld __cnfn convert_int16_rtp(long16);\n"
32426"int16 __ovld __cnfn convert_int16_sat_rtp(long16);\n"
32427"int16 __ovld __cnfn convert_int16_rtn(long16);\n"
32428"int16 __ovld __cnfn convert_int16_sat_rtn(long16);\n"
32429"int16 __ovld __cnfn convert_int16(long16);\n"
32430"int16 __ovld __cnfn convert_int16_sat(long16);\n"
32431"int16 __ovld __cnfn convert_int16_rte(ulong16);\n"
32432"int16 __ovld __cnfn convert_int16_sat_rte(ulong16);\n"
32433"int16 __ovld __cnfn convert_int16_rtz(ulong16);\n"
32434"int16 __ovld __cnfn convert_int16_sat_rtz(ulong16);\n"
32435"int16 __ovld __cnfn convert_int16_rtp(ulong16);\n"
32436"int16 __ovld __cnfn convert_int16_sat_rtp(ulong16);\n"
32437"int16 __ovld __cnfn convert_int16_rtn(ulong16);\n"
32438"int16 __ovld __cnfn convert_int16_sat_rtn(ulong16);\n"
32439"int16 __ovld __cnfn convert_int16(ulong16);\n"
32440"int16 __ovld __cnfn convert_int16_sat(ulong16);\n"
32441"int16 __ovld __cnfn convert_int16_rte(float16);\n"
32442"int16 __ovld __cnfn convert_int16_sat_rte(float16);\n"
32443"int16 __ovld __cnfn convert_int16_rtz(float16);\n"
32444"int16 __ovld __cnfn convert_int16_sat_rtz(float16);\n"
32445"int16 __ovld __cnfn convert_int16_rtp(float16);\n"
32446"int16 __ovld __cnfn convert_int16_sat_rtp(float16);\n"
32447"int16 __ovld __cnfn convert_int16_rtn(float16);\n"
32448"int16 __ovld __cnfn convert_int16_sat_rtn(float16);\n"
32449"int16 __ovld __cnfn convert_int16(float16);\n"
32450"int16 __ovld __cnfn convert_int16_sat(float16);\n"
32451"uint16 __ovld __cnfn convert_uint16_rte(char16);\n"
32452"uint16 __ovld __cnfn convert_uint16_sat_rte(char16);\n"
32453"uint16 __ovld __cnfn convert_uint16_rtz(char16);\n"
32454"uint16 __ovld __cnfn convert_uint16_sat_rtz(char16);\n"
32455"uint16 __ovld __cnfn convert_uint16_rtp(char16);\n"
32456"uint16 __ovld __cnfn convert_uint16_sat_rtp(char16);\n"
32457"uint16 __ovld __cnfn convert_uint16_rtn(char16);\n"
32458"uint16 __ovld __cnfn convert_uint16_sat_rtn(char16);\n"
32459"uint16 __ovld __cnfn convert_uint16(char16);\n"
32460"uint16 __ovld __cnfn convert_uint16_sat(char16);\n"
32461"uint16 __ovld __cnfn convert_uint16_rte(uchar16);\n"
32462"uint16 __ovld __cnfn convert_uint16_sat_rte(uchar16);\n"
32463"uint16 __ovld __cnfn convert_uint16_rtz(uchar16);\n"
32464"uint16 __ovld __cnfn convert_uint16_sat_rtz(uchar16);\n"
32465"uint16 __ovld __cnfn convert_uint16_rtp(uchar16);\n"
32466"uint16 __ovld __cnfn convert_uint16_sat_rtp(uchar16);\n"
32467"uint16 __ovld __cnfn convert_uint16_rtn(uchar16);\n"
32468"uint16 __ovld __cnfn convert_uint16_sat_rtn(uchar16);\n"
32469"uint16 __ovld __cnfn convert_uint16(uchar16);\n"
32470"uint16 __ovld __cnfn convert_uint16_sat(uchar16);\n"
32471"uint16 __ovld __cnfn convert_uint16_rte(short16);\n"
32472"uint16 __ovld __cnfn convert_uint16_sat_rte(short16);\n"
32473"uint16 __ovld __cnfn convert_uint16_rtz(short16);\n"
32474"uint16 __ovld __cnfn convert_uint16_sat_rtz(short16);\n"
32475"uint16 __ovld __cnfn convert_uint16_rtp(short16);\n"
32476"uint16 __ovld __cnfn convert_uint16_sat_rtp(short16);\n"
32477"uint16 __ovld __cnfn convert_uint16_rtn(short16);\n"
32478"uint16 __ovld __cnfn convert_uint16_sat_rtn(short16);\n"
32479"uint16 __ovld __cnfn convert_uint16(short16);\n"
32480"uint16 __ovld __cnfn convert_uint16_sat(short16);\n"
32481"uint16 __ovld __cnfn convert_uint16_rte(ushort16);\n"
32482"uint16 __ovld __cnfn convert_uint16_sat_rte(ushort16);\n"
32483"uint16 __ovld __cnfn convert_uint16_rtz(ushort16);\n"
32484"uint16 __ovld __cnfn convert_uint16_sat_rtz(ushort16);\n"
32485"uint16 __ovld __cnfn convert_uint16_rtp(ushort16);\n"
32486"uint16 __ovld __cnfn convert_uint16_sat_rtp(ushort16);\n"
32487"uint16 __ovld __cnfn convert_uint16_rtn(ushort16);\n"
32488"uint16 __ovld __cnfn convert_uint16_sat_rtn(ushort16);\n"
32489"uint16 __ovld __cnfn convert_uint16(ushort16);\n"
32490"uint16 __ovld __cnfn convert_uint16_sat(ushort16);\n"
32491"uint16 __ovld __cnfn convert_uint16_rte(int16);\n"
32492"uint16 __ovld __cnfn convert_uint16_sat_rte(int16);\n"
32493"uint16 __ovld __cnfn convert_uint16_rtz(int16);\n"
32494"uint16 __ovld __cnfn convert_uint16_sat_rtz(int16);\n"
32495"uint16 __ovld __cnfn convert_uint16_rtp(int16);\n"
32496"uint16 __ovld __cnfn convert_uint16_sat_rtp(int16);\n"
32497"uint16 __ovld __cnfn convert_uint16_rtn(int16);\n"
32498"uint16 __ovld __cnfn convert_uint16_sat_rtn(int16);\n"
32499"uint16 __ovld __cnfn convert_uint16(int16);\n"
32500"uint16 __ovld __cnfn convert_uint16_sat(int16);\n"
32501"uint16 __ovld __cnfn convert_uint16_rte(uint16);\n"
32502"uint16 __ovld __cnfn convert_uint16_sat_rte(uint16);\n"
32503"uint16 __ovld __cnfn convert_uint16_rtz(uint16);\n"
32504"uint16 __ovld __cnfn convert_uint16_sat_rtz(uint16);\n"
32505"uint16 __ovld __cnfn convert_uint16_rtp(uint16);\n"
32506"uint16 __ovld __cnfn convert_uint16_sat_rtp(uint16);\n"
32507"uint16 __ovld __cnfn convert_uint16_rtn(uint16);\n"
32508"uint16 __ovld __cnfn convert_uint16_sat_rtn(uint16);\n"
32509"uint16 __ovld __cnfn convert_uint16(uint16);\n"
32510"uint16 __ovld __cnfn convert_uint16_sat(uint16);\n"
32511"uint16 __ovld __cnfn convert_uint16_rte(long16);\n"
32512"uint16 __ovld __cnfn convert_uint16_sat_rte(long16);\n"
32513"uint16 __ovld __cnfn convert_uint16_rtz(long16);\n"
32514"uint16 __ovld __cnfn convert_uint16_sat_rtz(long16);\n"
32515"uint16 __ovld __cnfn convert_uint16_rtp(long16);\n"
32516"uint16 __ovld __cnfn convert_uint16_sat_rtp(long16);\n"
32517"uint16 __ovld __cnfn convert_uint16_rtn(long16);\n"
32518"uint16 __ovld __cnfn convert_uint16_sat_rtn(long16);\n"
32519"uint16 __ovld __cnfn convert_uint16(long16);\n"
32520"uint16 __ovld __cnfn convert_uint16_sat(long16);\n"
32521"uint16 __ovld __cnfn convert_uint16_rte(ulong16);\n"
32522"uint16 __ovld __cnfn convert_uint16_sat_rte(ulong16);\n"
32523"uint16 __ovld __cnfn convert_uint16_rtz(ulong16);\n"
32524"uint16 __ovld __cnfn convert_uint16_sat_rtz(ulong16);\n"
32525"uint16 __ovld __cnfn convert_uint16_rtp(ulong16);\n"
32526"uint16 __ovld __cnfn convert_uint16_sat_rtp(ulong16);\n"
32527"uint16 __ovld __cnfn convert_uint16_rtn(ulong16);\n"
32528"uint16 __ovld __cnfn convert_uint16_sat_rtn(ulong16);\n"
32529"uint16 __ovld __cnfn convert_uint16(ulong16);\n"
32530"uint16 __ovld __cnfn convert_uint16_sat(ulong16);\n"
32531"uint16 __ovld __cnfn convert_uint16_rte(float16);\n"
32532"uint16 __ovld __cnfn convert_uint16_sat_rte(float16);\n"
32533"uint16 __ovld __cnfn convert_uint16_rtz(float16);\n"
32534"uint16 __ovld __cnfn convert_uint16_sat_rtz(float16);\n"
32535"uint16 __ovld __cnfn convert_uint16_rtp(float16);\n"
32536"uint16 __ovld __cnfn convert_uint16_sat_rtp(float16);\n"
32537"uint16 __ovld __cnfn convert_uint16_rtn(float16);\n"
32538"uint16 __ovld __cnfn convert_uint16_sat_rtn(float16);\n"
32539"uint16 __ovld __cnfn convert_uint16(float16);\n"
32540"uint16 __ovld __cnfn convert_uint16_sat(float16);\n"
32541"long16 __ovld __cnfn convert_long16_rte(char16);\n"
32542"long16 __ovld __cnfn convert_long16_sat_rte(char16);\n"
32543"long16 __ovld __cnfn convert_long16_rtz(char16);\n"
32544"long16 __ovld __cnfn convert_long16_sat_rtz(char16);\n"
32545"long16 __ovld __cnfn convert_long16_rtp(char16);\n"
32546"long16 __ovld __cnfn convert_long16_sat_rtp(char16);\n"
32547"long16 __ovld __cnfn convert_long16_rtn(char16);\n"
32548"long16 __ovld __cnfn convert_long16_sat_rtn(char16);\n"
32549"long16 __ovld __cnfn convert_long16(char16);\n"
32550"long16 __ovld __cnfn convert_long16_sat(char16);\n"
32551"long16 __ovld __cnfn convert_long16_rte(uchar16);\n"
32552"long16 __ovld __cnfn convert_long16_sat_rte(uchar16);\n"
32553"long16 __ovld __cnfn convert_long16_rtz(uchar16);\n"
32554"long16 __ovld __cnfn convert_long16_sat_rtz(uchar16);\n"
32555"long16 __ovld __cnfn convert_long16_rtp(uchar16);\n"
32556"long16 __ovld __cnfn convert_long16_sat_rtp(uchar16);\n"
32557"long16 __ovld __cnfn convert_long16_rtn(uchar16);\n"
32558"long16 __ovld __cnfn convert_long16_sat_rtn(uchar16);\n"
32559"long16 __ovld __cnfn convert_long16(uchar16);\n"
32560"long16 __ovld __cnfn convert_long16_sat(uchar16);\n"
32561"long16 __ovld __cnfn convert_long16_rte(short16);\n"
32562"long16 __ovld __cnfn convert_long16_sat_rte(short16);\n"
32563"long16 __ovld __cnfn convert_long16_rtz(short16);\n"
32564"long16 __ovld __cnfn convert_long16_sat_rtz(short16);\n"
32565"long16 __ovld __cnfn convert_long16_rtp(short16);\n"
32566"long16 __ovld __cnfn convert_long16_sat_rtp(short16);\n"
32567"long16 __ovld __cnfn convert_long16_rtn(short16);\n"
32568"long16 __ovld __cnfn convert_long16_sat_rtn(short16);\n"
32569"long16 __ovld __cnfn convert_long16(short16);\n"
32570"long16 __ovld __cnfn convert_long16_sat(short16);\n"
32571"long16 __ovld __cnfn convert_long16_rte(ushort16);\n"
32572"long16 __ovld __cnfn convert_long16_sat_rte(ushort16);\n"
32573"long16 __ovld __cnfn convert_long16_rtz(ushort16);\n"
32574"long16 __ovld __cnfn convert_long16_sat_rtz(ushort16);\n"
32575"long16 __ovld __cnfn convert_long16_rtp(ushort16);\n"
32576"long16 __ovld __cnfn convert_long16_sat_rtp(ushort16);\n"
32577"long16 __ovld __cnfn convert_long16_rtn(ushort16);\n"
32578"long16 __ovld __cnfn convert_long16_sat_rtn(ushort16);\n"
32579"long16 __ovld __cnfn convert_long16(ushort16);\n"
32580"long16 __ovld __cnfn convert_long16_sat(ushort16);\n"
32581"long16 __ovld __cnfn convert_long16_rte(int16);\n"
32582"long16 __ovld __cnfn convert_long16_sat_rte(int16);\n"
32583"long16 __ovld __cnfn convert_long16_rtz(int16);\n"
32584"long16 __ovld __cnfn convert_long16_sat_rtz(int16);\n"
32585"long16 __ovld __cnfn convert_long16_rtp(int16);\n"
32586"long16 __ovld __cnfn convert_long16_sat_rtp(int16);\n"
32587"long16 __ovld __cnfn convert_long16_rtn(int16);\n"
32588"long16 __ovld __cnfn convert_long16_sat_rtn(int16);\n"
32589"long16 __ovld __cnfn convert_long16(int16);\n"
32590"long16 __ovld __cnfn convert_long16_sat(int16);\n"
32591"long16 __ovld __cnfn convert_long16_rte(uint16);\n"
32592"long16 __ovld __cnfn convert_long16_sat_rte(uint16);\n"
32593"long16 __ovld __cnfn convert_long16_rtz(uint16);\n"
32594"long16 __ovld __cnfn convert_long16_sat_rtz(uint16);\n"
32595"long16 __ovld __cnfn convert_long16_rtp(uint16);\n"
32596"long16 __ovld __cnfn convert_long16_sat_rtp(uint16);\n"
32597"long16 __ovld __cnfn convert_long16_rtn(uint16);\n"
32598"long16 __ovld __cnfn convert_long16_sat_rtn(uint16);\n"
32599"long16 __ovld __cnfn convert_long16(uint16);\n"
32600"long16 __ovld __cnfn convert_long16_sat(uint16);\n"
32601"long16 __ovld __cnfn convert_long16_rte(long16);\n"
32602"long16 __ovld __cnfn convert_long16_sat_rte(long16);\n"
32603"long16 __ovld __cnfn convert_long16_rtz(long16);\n"
32604"long16 __ovld __cnfn convert_long16_sat_rtz(long16);\n"
32605"long16 __ovld __cnfn convert_long16_rtp(long16);\n"
32606"long16 __ovld __cnfn convert_long16_sat_rtp(long16);\n"
32607"long16 __ovld __cnfn convert_long16_rtn(long16);\n"
32608"long16 __ovld __cnfn convert_long16_sat_rtn(long16);\n"
32609"long16 __ovld __cnfn convert_long16(long16);\n"
32610"long16 __ovld __cnfn convert_long16_sat(long16);\n"
32611"long16 __ovld __cnfn convert_long16_rte(ulong16);\n"
32612"long16 __ovld __cnfn convert_long16_sat_rte(ulong16);\n"
32613"long16 __ovld __cnfn convert_long16_rtz(ulong16);\n"
32614"long16 __ovld __cnfn convert_long16_sat_rtz(ulong16);\n"
32615"long16 __ovld __cnfn convert_long16_rtp(ulong16);\n"
32616"long16 __ovld __cnfn convert_long16_sat_rtp(ulong16);\n"
32617"long16 __ovld __cnfn convert_long16_rtn(ulong16);\n"
32618"long16 __ovld __cnfn convert_long16_sat_rtn(ulong16);\n"
32619"long16 __ovld __cnfn convert_long16(ulong16);\n"
32620"long16 __ovld __cnfn convert_long16_sat(ulong16);\n"
32621"long16 __ovld __cnfn convert_long16_rte(float16);\n"
32622"long16 __ovld __cnfn convert_long16_sat_rte(float16);\n"
32623"long16 __ovld __cnfn convert_long16_rtz(float16);\n"
32624"long16 __ovld __cnfn convert_long16_sat_rtz(float16);\n"
32625"long16 __ovld __cnfn convert_long16_rtp(float16);\n"
32626"long16 __ovld __cnfn convert_long16_sat_rtp(float16);\n"
32627"long16 __ovld __cnfn convert_long16_rtn(float16);\n"
32628"long16 __ovld __cnfn convert_long16_sat_rtn(float16);\n"
32629"long16 __ovld __cnfn convert_long16(float16);\n"
32630"long16 __ovld __cnfn convert_long16_sat(float16);\n"
32631"ulong16 __ovld __cnfn convert_ulong16_rte(char16);\n"
32632"ulong16 __ovld __cnfn convert_ulong16_sat_rte(char16);\n"
32633"ulong16 __ovld __cnfn convert_ulong16_rtz(char16);\n"
32634"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(char16);\n"
32635"ulong16 __ovld __cnfn convert_ulong16_rtp(char16);\n"
32636"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(char16);\n"
32637"ulong16 __ovld __cnfn convert_ulong16_rtn(char16);\n"
32638"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(char16);\n"
32639"ulong16 __ovld __cnfn convert_ulong16(char16);\n"
32640"ulong16 __ovld __cnfn convert_ulong16_sat(char16);\n"
32641"ulong16 __ovld __cnfn convert_ulong16_rte(uchar16);\n"
32642"ulong16 __ovld __cnfn convert_ulong16_sat_rte(uchar16);\n"
32643"ulong16 __ovld __cnfn convert_ulong16_rtz(uchar16);\n"
32644"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uchar16);\n"
32645"ulong16 __ovld __cnfn convert_ulong16_rtp(uchar16);\n"
32646"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uchar16);\n"
32647"ulong16 __ovld __cnfn convert_ulong16_rtn(uchar16);\n"
32648"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(uchar16);\n"
32649"ulong16 __ovld __cnfn convert_ulong16(uchar16);\n"
32650"ulong16 __ovld __cnfn convert_ulong16_sat(uchar16);\n"
32651"ulong16 __ovld __cnfn convert_ulong16_rte(short16);\n"
32652"ulong16 __ovld __cnfn convert_ulong16_sat_rte(short16);\n"
32653"ulong16 __ovld __cnfn convert_ulong16_rtz(short16);\n"
32654"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(short16);\n"
32655"ulong16 __ovld __cnfn convert_ulong16_rtp(short16);\n"
32656"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(short16);\n"
32657"ulong16 __ovld __cnfn convert_ulong16_rtn(short16);\n"
32658"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(short16);\n"
32659"ulong16 __ovld __cnfn convert_ulong16(short16);\n"
32660"ulong16 __ovld __cnfn convert_ulong16_sat(short16);\n"
32661"ulong16 __ovld __cnfn convert_ulong16_rte(ushort16);\n"
32662"ulong16 __ovld __cnfn convert_ulong16_sat_rte(ushort16);\n"
32663"ulong16 __ovld __cnfn convert_ulong16_rtz(ushort16);\n"
32664"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ushort16);\n"
32665"ulong16 __ovld __cnfn convert_ulong16_rtp(ushort16);\n"
32666"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ushort16);\n"
32667"ulong16 __ovld __cnfn convert_ulong16_rtn(ushort16);\n"
32668"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ushort16);\n"
32669"ulong16 __ovld __cnfn convert_ulong16(ushort16);\n"
32670"ulong16 __ovld __cnfn convert_ulong16_sat(ushort16);\n"
32671"ulong16 __ovld __cnfn convert_ulong16_rte(int16);\n"
32672"ulong16 __ovld __cnfn convert_ulong16_sat_rte(int16);\n"
32673"ulong16 __ovld __cnfn convert_ulong16_rtz(int16);\n"
32674"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(int16);\n"
32675"ulong16 __ovld __cnfn convert_ulong16_rtp(int16);\n"
32676"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(int16);\n"
32677"ulong16 __ovld __cnfn convert_ulong16_rtn(int16);\n"
32678"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(int16);\n"
32679"ulong16 __ovld __cnfn convert_ulong16(int16);\n"
32680"ulong16 __ovld __cnfn convert_ulong16_sat(int16);\n"
32681"ulong16 __ovld __cnfn convert_ulong16_rte(uint16);\n"
32682"ulong16 __ovld __cnfn convert_ulong16_sat_rte(uint16);\n"
32683"ulong16 __ovld __cnfn convert_ulong16_rtz(uint16);\n"
32684"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uint16);\n"
32685"ulong16 __ovld __cnfn convert_ulong16_rtp(uint16);\n"
32686"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uint16);\n"
32687"ulong16 __ovld __cnfn convert_ulong16_rtn(uint16);\n"
32688"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(uint16);\n"
32689"ulong16 __ovld __cnfn convert_ulong16(uint16);\n"
32690"ulong16 __ovld __cnfn convert_ulong16_sat(uint16);\n"
32691"ulong16 __ovld __cnfn convert_ulong16_rte(long16);\n"
32692"ulong16 __ovld __cnfn convert_ulong16_sat_rte(long16);\n"
32693"ulong16 __ovld __cnfn convert_ulong16_rtz(long16);\n"
32694"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(long16);\n"
32695"ulong16 __ovld __cnfn convert_ulong16_rtp(long16);\n"
32696"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(long16);\n"
32697"ulong16 __ovld __cnfn convert_ulong16_rtn(long16);\n"
32698"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(long16);\n"
32699"ulong16 __ovld __cnfn convert_ulong16(long16);\n"
32700"ulong16 __ovld __cnfn convert_ulong16_sat(long16);\n"
32701"ulong16 __ovld __cnfn convert_ulong16_rte(ulong16);\n"
32702"ulong16 __ovld __cnfn convert_ulong16_sat_rte(ulong16);\n"
32703"ulong16 __ovld __cnfn convert_ulong16_rtz(ulong16);\n"
32704"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ulong16);\n"
32705"ulong16 __ovld __cnfn convert_ulong16_rtp(ulong16);\n"
32706"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ulong16);\n"
32707"ulong16 __ovld __cnfn convert_ulong16_rtn(ulong16);\n"
32708"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ulong16);\n"
32709"ulong16 __ovld __cnfn convert_ulong16(ulong16);\n"
32710"ulong16 __ovld __cnfn convert_ulong16_sat(ulong16);\n"
32711"ulong16 __ovld __cnfn convert_ulong16_rte(float16);\n"
32712"ulong16 __ovld __cnfn convert_ulong16_sat_rte(float16);\n"
32713"ulong16 __ovld __cnfn convert_ulong16_rtz(float16);\n"
32714"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(float16);\n"
32715"ulong16 __ovld __cnfn convert_ulong16_rtp(float16);\n"
32716"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(float16);\n"
32717"ulong16 __ovld __cnfn convert_ulong16_rtn(float16);\n"
32718"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(float16);\n"
32719"ulong16 __ovld __cnfn convert_ulong16(float16);\n"
32720"ulong16 __ovld __cnfn convert_ulong16_sat(float16);\n"
32721"float16 __ovld __cnfn convert_float16_rte(char16);\n"
32722"float16 __ovld __cnfn convert_float16_rtz(char16);\n"
32723"float16 __ovld __cnfn convert_float16_rtp(char16);\n"
32724"float16 __ovld __cnfn convert_float16_rtn(char16);\n"
32725"float16 __ovld __cnfn convert_float16(char16);\n"
32726"float16 __ovld __cnfn convert_float16_rte(uchar16);\n"
32727"float16 __ovld __cnfn convert_float16_rtz(uchar16);\n"
32728"float16 __ovld __cnfn convert_float16_rtp(uchar16);\n"
32729"float16 __ovld __cnfn convert_float16_rtn(uchar16);\n"
32730"float16 __ovld __cnfn convert_float16(uchar16);\n"
32731"float16 __ovld __cnfn convert_float16_rte(short16);\n"
32732"float16 __ovld __cnfn convert_float16_rtz(short16);\n"
32733"float16 __ovld __cnfn convert_float16_rtp(short16);\n"
32734"float16 __ovld __cnfn convert_float16_rtn(short16);\n"
32735"float16 __ovld __cnfn convert_float16(short16);\n"
32736"float16 __ovld __cnfn convert_float16_rte(ushort16);\n"
32737"float16 __ovld __cnfn convert_float16_rtz(ushort16);\n"
32738"float16 __ovld __cnfn convert_float16_rtp(ushort16);\n"
32739"float16 __ovld __cnfn convert_float16_rtn(ushort16);\n"
32740"float16 __ovld __cnfn convert_float16(ushort16);\n"
32741"float16 __ovld __cnfn convert_float16_rte(int16);\n"
32742"float16 __ovld __cnfn convert_float16_rtz(int16);\n"
32743"float16 __ovld __cnfn convert_float16_rtp(int16);\n"
32744"float16 __ovld __cnfn convert_float16_rtn(int16);\n"
32745"float16 __ovld __cnfn convert_float16(int16);\n"
32746"float16 __ovld __cnfn convert_float16_rte(uint16);\n"
32747"float16 __ovld __cnfn convert_float16_rtz(uint16);\n"
32748"float16 __ovld __cnfn convert_float16_rtp(uint16);\n"
32749"float16 __ovld __cnfn convert_float16_rtn(uint16);\n"
32750"float16 __ovld __cnfn convert_float16(uint16);\n"
32751"float16 __ovld __cnfn convert_float16_rte(long16);\n"
32752"float16 __ovld __cnfn convert_float16_rtz(long16);\n"
32753"float16 __ovld __cnfn convert_float16_rtp(long16);\n"
32754"float16 __ovld __cnfn convert_float16_rtn(long16);\n"
32755"float16 __ovld __cnfn convert_float16(long16);\n"
32756"float16 __ovld __cnfn convert_float16_rte(ulong16);\n"
32757"float16 __ovld __cnfn convert_float16_rtz(ulong16);\n"
32758"float16 __ovld __cnfn convert_float16_rtp(ulong16);\n"
32759"float16 __ovld __cnfn convert_float16_rtn(ulong16);\n"
32760"float16 __ovld __cnfn convert_float16(ulong16);\n"
32761"float16 __ovld __cnfn convert_float16_rte(float16);\n"
32762"float16 __ovld __cnfn convert_float16_rtz(float16);\n"
32763"float16 __ovld __cnfn convert_float16_rtp(float16);\n"
32764"float16 __ovld __cnfn convert_float16_rtn(float16);\n"
32765"float16 __ovld __cnfn convert_float16(float16);\n"
32766"\n"
32767"// Conversions with double data type parameters or return value.\n"
32768"\n"
32769"#ifdef cl_khr_fp64\n"
32770"char __ovld __cnfn convert_char(double);\n"
32771"char __ovld __cnfn convert_char_rte(double);\n"
32772"char __ovld __cnfn convert_char_rtn(double);\n"
32773"char __ovld __cnfn convert_char_rtp(double);\n"
32774"char __ovld __cnfn convert_char_rtz(double);\n"
32775"char __ovld __cnfn convert_char_sat(double);\n"
32776"char __ovld __cnfn convert_char_sat_rte(double);\n"
32777"char __ovld __cnfn convert_char_sat_rtn(double);\n"
32778"char __ovld __cnfn convert_char_sat_rtp(double);\n"
32779"char __ovld __cnfn convert_char_sat_rtz(double);\n"
32780"char2 __ovld __cnfn convert_char2(double2);\n"
32781"char2 __ovld __cnfn convert_char2_rte(double2);\n"
32782"char2 __ovld __cnfn convert_char2_rtn(double2);\n"
32783"char2 __ovld __cnfn convert_char2_rtp(double2);\n"
32784"char2 __ovld __cnfn convert_char2_rtz(double2);\n"
32785"char2 __ovld __cnfn convert_char2_sat(double2);\n"
32786"char2 __ovld __cnfn convert_char2_sat_rte(double2);\n"
32787"char2 __ovld __cnfn convert_char2_sat_rtn(double2);\n"
32788"char2 __ovld __cnfn convert_char2_sat_rtp(double2);\n"
32789"char2 __ovld __cnfn convert_char2_sat_rtz(double2);\n"
32790"char3 __ovld __cnfn convert_char3(double3);\n"
32791"char3 __ovld __cnfn convert_char3_rte(double3);\n"
32792"char3 __ovld __cnfn convert_char3_rtn(double3);\n"
32793"char3 __ovld __cnfn convert_char3_rtp(double3);\n"
32794"char3 __ovld __cnfn convert_char3_rtz(double3);\n"
32795"char3 __ovld __cnfn convert_char3_sat(double3);\n"
32796"char3 __ovld __cnfn convert_char3_sat_rte(double3);\n"
32797"char3 __ovld __cnfn convert_char3_sat_rtn(double3);\n"
32798"char3 __ovld __cnfn convert_char3_sat_rtp(double3);\n"
32799"char3 __ovld __cnfn convert_char3_sat_rtz(double3);\n"
32800"char4 __ovld __cnfn convert_char4(double4);\n"
32801"char4 __ovld __cnfn convert_char4_rte(double4);\n"
32802"char4 __ovld __cnfn convert_char4_rtn(double4);\n"
32803"char4 __ovld __cnfn convert_char4_rtp(double4);\n"
32804"char4 __ovld __cnfn convert_char4_rtz(double4);\n"
32805"char4 __ovld __cnfn convert_char4_sat(double4);\n"
32806"char4 __ovld __cnfn convert_char4_sat_rte(double4);\n"
32807"char4 __ovld __cnfn convert_char4_sat_rtn(double4);\n"
32808"char4 __ovld __cnfn convert_char4_sat_rtp(double4);\n"
32809"char4 __ovld __cnfn convert_char4_sat_rtz(double4);\n"
32810"char8 __ovld __cnfn convert_char8(double8);\n"
32811"char8 __ovld __cnfn convert_char8_rte(double8);\n"
32812"char8 __ovld __cnfn convert_char8_rtn(double8);\n"
32813"char8 __ovld __cnfn convert_char8_rtp(double8);\n"
32814"char8 __ovld __cnfn convert_char8_rtz(double8);\n"
32815"char8 __ovld __cnfn convert_char8_sat(double8);\n"
32816"char8 __ovld __cnfn convert_char8_sat_rte(double8);\n"
32817"char8 __ovld __cnfn convert_char8_sat_rtn(double8);\n"
32818"char8 __ovld __cnfn convert_char8_sat_rtp(double8);\n"
32819"char8 __ovld __cnfn convert_char8_sat_rtz(double8);\n"
32820"char16 __ovld __cnfn convert_char16(double16);\n"
32821"char16 __ovld __cnfn convert_char16_rte(double16);\n"
32822"char16 __ovld __cnfn convert_char16_rtn(double16);\n"
32823"char16 __ovld __cnfn convert_char16_rtp(double16);\n"
32824"char16 __ovld __cnfn convert_char16_rtz(double16);\n"
32825"char16 __ovld __cnfn convert_char16_sat(double16);\n"
32826"char16 __ovld __cnfn convert_char16_sat_rte(double16);\n"
32827"char16 __ovld __cnfn convert_char16_sat_rtn(double16);\n"
32828"char16 __ovld __cnfn convert_char16_sat_rtp(double16);\n"
32829"char16 __ovld __cnfn convert_char16_sat_rtz(double16);\n"
32830"\n"
32831"uchar __ovld __cnfn convert_uchar(double);\n"
32832"uchar __ovld __cnfn convert_uchar_rte(double);\n"
32833"uchar __ovld __cnfn convert_uchar_rtn(double);\n"
32834"uchar __ovld __cnfn convert_uchar_rtp(double);\n"
32835"uchar __ovld __cnfn convert_uchar_rtz(double);\n"
32836"uchar __ovld __cnfn convert_uchar_sat(double);\n"
32837"uchar __ovld __cnfn convert_uchar_sat_rte(double);\n"
32838"uchar __ovld __cnfn convert_uchar_sat_rtn(double);\n"
32839"uchar __ovld __cnfn convert_uchar_sat_rtp(double);\n"
32840"uchar __ovld __cnfn convert_uchar_sat_rtz(double);\n"
32841"uchar2 __ovld __cnfn convert_uchar2(double2);\n"
32842"uchar2 __ovld __cnfn convert_uchar2_rte(double2);\n"
32843"uchar2 __ovld __cnfn convert_uchar2_rtn(double2);\n"
32844"uchar2 __ovld __cnfn convert_uchar2_rtp(double2);\n"
32845"uchar2 __ovld __cnfn convert_uchar2_rtz(double2);\n"
32846"uchar2 __ovld __cnfn convert_uchar2_sat(double2);\n"
32847"uchar2 __ovld __cnfn convert_uchar2_sat_rte(double2);\n"
32848"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(double2);\n"
32849"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(double2);\n"
32850"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(double2);\n"
32851"uchar3 __ovld __cnfn convert_uchar3(double3);\n"
32852"uchar3 __ovld __cnfn convert_uchar3_rte(double3);\n"
32853"uchar3 __ovld __cnfn convert_uchar3_rtn(double3);\n"
32854"uchar3 __ovld __cnfn convert_uchar3_rtp(double3);\n"
32855"uchar3 __ovld __cnfn convert_uchar3_rtz(double3);\n"
32856"uchar3 __ovld __cnfn convert_uchar3_sat(double3);\n"
32857"uchar3 __ovld __cnfn convert_uchar3_sat_rte(double3);\n"
32858"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(double3);\n"
32859"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(double3);\n"
32860"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(double3);\n"
32861"uchar4 __ovld __cnfn convert_uchar4(double4);\n"
32862"uchar4 __ovld __cnfn convert_uchar4_rte(double4);\n"
32863"uchar4 __ovld __cnfn convert_uchar4_rtn(double4);\n"
32864"uchar4 __ovld __cnfn convert_uchar4_rtp(double4);\n"
32865"uchar4 __ovld __cnfn convert_uchar4_rtz(double4);\n"
32866"uchar4 __ovld __cnfn convert_uchar4_sat(double4);\n"
32867"uchar4 __ovld __cnfn convert_uchar4_sat_rte(double4);\n"
32868"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(double4);\n"
32869"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(double4);\n"
32870"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(double4);\n"
32871"uchar8 __ovld __cnfn convert_uchar8(double8);\n"
32872"uchar8 __ovld __cnfn convert_uchar8_rte(double8);\n"
32873"uchar8 __ovld __cnfn convert_uchar8_rtn(double8);\n"
32874"uchar8 __ovld __cnfn convert_uchar8_rtp(double8);\n"
32875"uchar8 __ovld __cnfn convert_uchar8_rtz(double8);\n"
32876"uchar8 __ovld __cnfn convert_uchar8_sat(double8);\n"
32877"uchar8 __ovld __cnfn convert_uchar8_sat_rte(double8);\n"
32878"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(double8);\n"
32879"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(double8);\n"
32880"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(double8);\n"
32881"uchar16 __ovld __cnfn convert_uchar16(double16);\n"
32882"uchar16 __ovld __cnfn convert_uchar16_rte(double16);\n"
32883"uchar16 __ovld __cnfn convert_uchar16_rtn(double16);\n"
32884"uchar16 __ovld __cnfn convert_uchar16_rtp(double16);\n"
32885"uchar16 __ovld __cnfn convert_uchar16_rtz(double16);\n"
32886"uchar16 __ovld __cnfn convert_uchar16_sat(double16);\n"
32887"uchar16 __ovld __cnfn convert_uchar16_sat_rte(double16);\n"
32888"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(double16);\n"
32889"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(double16);\n"
32890"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(double16);\n"
32891"\n"
32892"short __ovld __cnfn convert_short(double);\n"
32893"short __ovld __cnfn convert_short_rte(double);\n"
32894"short __ovld __cnfn convert_short_rtn(double);\n"
32895"short __ovld __cnfn convert_short_rtp(double);\n"
32896"short __ovld __cnfn convert_short_rtz(double);\n"
32897"short __ovld __cnfn convert_short_sat(double);\n"
32898"short __ovld __cnfn convert_short_sat_rte(double);\n"
32899"short __ovld __cnfn convert_short_sat_rtn(double);\n"
32900"short __ovld __cnfn convert_short_sat_rtp(double);\n"
32901"short __ovld __cnfn convert_short_sat_rtz(double);\n"
32902"short2 __ovld __cnfn convert_short2(double2);\n"
32903"short2 __ovld __cnfn convert_short2_rte(double2);\n"
32904"short2 __ovld __cnfn convert_short2_rtn(double2);\n"
32905"short2 __ovld __cnfn convert_short2_rtp(double2);\n"
32906"short2 __ovld __cnfn convert_short2_rtz(double2);\n"
32907"short2 __ovld __cnfn convert_short2_sat(double2);\n"
32908"short2 __ovld __cnfn convert_short2_sat_rte(double2);\n"
32909"short2 __ovld __cnfn convert_short2_sat_rtn(double2);\n"
32910"short2 __ovld __cnfn convert_short2_sat_rtp(double2);\n"
32911"short2 __ovld __cnfn convert_short2_sat_rtz(double2);\n"
32912"short3 __ovld __cnfn convert_short3(double3);\n"
32913"short3 __ovld __cnfn convert_short3_rte(double3);\n"
32914"short3 __ovld __cnfn convert_short3_rtn(double3);\n"
32915"short3 __ovld __cnfn convert_short3_rtp(double3);\n"
32916"short3 __ovld __cnfn convert_short3_rtz(double3);\n"
32917"short3 __ovld __cnfn convert_short3_sat(double3);\n"
32918"short3 __ovld __cnfn convert_short3_sat_rte(double3);\n"
32919"short3 __ovld __cnfn convert_short3_sat_rtn(double3);\n"
32920"short3 __ovld __cnfn convert_short3_sat_rtp(double3);\n"
32921"short3 __ovld __cnfn convert_short3_sat_rtz(double3);\n"
32922"short4 __ovld __cnfn convert_short4(double4);\n"
32923"short4 __ovld __cnfn convert_short4_rte(double4);\n"
32924"short4 __ovld __cnfn convert_short4_rtn(double4);\n"
32925"short4 __ovld __cnfn convert_short4_rtp(double4);\n"
32926"short4 __ovld __cnfn convert_short4_rtz(double4);\n"
32927"short4 __ovld __cnfn convert_short4_sat(double4);\n"
32928"short4 __ovld __cnfn convert_short4_sat_rte(double4);\n"
32929"short4 __ovld __cnfn convert_short4_sat_rtn(double4);\n"
32930"short4 __ovld __cnfn convert_short4_sat_rtp(double4);\n"
32931"short4 __ovld __cnfn convert_short4_sat_rtz(double4);\n"
32932"short8 __ovld __cnfn convert_short8(double8);\n"
32933"short8 __ovld __cnfn convert_short8_rte(double8);\n"
32934"short8 __ovld __cnfn convert_short8_rtn(double8);\n"
32935"short8 __ovld __cnfn convert_short8_rtp(double8);\n"
32936"short8 __ovld __cnfn convert_short8_rtz(double8);\n"
32937"short8 __ovld __cnfn convert_short8_sat(double8);\n"
32938"short8 __ovld __cnfn convert_short8_sat_rte(double8);\n"
32939"short8 __ovld __cnfn convert_short8_sat_rtn(double8);\n"
32940"short8 __ovld __cnfn convert_short8_sat_rtp(double8);\n"
32941"short8 __ovld __cnfn convert_short8_sat_rtz(double8);\n"
32942"short16 __ovld __cnfn convert_short16(double16);\n"
32943"short16 __ovld __cnfn convert_short16_rte(double16);\n"
32944"short16 __ovld __cnfn convert_short16_rtn(double16);\n"
32945"short16 __ovld __cnfn convert_short16_rtp(double16);\n"
32946"short16 __ovld __cnfn convert_short16_rtz(double16);\n"
32947"short16 __ovld __cnfn convert_short16_sat(double16);\n"
32948"short16 __ovld __cnfn convert_short16_sat_rte(double16);\n"
32949"short16 __ovld __cnfn convert_short16_sat_rtn(double16);\n"
32950"short16 __ovld __cnfn convert_short16_sat_rtp(double16);\n"
32951"short16 __ovld __cnfn convert_short16_sat_rtz(double16);\n"
32952"\n"
32953"ushort __ovld __cnfn convert_ushort(double);\n"
32954"ushort __ovld __cnfn convert_ushort_rte(double);\n"
32955"ushort __ovld __cnfn convert_ushort_rtn(double);\n"
32956"ushort __ovld __cnfn convert_ushort_rtp(double);\n"
32957"ushort __ovld __cnfn convert_ushort_rtz(double);\n"
32958"ushort __ovld __cnfn convert_ushort_sat(double);\n"
32959"ushort __ovld __cnfn convert_ushort_sat_rte(double);\n"
32960"ushort __ovld __cnfn convert_ushort_sat_rtn(double);\n"
32961"ushort __ovld __cnfn convert_ushort_sat_rtp(double);\n"
32962"ushort __ovld __cnfn convert_ushort_sat_rtz(double);\n"
32963"ushort2 __ovld __cnfn convert_ushort2(double2);\n"
32964"ushort2 __ovld __cnfn convert_ushort2_rte(double2);\n"
32965"ushort2 __ovld __cnfn convert_ushort2_rtn(double2);\n"
32966"ushort2 __ovld __cnfn convert_ushort2_rtp(double2);\n"
32967"ushort2 __ovld __cnfn convert_ushort2_rtz(double2);\n"
32968"ushort2 __ovld __cnfn convert_ushort2_sat(double2);\n"
32969"ushort2 __ovld __cnfn convert_ushort2_sat_rte(double2);\n"
32970"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(double2);\n"
32971"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(double2);\n"
32972"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(double2);\n"
32973"ushort3 __ovld __cnfn convert_ushort3(double3);\n"
32974"ushort3 __ovld __cnfn convert_ushort3_rte(double3);\n"
32975"ushort3 __ovld __cnfn convert_ushort3_rtn(double3);\n"
32976"ushort3 __ovld __cnfn convert_ushort3_rtp(double3);\n"
32977"ushort3 __ovld __cnfn convert_ushort3_rtz(double3);\n"
32978"ushort3 __ovld __cnfn convert_ushort3_sat(double3);\n"
32979"ushort3 __ovld __cnfn convert_ushort3_sat_rte(double3);\n"
32980"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(double3);\n"
32981"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(double3);\n"
32982"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(double3);\n"
32983"ushort4 __ovld __cnfn convert_ushort4(double4);\n"
32984"ushort4 __ovld __cnfn convert_ushort4_rte(double4);\n"
32985"ushort4 __ovld __cnfn convert_ushort4_rtn(double4);\n"
32986"ushort4 __ovld __cnfn convert_ushort4_rtp(double4);\n"
32987"ushort4 __ovld __cnfn convert_ushort4_rtz(double4);\n"
32988"ushort4 __ovld __cnfn convert_ushort4_sat(double4);\n"
32989"ushort4 __ovld __cnfn convert_ushort4_sat_rte(double4);\n"
32990"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(double4);\n"
32991"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(double4);\n"
32992"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(double4);\n"
32993"ushort8 __ovld __cnfn convert_ushort8(double8);\n"
32994"ushort8 __ovld __cnfn convert_ushort8_rte(double8);\n"
32995"ushort8 __ovld __cnfn convert_ushort8_rtn(double8);\n"
32996"ushort8 __ovld __cnfn convert_ushort8_rtp(double8);\n"
32997"ushort8 __ovld __cnfn convert_ushort8_rtz(double8);\n"
32998"ushort8 __ovld __cnfn convert_ushort8_sat(double8);\n"
32999"ushort8 __ovld __cnfn convert_ushort8_sat_rte(double8);\n"
33000"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(double8);\n"
33001"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(double8);\n"
33002"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(double8);\n"
33003"ushort16 __ovld __cnfn convert_ushort16(double16);\n"
33004"ushort16 __ovld __cnfn convert_ushort16_rte(double16);\n"
33005"ushort16 __ovld __cnfn convert_ushort16_rtn(double16);\n"
33006"ushort16 __ovld __cnfn convert_ushort16_rtp(double16);\n"
33007"ushort16 __ovld __cnfn convert_ushort16_rtz(double16);\n"
33008"ushort16 __ovld __cnfn convert_ushort16_sat(double16);\n"
33009"ushort16 __ovld __cnfn convert_ushort16_sat_rte(double16);\n"
33010"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(double16);\n"
33011"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(double16);\n"
33012"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(double16);\n"
33013"\n"
33014"int __ovld __cnfn convert_int(double);\n"
33015"int __ovld __cnfn convert_int_rte(double);\n"
33016"int __ovld __cnfn convert_int_rtn(double);\n"
33017"int __ovld __cnfn convert_int_rtp(double);\n"
33018"int __ovld __cnfn convert_int_rtz(double);\n"
33019"int __ovld __cnfn convert_int_sat(double);\n"
33020"int __ovld __cnfn convert_int_sat_rte(double);\n"
33021"int __ovld __cnfn convert_int_sat_rtn(double);\n"
33022"int __ovld __cnfn convert_int_sat_rtp(double);\n"
33023"int __ovld __cnfn convert_int_sat_rtz(double);\n"
33024"int2 __ovld __cnfn convert_int2(double2);\n"
33025"int2 __ovld __cnfn convert_int2_rte(double2);\n"
33026"int2 __ovld __cnfn convert_int2_rtn(double2);\n"
33027"int2 __ovld __cnfn convert_int2_rtp(double2);\n"
33028"int2 __ovld __cnfn convert_int2_rtz(double2);\n"
33029"int2 __ovld __cnfn convert_int2_sat(double2);\n"
33030"int2 __ovld __cnfn convert_int2_sat_rte(double2);\n"
33031"int2 __ovld __cnfn convert_int2_sat_rtn(double2);\n"
33032"int2 __ovld __cnfn convert_int2_sat_rtp(double2);\n"
33033"int2 __ovld __cnfn convert_int2_sat_rtz(double2);\n"
33034"int3 __ovld __cnfn convert_int3(double3);\n"
33035"int3 __ovld __cnfn convert_int3_rte(double3);\n"
33036"int3 __ovld __cnfn convert_int3_rtn(double3);\n"
33037"int3 __ovld __cnfn convert_int3_rtp(double3);\n"
33038"int3 __ovld __cnfn convert_int3_rtz(double3);\n"
33039"int3 __ovld __cnfn convert_int3_sat(double3);\n"
33040"int3 __ovld __cnfn convert_int3_sat_rte(double3);\n"
33041"int3 __ovld __cnfn convert_int3_sat_rtn(double3);\n"
33042"int3 __ovld __cnfn convert_int3_sat_rtp(double3);\n"
33043"int3 __ovld __cnfn convert_int3_sat_rtz(double3);\n"
33044"int4 __ovld __cnfn convert_int4(double4);\n"
33045"int4 __ovld __cnfn convert_int4_rte(double4);\n"
33046"int4 __ovld __cnfn convert_int4_rtn(double4);\n"
33047"int4 __ovld __cnfn convert_int4_rtp(double4);\n"
33048"int4 __ovld __cnfn convert_int4_rtz(double4);\n"
33049"int4 __ovld __cnfn convert_int4_sat(double4);\n"
33050"int4 __ovld __cnfn convert_int4_sat_rte(double4);\n"
33051"int4 __ovld __cnfn convert_int4_sat_rtn(double4);\n"
33052"int4 __ovld __cnfn convert_int4_sat_rtp(double4);\n"
33053"int4 __ovld __cnfn convert_int4_sat_rtz(double4);\n"
33054"int8 __ovld __cnfn convert_int8(double8);\n"
33055"int8 __ovld __cnfn convert_int8_rte(double8);\n"
33056"int8 __ovld __cnfn convert_int8_rtn(double8);\n"
33057"int8 __ovld __cnfn convert_int8_rtp(double8);\n"
33058"int8 __ovld __cnfn convert_int8_rtz(double8);\n"
33059"int8 __ovld __cnfn convert_int8_sat(double8);\n"
33060"int8 __ovld __cnfn convert_int8_sat_rte(double8);\n"
33061"int8 __ovld __cnfn convert_int8_sat_rtn(double8);\n"
33062"int8 __ovld __cnfn convert_int8_sat_rtp(double8);\n"
33063"int8 __ovld __cnfn convert_int8_sat_rtz(double8);\n"
33064"int16 __ovld __cnfn convert_int16(double16);\n"
33065"int16 __ovld __cnfn convert_int16_rte(double16);\n"
33066"int16 __ovld __cnfn convert_int16_rtn(double16);\n"
33067"int16 __ovld __cnfn convert_int16_rtp(double16);\n"
33068"int16 __ovld __cnfn convert_int16_rtz(double16);\n"
33069"int16 __ovld __cnfn convert_int16_sat(double16);\n"
33070"int16 __ovld __cnfn convert_int16_sat_rte(double16);\n"
33071"int16 __ovld __cnfn convert_int16_sat_rtn(double16);\n"
33072"int16 __ovld __cnfn convert_int16_sat_rtp(double16);\n"
33073"int16 __ovld __cnfn convert_int16_sat_rtz(double16);\n"
33074"\n"
33075"uint __ovld __cnfn convert_uint(double);\n"
33076"uint __ovld __cnfn convert_uint_rte(double);\n"
33077"uint __ovld __cnfn convert_uint_rtn(double);\n"
33078"uint __ovld __cnfn convert_uint_rtp(double);\n"
33079"uint __ovld __cnfn convert_uint_rtz(double);\n"
33080"uint __ovld __cnfn convert_uint_sat(double);\n"
33081"uint __ovld __cnfn convert_uint_sat_rte(double);\n"
33082"uint __ovld __cnfn convert_uint_sat_rtn(double);\n"
33083"uint __ovld __cnfn convert_uint_sat_rtp(double);\n"
33084"uint __ovld __cnfn convert_uint_sat_rtz(double);\n"
33085"uint2 __ovld __cnfn convert_uint2(double2);\n"
33086"uint2 __ovld __cnfn convert_uint2_rte(double2);\n"
33087"uint2 __ovld __cnfn convert_uint2_rtn(double2);\n"
33088"uint2 __ovld __cnfn convert_uint2_rtp(double2);\n"
33089"uint2 __ovld __cnfn convert_uint2_rtz(double2);\n"
33090"uint2 __ovld __cnfn convert_uint2_sat(double2);\n"
33091"uint2 __ovld __cnfn convert_uint2_sat_rte(double2);\n"
33092"uint2 __ovld __cnfn convert_uint2_sat_rtn(double2);\n"
33093"uint2 __ovld __cnfn convert_uint2_sat_rtp(double2);\n"
33094"uint2 __ovld __cnfn convert_uint2_sat_rtz(double2);\n"
33095"uint3 __ovld __cnfn convert_uint3(double3);\n"
33096"uint3 __ovld __cnfn convert_uint3_rte(double3);\n"
33097"uint3 __ovld __cnfn convert_uint3_rtn(double3);\n"
33098"uint3 __ovld __cnfn convert_uint3_rtp(double3);\n"
33099"uint3 __ovld __cnfn convert_uint3_rtz(double3);\n"
33100"uint3 __ovld __cnfn convert_uint3_sat(double3);\n"
33101"uint3 __ovld __cnfn convert_uint3_sat_rte(double3);\n"
33102"uint3 __ovld __cnfn convert_uint3_sat_rtn(double3);\n"
33103"uint3 __ovld __cnfn convert_uint3_sat_rtp(double3);\n"
33104"uint3 __ovld __cnfn convert_uint3_sat_rtz(double3);\n"
33105"uint4 __ovld __cnfn convert_uint4(double4);\n"
33106"uint4 __ovld __cnfn convert_uint4_rte(double4);\n"
33107"uint4 __ovld __cnfn convert_uint4_rtn(double4);\n"
33108"uint4 __ovld __cnfn convert_uint4_rtp(double4);\n"
33109"uint4 __ovld __cnfn convert_uint4_rtz(double4);\n"
33110"uint4 __ovld __cnfn convert_uint4_sat(double4);\n"
33111"uint4 __ovld __cnfn convert_uint4_sat_rte(double4);\n"
33112"uint4 __ovld __cnfn convert_uint4_sat_rtn(double4);\n"
33113"uint4 __ovld __cnfn convert_uint4_sat_rtp(double4);\n"
33114"uint4 __ovld __cnfn convert_uint4_sat_rtz(double4);\n"
33115"uint8 __ovld __cnfn convert_uint8(double8);\n"
33116"uint8 __ovld __cnfn convert_uint8_rte(double8);\n"
33117"uint8 __ovld __cnfn convert_uint8_rtn(double8);\n"
33118"uint8 __ovld __cnfn convert_uint8_rtp(double8);\n"
33119"uint8 __ovld __cnfn convert_uint8_rtz(double8);\n"
33120"uint8 __ovld __cnfn convert_uint8_sat(double8);\n"
33121"uint8 __ovld __cnfn convert_uint8_sat_rte(double8);\n"
33122"uint8 __ovld __cnfn convert_uint8_sat_rtn(double8);\n"
33123"uint8 __ovld __cnfn convert_uint8_sat_rtp(double8);\n"
33124"uint8 __ovld __cnfn convert_uint8_sat_rtz(double8);\n"
33125"uint16 __ovld __cnfn convert_uint16(double16);\n"
33126"uint16 __ovld __cnfn convert_uint16_rte(double16);\n"
33127"uint16 __ovld __cnfn convert_uint16_rtn(double16);\n"
33128"uint16 __ovld __cnfn convert_uint16_rtp(double16);\n"
33129"uint16 __ovld __cnfn convert_uint16_rtz(double16);\n"
33130"uint16 __ovld __cnfn convert_uint16_sat(double16);\n"
33131"uint16 __ovld __cnfn convert_uint16_sat_rte(double16);\n"
33132"uint16 __ovld __cnfn convert_uint16_sat_rtn(double16);\n"
33133"uint16 __ovld __cnfn convert_uint16_sat_rtp(double16);\n"
33134"uint16 __ovld __cnfn convert_uint16_sat_rtz(double16);\n"
33135"\n"
33136"long __ovld __cnfn convert_long(double);\n"
33137"long __ovld __cnfn convert_long_rte(double);\n"
33138"long __ovld __cnfn convert_long_rtn(double);\n"
33139"long __ovld __cnfn convert_long_rtp(double);\n"
33140"long __ovld __cnfn convert_long_rtz(double);\n"
33141"long __ovld __cnfn convert_long_sat(double);\n"
33142"long __ovld __cnfn convert_long_sat_rte(double);\n"
33143"long __ovld __cnfn convert_long_sat_rtn(double);\n"
33144"long __ovld __cnfn convert_long_sat_rtp(double);\n"
33145"long __ovld __cnfn convert_long_sat_rtz(double);\n"
33146"long2 __ovld __cnfn convert_long2(double2);\n"
33147"long2 __ovld __cnfn convert_long2_rte(double2);\n"
33148"long2 __ovld __cnfn convert_long2_rtn(double2);\n"
33149"long2 __ovld __cnfn convert_long2_rtp(double2);\n"
33150"long2 __ovld __cnfn convert_long2_rtz(double2);\n"
33151"long2 __ovld __cnfn convert_long2_sat(double2);\n"
33152"long2 __ovld __cnfn convert_long2_sat_rte(double2);\n"
33153"long2 __ovld __cnfn convert_long2_sat_rtn(double2);\n"
33154"long2 __ovld __cnfn convert_long2_sat_rtp(double2);\n"
33155"long2 __ovld __cnfn convert_long2_sat_rtz(double2);\n"
33156"long3 __ovld __cnfn convert_long3(double3);\n"
33157"long3 __ovld __cnfn convert_long3_rte(double3);\n"
33158"long3 __ovld __cnfn convert_long3_rtn(double3);\n"
33159"long3 __ovld __cnfn convert_long3_rtp(double3);\n"
33160"long3 __ovld __cnfn convert_long3_rtz(double3);\n"
33161"long3 __ovld __cnfn convert_long3_sat(double3);\n"
33162"long3 __ovld __cnfn convert_long3_sat_rte(double3);\n"
33163"long3 __ovld __cnfn convert_long3_sat_rtn(double3);\n"
33164"long3 __ovld __cnfn convert_long3_sat_rtp(double3);\n"
33165"long3 __ovld __cnfn convert_long3_sat_rtz(double3);\n"
33166"long4 __ovld __cnfn convert_long4(double4);\n"
33167"long4 __ovld __cnfn convert_long4_rte(double4);\n"
33168"long4 __ovld __cnfn convert_long4_rtn(double4);\n"
33169"long4 __ovld __cnfn convert_long4_rtp(double4);\n"
33170"long4 __ovld __cnfn convert_long4_rtz(double4);\n"
33171"long4 __ovld __cnfn convert_long4_sat(double4);\n"
33172"long4 __ovld __cnfn convert_long4_sat_rte(double4);\n"
33173"long4 __ovld __cnfn convert_long4_sat_rtn(double4);\n"
33174"long4 __ovld __cnfn convert_long4_sat_rtp(double4);\n"
33175"long4 __ovld __cnfn convert_long4_sat_rtz(double4);\n"
33176"long8 __ovld __cnfn convert_long8(double8);\n"
33177"long8 __ovld __cnfn convert_long8_rte(double8);\n"
33178"long8 __ovld __cnfn convert_long8_rtn(double8);\n"
33179"long8 __ovld __cnfn convert_long8_rtp(double8);\n"
33180"long8 __ovld __cnfn convert_long8_rtz(double8);\n"
33181"long8 __ovld __cnfn convert_long8_sat(double8);\n"
33182"long8 __ovld __cnfn convert_long8_sat_rte(double8);\n"
33183"long8 __ovld __cnfn convert_long8_sat_rtn(double8);\n"
33184"long8 __ovld __cnfn convert_long8_sat_rtp(double8);\n"
33185"long8 __ovld __cnfn convert_long8_sat_rtz(double8);\n"
33186"long16 __ovld __cnfn convert_long16(double16);\n"
33187"long16 __ovld __cnfn convert_long16_rte(double16);\n"
33188"long16 __ovld __cnfn convert_long16_rtn(double16);\n"
33189"long16 __ovld __cnfn convert_long16_rtp(double16);\n"
33190"long16 __ovld __cnfn convert_long16_rtz(double16);\n"
33191"long16 __ovld __cnfn convert_long16_sat(double16);\n"
33192"long16 __ovld __cnfn convert_long16_sat_rte(double16);\n"
33193"long16 __ovld __cnfn convert_long16_sat_rtn(double16);\n"
33194"long16 __ovld __cnfn convert_long16_sat_rtp(double16);\n"
33195"long16 __ovld __cnfn convert_long16_sat_rtz(double16);\n"
33196"\n"
33197"ulong __ovld __cnfn convert_ulong(double);\n"
33198"ulong __ovld __cnfn convert_ulong_rte(double);\n"
33199"ulong __ovld __cnfn convert_ulong_rtn(double);\n"
33200"ulong __ovld __cnfn convert_ulong_rtp(double);\n"
33201"ulong __ovld __cnfn convert_ulong_rtz(double);\n"
33202"ulong __ovld __cnfn convert_ulong_sat(double);\n"
33203"ulong __ovld __cnfn convert_ulong_sat_rte(double);\n"
33204"ulong __ovld __cnfn convert_ulong_sat_rtn(double);\n"
33205"ulong __ovld __cnfn convert_ulong_sat_rtp(double);\n"
33206"ulong __ovld __cnfn convert_ulong_sat_rtz(double);\n"
33207"ulong2 __ovld __cnfn convert_ulong2(double2);\n"
33208"ulong2 __ovld __cnfn convert_ulong2_rte(double2);\n"
33209"ulong2 __ovld __cnfn convert_ulong2_rtn(double2);\n"
33210"ulong2 __ovld __cnfn convert_ulong2_rtp(double2);\n"
33211"ulong2 __ovld __cnfn convert_ulong2_rtz(double2);\n"
33212"ulong2 __ovld __cnfn convert_ulong2_sat(double2);\n"
33213"ulong2 __ovld __cnfn convert_ulong2_sat_rte(double2);\n"
33214"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(double2);\n"
33215"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(double2);\n"
33216"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(double2);\n"
33217"ulong3 __ovld __cnfn convert_ulong3(double3);\n"
33218"ulong3 __ovld __cnfn convert_ulong3_rte(double3);\n"
33219"ulong3 __ovld __cnfn convert_ulong3_rtn(double3);\n"
33220"ulong3 __ovld __cnfn convert_ulong3_rtp(double3);\n"
33221"ulong3 __ovld __cnfn convert_ulong3_rtz(double3);\n"
33222"ulong3 __ovld __cnfn convert_ulong3_sat(double3);\n"
33223"ulong3 __ovld __cnfn convert_ulong3_sat_rte(double3);\n"
33224"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(double3);\n"
33225"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(double3);\n"
33226"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(double3);\n"
33227"ulong4 __ovld __cnfn convert_ulong4(double4);\n"
33228"ulong4 __ovld __cnfn convert_ulong4_rte(double4);\n"
33229"ulong4 __ovld __cnfn convert_ulong4_rtn(double4);\n"
33230"ulong4 __ovld __cnfn convert_ulong4_rtp(double4);\n"
33231"ulong4 __ovld __cnfn convert_ulong4_rtz(double4);\n"
33232"ulong4 __ovld __cnfn convert_ulong4_sat(double4);\n"
33233"ulong4 __ovld __cnfn convert_ulong4_sat_rte(double4);\n"
33234"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(double4);\n"
33235"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(double4);\n"
33236"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(double4);\n"
33237"ulong8 __ovld __cnfn convert_ulong8(double8);\n"
33238"ulong8 __ovld __cnfn convert_ulong8_rte(double8);\n"
33239"ulong8 __ovld __cnfn convert_ulong8_rtn(double8);\n"
33240"ulong8 __ovld __cnfn convert_ulong8_rtp(double8);\n"
33241"ulong8 __ovld __cnfn convert_ulong8_rtz(double8);\n"
33242"ulong8 __ovld __cnfn convert_ulong8_sat(double8);\n"
33243"ulong8 __ovld __cnfn convert_ulong8_sat_rte(double8);\n"
33244"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(double8);\n"
33245"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(double8);\n"
33246"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(double8);\n"
33247"ulong16 __ovld __cnfn convert_ulong16(double16);\n"
33248"ulong16 __ovld __cnfn convert_ulong16_rte(double16);\n"
33249"ulong16 __ovld __cnfn convert_ulong16_rtn(double16);\n"
33250"ulong16 __ovld __cnfn convert_ulong16_rtp(double16);\n"
33251"ulong16 __ovld __cnfn convert_ulong16_rtz(double16);\n"
33252"ulong16 __ovld __cnfn convert_ulong16_sat(double16);\n"
33253"ulong16 __ovld __cnfn convert_ulong16_sat_rte(double16);\n"
33254"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(double16);\n"
33255"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(double16);\n"
33256"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(double16);\n"
33257"\n"
33258"float __ovld __cnfn convert_float(double);\n"
33259"float __ovld __cnfn convert_float_rte(double);\n"
33260"float __ovld __cnfn convert_float_rtn(double);\n"
33261"float __ovld __cnfn convert_float_rtp(double);\n"
33262"float __ovld __cnfn convert_float_rtz(double);\n"
33263"float2 __ovld __cnfn convert_float2(double2);\n"
33264"float2 __ovld __cnfn convert_float2_rte(double2);\n"
33265"float2 __ovld __cnfn convert_float2_rtn(double2);\n"
33266"float2 __ovld __cnfn convert_float2_rtp(double2);\n"
33267"float2 __ovld __cnfn convert_float2_rtz(double2);\n"
33268"float3 __ovld __cnfn convert_float3(double3);\n"
33269"float3 __ovld __cnfn convert_float3_rte(double3);\n"
33270"float3 __ovld __cnfn convert_float3_rtn(double3);\n"
33271"float3 __ovld __cnfn convert_float3_rtp(double3);\n"
33272"float3 __ovld __cnfn convert_float3_rtz(double3);\n"
33273"float4 __ovld __cnfn convert_float4(double4);\n"
33274"float4 __ovld __cnfn convert_float4_rte(double4);\n"
33275"float4 __ovld __cnfn convert_float4_rtn(double4);\n"
33276"float4 __ovld __cnfn convert_float4_rtp(double4);\n"
33277"float4 __ovld __cnfn convert_float4_rtz(double4);\n"
33278"float8 __ovld __cnfn convert_float8(double8);\n"
33279"float8 __ovld __cnfn convert_float8_rte(double8);\n"
33280"float8 __ovld __cnfn convert_float8_rtn(double8);\n"
33281"float8 __ovld __cnfn convert_float8_rtp(double8);\n"
33282"float8 __ovld __cnfn convert_float8_rtz(double8);\n"
33283"float16 __ovld __cnfn convert_float16(double16);\n"
33284"float16 __ovld __cnfn convert_float16_rte(double16);\n"
33285"float16 __ovld __cnfn convert_float16_rtn(double16);\n"
33286"float16 __ovld __cnfn convert_float16_rtp(double16);\n"
33287"float16 __ovld __cnfn convert_float16_rtz(double16);\n"
33288"\n"
33289"double __ovld __cnfn convert_double(char);\n"
33290"double __ovld __cnfn convert_double(double);\n"
33291"double __ovld __cnfn convert_double(float);\n"
33292"double __ovld __cnfn convert_double(int);\n"
33293"double __ovld __cnfn convert_double(long);\n"
33294"double __ovld __cnfn convert_double(short);\n"
33295"double __ovld __cnfn convert_double(uchar);\n"
33296"double __ovld __cnfn convert_double(uint);\n"
33297"double __ovld __cnfn convert_double(ulong);\n"
33298"double __ovld __cnfn convert_double(ushort);\n"
33299"double __ovld __cnfn convert_double_rte(char);\n"
33300"double __ovld __cnfn convert_double_rte(double);\n"
33301"double __ovld __cnfn convert_double_rte(float);\n"
33302"double __ovld __cnfn convert_double_rte(int);\n"
33303"double __ovld __cnfn convert_double_rte(long);\n"
33304"double __ovld __cnfn convert_double_rte(short);\n"
33305"double __ovld __cnfn convert_double_rte(uchar);\n"
33306"double __ovld __cnfn convert_double_rte(uint);\n"
33307"double __ovld __cnfn convert_double_rte(ulong);\n"
33308"double __ovld __cnfn convert_double_rte(ushort);\n"
33309"double __ovld __cnfn convert_double_rtn(char);\n"
33310"double __ovld __cnfn convert_double_rtn(double);\n"
33311"double __ovld __cnfn convert_double_rtn(float);\n"
33312"double __ovld __cnfn convert_double_rtn(int);\n"
33313"double __ovld __cnfn convert_double_rtn(long);\n"
33314"double __ovld __cnfn convert_double_rtn(short);\n"
33315"double __ovld __cnfn convert_double_rtn(uchar);\n"
33316"double __ovld __cnfn convert_double_rtn(uint);\n"
33317"double __ovld __cnfn convert_double_rtn(ulong);\n"
33318"double __ovld __cnfn convert_double_rtn(ushort);\n"
33319"double __ovld __cnfn convert_double_rtp(char);\n"
33320"double __ovld __cnfn convert_double_rtp(double);\n"
33321"double __ovld __cnfn convert_double_rtp(float);\n"
33322"double __ovld __cnfn convert_double_rtp(int);\n"
33323"double __ovld __cnfn convert_double_rtp(long);\n"
33324"double __ovld __cnfn convert_double_rtp(short);\n"
33325"double __ovld __cnfn convert_double_rtp(uchar);\n"
33326"double __ovld __cnfn convert_double_rtp(uint);\n"
33327"double __ovld __cnfn convert_double_rtp(ulong);\n"
33328"double __ovld __cnfn convert_double_rtp(ushort);\n"
33329"double __ovld __cnfn convert_double_rtz(char);\n"
33330"double __ovld __cnfn convert_double_rtz(double);\n"
33331"double __ovld __cnfn convert_double_rtz(float);\n"
33332"double __ovld __cnfn convert_double_rtz(int);\n"
33333"double __ovld __cnfn convert_double_rtz(long);\n"
33334"double __ovld __cnfn convert_double_rtz(short);\n"
33335"double __ovld __cnfn convert_double_rtz(uchar);\n"
33336"double __ovld __cnfn convert_double_rtz(uint);\n"
33337"double __ovld __cnfn convert_double_rtz(ulong);\n"
33338"double __ovld __cnfn convert_double_rtz(ushort);\n"
33339"double2 __ovld __cnfn convert_double2(char2);\n"
33340"double2 __ovld __cnfn convert_double2(double2);\n"
33341"double2 __ovld __cnfn convert_double2(float2);\n"
33342"double2 __ovld __cnfn convert_double2(int2);\n"
33343"double2 __ovld __cnfn convert_double2(long2);\n"
33344"double2 __ovld __cnfn convert_double2(short2);\n"
33345"double2 __ovld __cnfn convert_double2(uchar2);\n"
33346"double2 __ovld __cnfn convert_double2(uint2);\n"
33347"double2 __ovld __cnfn convert_double2(ulong2);\n"
33348"double2 __ovld __cnfn convert_double2(ushort2);\n"
33349"double2 __ovld __cnfn convert_double2_rte(char2);\n"
33350"double2 __ovld __cnfn convert_double2_rte(double2);\n"
33351"double2 __ovld __cnfn convert_double2_rte(float2);\n"
33352"double2 __ovld __cnfn convert_double2_rte(int2);\n"
33353"double2 __ovld __cnfn convert_double2_rte(long2);\n"
33354"double2 __ovld __cnfn convert_double2_rte(short2);\n"
33355"double2 __ovld __cnfn convert_double2_rte(uchar2);\n"
33356"double2 __ovld __cnfn convert_double2_rte(uint2);\n"
33357"double2 __ovld __cnfn convert_double2_rte(ulong2);\n"
33358"double2 __ovld __cnfn convert_double2_rte(ushort2);\n"
33359"double2 __ovld __cnfn convert_double2_rtn(char2);\n"
33360"double2 __ovld __cnfn convert_double2_rtn(double2);\n"
33361"double2 __ovld __cnfn convert_double2_rtn(float2);\n"
33362"double2 __ovld __cnfn convert_double2_rtn(int2);\n"
33363"double2 __ovld __cnfn convert_double2_rtn(long2);\n"
33364"double2 __ovld __cnfn convert_double2_rtn(short2);\n"
33365"double2 __ovld __cnfn convert_double2_rtn(uchar2);\n"
33366"double2 __ovld __cnfn convert_double2_rtn(uint2);\n"
33367"double2 __ovld __cnfn convert_double2_rtn(ulong2);\n"
33368"double2 __ovld __cnfn convert_double2_rtn(ushort2);\n"
33369"double2 __ovld __cnfn convert_double2_rtp(char2);\n"
33370"double2 __ovld __cnfn convert_double2_rtp(double2);\n"
33371"double2 __ovld __cnfn convert_double2_rtp(float2);\n"
33372"double2 __ovld __cnfn convert_double2_rtp(int2);\n"
33373"double2 __ovld __cnfn convert_double2_rtp(long2);\n"
33374"double2 __ovld __cnfn convert_double2_rtp(short2);\n"
33375"double2 __ovld __cnfn convert_double2_rtp(uchar2);\n"
33376"double2 __ovld __cnfn convert_double2_rtp(uint2);\n"
33377"double2 __ovld __cnfn convert_double2_rtp(ulong2);\n"
33378"double2 __ovld __cnfn convert_double2_rtp(ushort2);\n"
33379"double2 __ovld __cnfn convert_double2_rtz(char2);\n"
33380"double2 __ovld __cnfn convert_double2_rtz(double2);\n"
33381"double2 __ovld __cnfn convert_double2_rtz(float2);\n"
33382"double2 __ovld __cnfn convert_double2_rtz(int2);\n"
33383"double2 __ovld __cnfn convert_double2_rtz(long2);\n"
33384"double2 __ovld __cnfn convert_double2_rtz(short2);\n"
33385"double2 __ovld __cnfn convert_double2_rtz(uchar2);\n"
33386"double2 __ovld __cnfn convert_double2_rtz(uint2);\n"
33387"double2 __ovld __cnfn convert_double2_rtz(ulong2);\n"
33388"double2 __ovld __cnfn convert_double2_rtz(ushort2);\n"
33389"double3 __ovld __cnfn convert_double3(char3);\n"
33390"double3 __ovld __cnfn convert_double3(double3);\n"
33391"double3 __ovld __cnfn convert_double3(float3);\n"
33392"double3 __ovld __cnfn convert_double3(int3);\n"
33393"double3 __ovld __cnfn convert_double3(long3);\n"
33394"double3 __ovld __cnfn convert_double3(short3);\n"
33395"double3 __ovld __cnfn convert_double3(uchar3);\n"
33396"double3 __ovld __cnfn convert_double3(uint3);\n"
33397"double3 __ovld __cnfn convert_double3(ulong3);\n"
33398"double3 __ovld __cnfn convert_double3(ushort3);\n"
33399"double3 __ovld __cnfn convert_double3_rte(char3);\n"
33400"double3 __ovld __cnfn convert_double3_rte(double3);\n"
33401"double3 __ovld __cnfn convert_double3_rte(float3);\n"
33402"double3 __ovld __cnfn convert_double3_rte(int3);\n"
33403"double3 __ovld __cnfn convert_double3_rte(long3);\n"
33404"double3 __ovld __cnfn convert_double3_rte(short3);\n"
33405"double3 __ovld __cnfn convert_double3_rte(uchar3);\n"
33406"double3 __ovld __cnfn convert_double3_rte(uint3);\n"
33407"double3 __ovld __cnfn convert_double3_rte(ulong3);\n"
33408"double3 __ovld __cnfn convert_double3_rte(ushort3);\n"
33409"double3 __ovld __cnfn convert_double3_rtn(char3);\n"
33410"double3 __ovld __cnfn convert_double3_rtn(double3);\n"
33411"double3 __ovld __cnfn convert_double3_rtn(float3);\n"
33412"double3 __ovld __cnfn convert_double3_rtn(int3);\n"
33413"double3 __ovld __cnfn convert_double3_rtn(long3);\n"
33414"double3 __ovld __cnfn convert_double3_rtn(short3);\n"
33415"double3 __ovld __cnfn convert_double3_rtn(uchar3);\n"
33416"double3 __ovld __cnfn convert_double3_rtn(uint3);\n"
33417"double3 __ovld __cnfn convert_double3_rtn(ulong3);\n"
33418"double3 __ovld __cnfn convert_double3_rtn(ushort3);\n"
33419"double3 __ovld __cnfn convert_double3_rtp(char3);\n"
33420"double3 __ovld __cnfn convert_double3_rtp(double3);\n"
33421"double3 __ovld __cnfn convert_double3_rtp(float3);\n"
33422"double3 __ovld __cnfn convert_double3_rtp(int3);\n"
33423"double3 __ovld __cnfn convert_double3_rtp(long3);\n"
33424"double3 __ovld __cnfn convert_double3_rtp(short3);\n"
33425"double3 __ovld __cnfn convert_double3_rtp(uchar3);\n"
33426"double3 __ovld __cnfn convert_double3_rtp(uint3);\n"
33427"double3 __ovld __cnfn convert_double3_rtp(ulong3);\n"
33428"double3 __ovld __cnfn convert_double3_rtp(ushort3);\n"
33429"double3 __ovld __cnfn convert_double3_rtz(char3);\n"
33430"double3 __ovld __cnfn convert_double3_rtz(double3);\n"
33431"double3 __ovld __cnfn convert_double3_rtz(float3);\n"
33432"double3 __ovld __cnfn convert_double3_rtz(int3);\n"
33433"double3 __ovld __cnfn convert_double3_rtz(long3);\n"
33434"double3 __ovld __cnfn convert_double3_rtz(short3);\n"
33435"double3 __ovld __cnfn convert_double3_rtz(uchar3);\n"
33436"double3 __ovld __cnfn convert_double3_rtz(uint3);\n"
33437"double3 __ovld __cnfn convert_double3_rtz(ulong3);\n"
33438"double3 __ovld __cnfn convert_double3_rtz(ushort3);\n"
33439"double4 __ovld __cnfn convert_double4(char4);\n"
33440"double4 __ovld __cnfn convert_double4(double4);\n"
33441"double4 __ovld __cnfn convert_double4(float4);\n"
33442"double4 __ovld __cnfn convert_double4(int4);\n"
33443"double4 __ovld __cnfn convert_double4(long4);\n"
33444"double4 __ovld __cnfn convert_double4(short4);\n"
33445"double4 __ovld __cnfn convert_double4(uchar4);\n"
33446"double4 __ovld __cnfn convert_double4(uint4);\n"
33447"double4 __ovld __cnfn convert_double4(ulong4);\n"
33448"double4 __ovld __cnfn convert_double4(ushort4);\n"
33449"double4 __ovld __cnfn convert_double4_rte(char4);\n"
33450"double4 __ovld __cnfn convert_double4_rte(double4);\n"
33451"double4 __ovld __cnfn convert_double4_rte(float4);\n"
33452"double4 __ovld __cnfn convert_double4_rte(int4);\n"
33453"double4 __ovld __cnfn convert_double4_rte(long4);\n"
33454"double4 __ovld __cnfn convert_double4_rte(short4);\n"
33455"double4 __ovld __cnfn convert_double4_rte(uchar4);\n"
33456"double4 __ovld __cnfn convert_double4_rte(uint4);\n"
33457"double4 __ovld __cnfn convert_double4_rte(ulong4);\n"
33458"double4 __ovld __cnfn convert_double4_rte(ushort4);\n"
33459"double4 __ovld __cnfn convert_double4_rtn(char4);\n"
33460"double4 __ovld __cnfn convert_double4_rtn(double4);\n"
33461"double4 __ovld __cnfn convert_double4_rtn(float4);\n"
33462"double4 __ovld __cnfn convert_double4_rtn(int4);\n"
33463"double4 __ovld __cnfn convert_double4_rtn(long4);\n"
33464"double4 __ovld __cnfn convert_double4_rtn(short4);\n"
33465"double4 __ovld __cnfn convert_double4_rtn(uchar4);\n"
33466"double4 __ovld __cnfn convert_double4_rtn(uint4);\n"
33467"double4 __ovld __cnfn convert_double4_rtn(ulong4);\n"
33468"double4 __ovld __cnfn convert_double4_rtn(ushort4);\n"
33469"double4 __ovld __cnfn convert_double4_rtp(char4);\n"
33470"double4 __ovld __cnfn convert_double4_rtp(double4);\n"
33471"double4 __ovld __cnfn convert_double4_rtp(float4);\n"
33472"double4 __ovld __cnfn convert_double4_rtp(int4);\n"
33473"double4 __ovld __cnfn convert_double4_rtp(long4);\n"
33474"double4 __ovld __cnfn convert_double4_rtp(short4);\n"
33475"double4 __ovld __cnfn convert_double4_rtp(uchar4);\n"
33476"double4 __ovld __cnfn convert_double4_rtp(uint4);\n"
33477"double4 __ovld __cnfn convert_double4_rtp(ulong4);\n"
33478"double4 __ovld __cnfn convert_double4_rtp(ushort4);\n"
33479"double4 __ovld __cnfn convert_double4_rtz(char4);\n"
33480"double4 __ovld __cnfn convert_double4_rtz(double4);\n"
33481"double4 __ovld __cnfn convert_double4_rtz(float4);\n"
33482"double4 __ovld __cnfn convert_double4_rtz(int4);\n"
33483"double4 __ovld __cnfn convert_double4_rtz(long4);\n"
33484"double4 __ovld __cnfn convert_double4_rtz(short4);\n"
33485"double4 __ovld __cnfn convert_double4_rtz(uchar4);\n"
33486"double4 __ovld __cnfn convert_double4_rtz(uint4);\n"
33487"double4 __ovld __cnfn convert_double4_rtz(ulong4);\n"
33488"double4 __ovld __cnfn convert_double4_rtz(ushort4);\n"
33489"double8 __ovld __cnfn convert_double8(char8);\n"
33490"double8 __ovld __cnfn convert_double8(double8);\n"
33491"double8 __ovld __cnfn convert_double8(float8);\n"
33492"double8 __ovld __cnfn convert_double8(int8);\n"
33493"double8 __ovld __cnfn convert_double8(long8);\n"
33494"double8 __ovld __cnfn convert_double8(short8);\n"
33495"double8 __ovld __cnfn convert_double8(uchar8);\n"
33496"double8 __ovld __cnfn convert_double8(uint8);\n"
33497"double8 __ovld __cnfn convert_double8(ulong8);\n"
33498"double8 __ovld __cnfn convert_double8(ushort8);\n"
33499"double8 __ovld __cnfn convert_double8_rte(char8);\n"
33500"double8 __ovld __cnfn convert_double8_rte(double8);\n"
33501"double8 __ovld __cnfn convert_double8_rte(float8);\n"
33502"double8 __ovld __cnfn convert_double8_rte(int8);\n"
33503"double8 __ovld __cnfn convert_double8_rte(long8);\n"
33504"double8 __ovld __cnfn convert_double8_rte(short8);\n"
33505"double8 __ovld __cnfn convert_double8_rte(uchar8);\n"
33506"double8 __ovld __cnfn convert_double8_rte(uint8);\n"
33507"double8 __ovld __cnfn convert_double8_rte(ulong8);\n"
33508"double8 __ovld __cnfn convert_double8_rte(ushort8);\n"
33509"double8 __ovld __cnfn convert_double8_rtn(char8);\n"
33510"double8 __ovld __cnfn convert_double8_rtn(double8);\n"
33511"double8 __ovld __cnfn convert_double8_rtn(float8);\n"
33512"double8 __ovld __cnfn convert_double8_rtn(int8);\n"
33513"double8 __ovld __cnfn convert_double8_rtn(long8);\n"
33514"double8 __ovld __cnfn convert_double8_rtn(short8);\n"
33515"double8 __ovld __cnfn convert_double8_rtn(uchar8);\n"
33516"double8 __ovld __cnfn convert_double8_rtn(uint8);\n"
33517"double8 __ovld __cnfn convert_double8_rtn(ulong8);\n"
33518"double8 __ovld __cnfn convert_double8_rtn(ushort8);\n"
33519"double8 __ovld __cnfn convert_double8_rtp(char8);\n"
33520"double8 __ovld __cnfn convert_double8_rtp(double8);\n"
33521"double8 __ovld __cnfn convert_double8_rtp(float8);\n"
33522"double8 __ovld __cnfn convert_double8_rtp(int8);\n"
33523"double8 __ovld __cnfn convert_double8_rtp(long8);\n"
33524"double8 __ovld __cnfn convert_double8_rtp(short8);\n"
33525"double8 __ovld __cnfn convert_double8_rtp(uchar8);\n"
33526"double8 __ovld __cnfn convert_double8_rtp(uint8);\n"
33527"double8 __ovld __cnfn convert_double8_rtp(ulong8);\n"
33528"double8 __ovld __cnfn convert_double8_rtp(ushort8);\n"
33529"double8 __ovld __cnfn convert_double8_rtz(char8);\n"
33530"double8 __ovld __cnfn convert_double8_rtz(double8);\n"
33531"double8 __ovld __cnfn convert_double8_rtz(float8);\n"
33532"double8 __ovld __cnfn convert_double8_rtz(int8);\n"
33533"double8 __ovld __cnfn convert_double8_rtz(long8);\n"
33534"double8 __ovld __cnfn convert_double8_rtz(short8);\n"
33535"double8 __ovld __cnfn convert_double8_rtz(uchar8);\n"
33536"double8 __ovld __cnfn convert_double8_rtz(uint8);\n"
33537"double8 __ovld __cnfn convert_double8_rtz(ulong8);\n"
33538"double8 __ovld __cnfn convert_double8_rtz(ushort8);\n"
33539"double16 __ovld __cnfn convert_double16(char16);\n"
33540"double16 __ovld __cnfn convert_double16(double16);\n"
33541"double16 __ovld __cnfn convert_double16(float16);\n"
33542"double16 __ovld __cnfn convert_double16(int16);\n"
33543"double16 __ovld __cnfn convert_double16(long16);\n"
33544"double16 __ovld __cnfn convert_double16(short16);\n"
33545"double16 __ovld __cnfn convert_double16(uchar16);\n"
33546"double16 __ovld __cnfn convert_double16(uint16);\n"
33547"double16 __ovld __cnfn convert_double16(ulong16);\n"
33548"double16 __ovld __cnfn convert_double16(ushort16);\n"
33549"double16 __ovld __cnfn convert_double16_rte(char16);\n"
33550"double16 __ovld __cnfn convert_double16_rte(double16);\n"
33551"double16 __ovld __cnfn convert_double16_rte(float16);\n"
33552"double16 __ovld __cnfn convert_double16_rte(int16);\n"
33553"double16 __ovld __cnfn convert_double16_rte(long16);\n"
33554"double16 __ovld __cnfn convert_double16_rte(short16);\n"
33555"double16 __ovld __cnfn convert_double16_rte(uchar16);\n"
33556"double16 __ovld __cnfn convert_double16_rte(uint16);\n"
33557"double16 __ovld __cnfn convert_double16_rte(ulong16);\n"
33558"double16 __ovld __cnfn convert_double16_rte(ushort16);\n"
33559"double16 __ovld __cnfn convert_double16_rtn(char16);\n"
33560"double16 __ovld __cnfn convert_double16_rtn(double16);\n"
33561"double16 __ovld __cnfn convert_double16_rtn(float16);\n"
33562"double16 __ovld __cnfn convert_double16_rtn(int16);\n"
33563"double16 __ovld __cnfn convert_double16_rtn(long16);\n"
33564"double16 __ovld __cnfn convert_double16_rtn(short16);\n"
33565"double16 __ovld __cnfn convert_double16_rtn(uchar16);\n"
33566"double16 __ovld __cnfn convert_double16_rtn(uint16);\n"
33567"double16 __ovld __cnfn convert_double16_rtn(ulong16);\n"
33568"double16 __ovld __cnfn convert_double16_rtn(ushort16);\n"
33569"double16 __ovld __cnfn convert_double16_rtp(char16);\n"
33570"double16 __ovld __cnfn convert_double16_rtp(double16);\n"
33571"double16 __ovld __cnfn convert_double16_rtp(float16);\n"
33572"double16 __ovld __cnfn convert_double16_rtp(int16);\n"
33573"double16 __ovld __cnfn convert_double16_rtp(long16);\n"
33574"double16 __ovld __cnfn convert_double16_rtp(short16);\n"
33575"double16 __ovld __cnfn convert_double16_rtp(uchar16);\n"
33576"double16 __ovld __cnfn convert_double16_rtp(uint16);\n"
33577"double16 __ovld __cnfn convert_double16_rtp(ulong16);\n"
33578"double16 __ovld __cnfn convert_double16_rtp(ushort16);\n"
33579"double16 __ovld __cnfn convert_double16_rtz(char16);\n"
33580"double16 __ovld __cnfn convert_double16_rtz(double16);\n"
33581"double16 __ovld __cnfn convert_double16_rtz(float16);\n"
33582"double16 __ovld __cnfn convert_double16_rtz(int16);\n"
33583"double16 __ovld __cnfn convert_double16_rtz(long16);\n"
33584"double16 __ovld __cnfn convert_double16_rtz(short16);\n"
33585"double16 __ovld __cnfn convert_double16_rtz(uchar16);\n"
33586"double16 __ovld __cnfn convert_double16_rtz(uint16);\n"
33587"double16 __ovld __cnfn convert_double16_rtz(ulong16);\n"
33588"double16 __ovld __cnfn convert_double16_rtz(ushort16);\n"
33589"#endif //cl_khr_fp64\n"
33590"\n"
33591"#ifdef cl_khr_fp16\n"
33592"// Convert half types to non-double types.\n"
33593"uchar __ovld __cnfn convert_uchar(half);\n"
33594"uchar __ovld __cnfn convert_uchar_rte(half);\n"
33595"uchar __ovld __cnfn convert_uchar_rtp(half);\n"
33596"uchar __ovld __cnfn convert_uchar_rtn(half);\n"
33597"uchar __ovld __cnfn convert_uchar_rtz(half);\n"
33598"uchar __ovld __cnfn convert_uchar_sat(half);\n"
33599"uchar __ovld __cnfn convert_uchar_sat_rte(half);\n"
33600"uchar __ovld __cnfn convert_uchar_sat_rtp(half);\n"
33601"uchar __ovld __cnfn convert_uchar_sat_rtn(half);\n"
33602"uchar __ovld __cnfn convert_uchar_sat_rtz(half);\n"
33603"uchar2 __ovld __cnfn convert_uchar2(half2);\n"
33604"uchar2 __ovld __cnfn convert_uchar2_rte(half2);\n"
33605"uchar2 __ovld __cnfn convert_uchar2_rtp(half2);\n"
33606"uchar2 __ovld __cnfn convert_uchar2_rtn(half2);\n"
33607"uchar2 __ovld __cnfn convert_uchar2_rtz(half2);\n"
33608"uchar2 __ovld __cnfn convert_uchar2_sat(half2);\n"
33609"uchar2 __ovld __cnfn convert_uchar2_sat_rte(half2);\n"
33610"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(half2);\n"
33611"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(half2);\n"
33612"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(half2);\n"
33613"uchar3 __ovld __cnfn convert_uchar3(half3);\n"
33614"uchar3 __ovld __cnfn convert_uchar3_rte(half3);\n"
33615"uchar3 __ovld __cnfn convert_uchar3_rtp(half3);\n"
33616"uchar3 __ovld __cnfn convert_uchar3_rtn(half3);\n"
33617"uchar3 __ovld __cnfn convert_uchar3_rtz(half3);\n"
33618"uchar3 __ovld __cnfn convert_uchar3_sat(half3);\n"
33619"uchar3 __ovld __cnfn convert_uchar3_sat_rte(half3);\n"
33620"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(half3);\n"
33621"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(half3);\n"
33622"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(half3);\n"
33623"uchar4 __ovld __cnfn convert_uchar4(half4);\n"
33624"uchar4 __ovld __cnfn convert_uchar4_rte(half4);\n"
33625"uchar4 __ovld __cnfn convert_uchar4_rtp(half4);\n"
33626"uchar4 __ovld __cnfn convert_uchar4_rtn(half4);\n"
33627"uchar4 __ovld __cnfn convert_uchar4_rtz(half4);\n"
33628"uchar4 __ovld __cnfn convert_uchar4_sat(half4);\n"
33629"uchar4 __ovld __cnfn convert_uchar4_sat_rte(half4);\n"
33630"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(half4);\n"
33631"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(half4);\n"
33632"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(half4);\n"
33633"uchar8 __ovld __cnfn convert_uchar8(half8);\n"
33634"uchar8 __ovld __cnfn convert_uchar8_rte(half8);\n"
33635"uchar8 __ovld __cnfn convert_uchar8_rtp(half8);\n"
33636"uchar8 __ovld __cnfn convert_uchar8_rtn(half8);\n"
33637"uchar8 __ovld __cnfn convert_uchar8_rtz(half8);\n"
33638"uchar8 __ovld __cnfn convert_uchar8_sat(half8);\n"
33639"uchar8 __ovld __cnfn convert_uchar8_sat_rte(half8);\n"
33640"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(half8);\n"
33641"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(half8);\n"
33642"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(half8);\n"
33643"uchar16 __ovld __cnfn convert_uchar16(half16);\n"
33644"uchar16 __ovld __cnfn convert_uchar16_rte(half16);\n"
33645"uchar16 __ovld __cnfn convert_uchar16_rtp(half16);\n"
33646"uchar16 __ovld __cnfn convert_uchar16_rtn(half16);\n"
33647"uchar16 __ovld __cnfn convert_uchar16_rtz(half16);\n"
33648"uchar16 __ovld __cnfn convert_uchar16_sat(half16);\n"
33649"uchar16 __ovld __cnfn convert_uchar16_sat_rte(half16);\n"
33650"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(half16);\n"
33651"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(half16);\n"
33652"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(half16);\n"
33653"ushort __ovld __cnfn convert_ushort(half);\n"
33654"ushort __ovld __cnfn convert_ushort_rte(half);\n"
33655"ushort __ovld __cnfn convert_ushort_rtp(half);\n"
33656"ushort __ovld __cnfn convert_ushort_rtn(half);\n"
33657"ushort __ovld __cnfn convert_ushort_rtz(half);\n"
33658"ushort __ovld __cnfn convert_ushort_sat(half);\n"
33659"ushort __ovld __cnfn convert_ushort_sat_rte(half);\n"
33660"ushort __ovld __cnfn convert_ushort_sat_rtp(half);\n"
33661"ushort __ovld __cnfn convert_ushort_sat_rtn(half);\n"
33662"ushort __ovld __cnfn convert_ushort_sat_rtz(half);\n"
33663"ushort2 __ovld __cnfn convert_ushort2(half2);\n"
33664"ushort2 __ovld __cnfn convert_ushort2_rte(half2);\n"
33665"ushort2 __ovld __cnfn convert_ushort2_rtp(half2);\n"
33666"ushort2 __ovld __cnfn convert_ushort2_rtn(half2);\n"
33667"ushort2 __ovld __cnfn convert_ushort2_rtz(half2);\n"
33668"ushort2 __ovld __cnfn convert_ushort2_sat(half2);\n"
33669"ushort2 __ovld __cnfn convert_ushort2_sat_rte(half2);\n"
33670"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(half2);\n"
33671"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(half2);\n"
33672"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(half2);\n"
33673"ushort3 __ovld __cnfn convert_ushort3(half3);\n"
33674"ushort3 __ovld __cnfn convert_ushort3_rte(half3);\n"
33675"ushort3 __ovld __cnfn convert_ushort3_rtp(half3);\n"
33676"ushort3 __ovld __cnfn convert_ushort3_rtn(half3);\n"
33677"ushort3 __ovld __cnfn convert_ushort3_rtz(half3);\n"
33678"ushort3 __ovld __cnfn convert_ushort3_sat(half3);\n"
33679"ushort3 __ovld __cnfn convert_ushort3_sat_rte(half3);\n"
33680"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(half3);\n"
33681"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(half3);\n"
33682"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(half3);\n"
33683"ushort4 __ovld __cnfn convert_ushort4(half4);\n"
33684"ushort4 __ovld __cnfn convert_ushort4_rte(half4);\n"
33685"ushort4 __ovld __cnfn convert_ushort4_rtp(half4);\n"
33686"ushort4 __ovld __cnfn convert_ushort4_rtn(half4);\n"
33687"ushort4 __ovld __cnfn convert_ushort4_rtz(half4);\n"
33688"ushort4 __ovld __cnfn convert_ushort4_sat(half4);\n"
33689"ushort4 __ovld __cnfn convert_ushort4_sat_rte(half4);\n"
33690"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(half4);\n"
33691"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(half4);\n"
33692"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(half4);\n"
33693"ushort8 __ovld __cnfn convert_ushort8(half8);\n"
33694"ushort8 __ovld __cnfn convert_ushort8_rte(half8);\n"
33695"ushort8 __ovld __cnfn convert_ushort8_rtp(half8);\n"
33696"ushort8 __ovld __cnfn convert_ushort8_rtn(half8);\n"
33697"ushort8 __ovld __cnfn convert_ushort8_rtz(half8);\n"
33698"ushort8 __ovld __cnfn convert_ushort8_sat(half8);\n"
33699"ushort8 __ovld __cnfn convert_ushort8_sat_rte(half8);\n"
33700"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(half8);\n"
33701"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(half8);\n"
33702"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(half8);\n"
33703"ushort16 __ovld __cnfn convert_ushort16(half16);\n"
33704"ushort16 __ovld __cnfn convert_ushort16_rte(half16);\n"
33705"ushort16 __ovld __cnfn convert_ushort16_rtp(half16);\n"
33706"ushort16 __ovld __cnfn convert_ushort16_rtn(half16);\n"
33707"ushort16 __ovld __cnfn convert_ushort16_rtz(half16);\n"
33708"ushort16 __ovld __cnfn convert_ushort16_sat(half16);\n"
33709"ushort16 __ovld __cnfn convert_ushort16_sat_rte(half16);\n"
33710"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(half16);\n"
33711"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(half16);\n"
33712"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(half16);\n"
33713"uint __ovld __cnfn convert_uint(half);\n"
33714"uint __ovld __cnfn convert_uint_rte(half);\n"
33715"uint __ovld __cnfn convert_uint_rtp(half);\n"
33716"uint __ovld __cnfn convert_uint_rtn(half);\n"
33717"uint __ovld __cnfn convert_uint_rtz(half);\n"
33718"uint __ovld __cnfn convert_uint_sat(half);\n"
33719"uint __ovld __cnfn convert_uint_sat_rte(half);\n"
33720"uint __ovld __cnfn convert_uint_sat_rtp(half);\n"
33721"uint __ovld __cnfn convert_uint_sat_rtn(half);\n"
33722"uint __ovld __cnfn convert_uint_sat_rtz(half);\n"
33723"uint2 __ovld __cnfn convert_uint2(half2);\n"
33724"uint2 __ovld __cnfn convert_uint2_rte(half2);\n"
33725"uint2 __ovld __cnfn convert_uint2_rtp(half2);\n"
33726"uint2 __ovld __cnfn convert_uint2_rtn(half2);\n"
33727"uint2 __ovld __cnfn convert_uint2_rtz(half2);\n"
33728"uint2 __ovld __cnfn convert_uint2_sat(half2);\n"
33729"uint2 __ovld __cnfn convert_uint2_sat_rte(half2);\n"
33730"uint2 __ovld __cnfn convert_uint2_sat_rtp(half2);\n"
33731"uint2 __ovld __cnfn convert_uint2_sat_rtn(half2);\n"
33732"uint2 __ovld __cnfn convert_uint2_sat_rtz(half2);\n"
33733"uint3 __ovld __cnfn convert_uint3(half3);\n"
33734"uint3 __ovld __cnfn convert_uint3_rte(half3);\n"
33735"uint3 __ovld __cnfn convert_uint3_rtp(half3);\n"
33736"uint3 __ovld __cnfn convert_uint3_rtn(half3);\n"
33737"uint3 __ovld __cnfn convert_uint3_rtz(half3);\n"
33738"uint3 __ovld __cnfn convert_uint3_sat(half3);\n"
33739"uint3 __ovld __cnfn convert_uint3_sat_rte(half3);\n"
33740"uint3 __ovld __cnfn convert_uint3_sat_rtp(half3);\n"
33741"uint3 __ovld __cnfn convert_uint3_sat_rtn(half3);\n"
33742"uint3 __ovld __cnfn convert_uint3_sat_rtz(half3);\n"
33743"uint4 __ovld __cnfn convert_uint4(half4);\n"
33744"uint4 __ovld __cnfn convert_uint4_rte(half4);\n"
33745"uint4 __ovld __cnfn convert_uint4_rtp(half4);\n"
33746"uint4 __ovld __cnfn convert_uint4_rtn(half4);\n"
33747"uint4 __ovld __cnfn convert_uint4_rtz(half4);\n"
33748"uint4 __ovld __cnfn convert_uint4_sat(half4);\n"
33749"uint4 __ovld __cnfn convert_uint4_sat_rte(half4);\n"
33750"uint4 __ovld __cnfn convert_uint4_sat_rtp(half4);\n"
33751"uint4 __ovld __cnfn convert_uint4_sat_rtn(half4);\n"
33752"uint4 __ovld __cnfn convert_uint4_sat_rtz(half4);\n"
33753"uint8 __ovld __cnfn convert_uint8(half8);\n"
33754"uint8 __ovld __cnfn convert_uint8_rte(half8);\n"
33755"uint8 __ovld __cnfn convert_uint8_rtp(half8);\n"
33756"uint8 __ovld __cnfn convert_uint8_rtn(half8);\n"
33757"uint8 __ovld __cnfn convert_uint8_rtz(half8);\n"
33758"uint8 __ovld __cnfn convert_uint8_sat(half8);\n"
33759"uint8 __ovld __cnfn convert_uint8_sat_rte(half8);\n"
33760"uint8 __ovld __cnfn convert_uint8_sat_rtp(half8);\n"
33761"uint8 __ovld __cnfn convert_uint8_sat_rtn(half8);\n"
33762"uint8 __ovld __cnfn convert_uint8_sat_rtz(half8);\n"
33763"uint16 __ovld __cnfn convert_uint16(half16);\n"
33764"uint16 __ovld __cnfn convert_uint16_rte(half16);\n"
33765"uint16 __ovld __cnfn convert_uint16_rtp(half16);\n"
33766"uint16 __ovld __cnfn convert_uint16_rtn(half16);\n"
33767"uint16 __ovld __cnfn convert_uint16_rtz(half16);\n"
33768"uint16 __ovld __cnfn convert_uint16_sat(half16);\n"
33769"uint16 __ovld __cnfn convert_uint16_sat_rte(half16);\n"
33770"uint16 __ovld __cnfn convert_uint16_sat_rtp(half16);\n"
33771"uint16 __ovld __cnfn convert_uint16_sat_rtn(half16);\n"
33772"uint16 __ovld __cnfn convert_uint16_sat_rtz(half16);\n"
33773"ulong __ovld __cnfn convert_ulong(half);\n"
33774"ulong __ovld __cnfn convert_ulong_rte(half);\n"
33775"ulong __ovld __cnfn convert_ulong_rtp(half);\n"
33776"ulong __ovld __cnfn convert_ulong_rtn(half);\n"
33777"ulong __ovld __cnfn convert_ulong_rtz(half);\n"
33778"ulong __ovld __cnfn convert_ulong_sat(half);\n"
33779"ulong __ovld __cnfn convert_ulong_sat_rte(half);\n"
33780"ulong __ovld __cnfn convert_ulong_sat_rtp(half);\n"
33781"ulong __ovld __cnfn convert_ulong_sat_rtn(half);\n"
33782"ulong __ovld __cnfn convert_ulong_sat_rtz(half);\n"
33783"ulong2 __ovld __cnfn convert_ulong2(half2);\n"
33784"ulong2 __ovld __cnfn convert_ulong2_rte(half2);\n"
33785"ulong2 __ovld __cnfn convert_ulong2_rtp(half2);\n"
33786"ulong2 __ovld __cnfn convert_ulong2_rtn(half2);\n"
33787"ulong2 __ovld __cnfn convert_ulong2_rtz(half2);\n"
33788"ulong2 __ovld __cnfn convert_ulong2_sat(half2);\n"
33789"ulong2 __ovld __cnfn convert_ulong2_sat_rte(half2);\n"
33790"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(half2);\n"
33791"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(half2);\n"
33792"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(half2);\n"
33793"ulong3 __ovld __cnfn convert_ulong3(half3);\n"
33794"ulong3 __ovld __cnfn convert_ulong3_rte(half3);\n"
33795"ulong3 __ovld __cnfn convert_ulong3_rtp(half3);\n"
33796"ulong3 __ovld __cnfn convert_ulong3_rtn(half3);\n"
33797"ulong3 __ovld __cnfn convert_ulong3_rtz(half3);\n"
33798"ulong3 __ovld __cnfn convert_ulong3_sat(half3);\n"
33799"ulong3 __ovld __cnfn convert_ulong3_sat_rte(half3);\n"
33800"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(half3);\n"
33801"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(half3);\n"
33802"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(half3);\n"
33803"ulong4 __ovld __cnfn convert_ulong4(half4);\n"
33804"ulong4 __ovld __cnfn convert_ulong4_rte(half4);\n"
33805"ulong4 __ovld __cnfn convert_ulong4_rtp(half4);\n"
33806"ulong4 __ovld __cnfn convert_ulong4_rtn(half4);\n"
33807"ulong4 __ovld __cnfn convert_ulong4_rtz(half4);\n"
33808"ulong4 __ovld __cnfn convert_ulong4_sat(half4);\n"
33809"ulong4 __ovld __cnfn convert_ulong4_sat_rte(half4);\n"
33810"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(half4);\n"
33811"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(half4);\n"
33812"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(half4);\n"
33813"ulong8 __ovld __cnfn convert_ulong8(half8);\n"
33814"ulong8 __ovld __cnfn convert_ulong8_rte(half8);\n"
33815"ulong8 __ovld __cnfn convert_ulong8_rtp(half8);\n"
33816"ulong8 __ovld __cnfn convert_ulong8_rtn(half8);\n"
33817"ulong8 __ovld __cnfn convert_ulong8_rtz(half8);\n"
33818"ulong8 __ovld __cnfn convert_ulong8_sat(half8);\n"
33819"ulong8 __ovld __cnfn convert_ulong8_sat_rte(half8);\n"
33820"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(half8);\n"
33821"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(half8);\n"
33822"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(half8);\n"
33823"ulong16 __ovld __cnfn convert_ulong16(half16);\n"
33824"ulong16 __ovld __cnfn convert_ulong16_rte(half16);\n"
33825"ulong16 __ovld __cnfn convert_ulong16_rtp(half16);\n"
33826"ulong16 __ovld __cnfn convert_ulong16_rtn(half16);\n"
33827"ulong16 __ovld __cnfn convert_ulong16_rtz(half16);\n"
33828"ulong16 __ovld __cnfn convert_ulong16_sat(half16);\n"
33829"ulong16 __ovld __cnfn convert_ulong16_sat_rte(half16);\n"
33830"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(half16);\n"
33831"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(half16);\n"
33832"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(half16);\n"
33833"char __ovld __cnfn convert_char(half);\n"
33834"char __ovld __cnfn convert_char_rte(half);\n"
33835"char __ovld __cnfn convert_char_rtp(half);\n"
33836"char __ovld __cnfn convert_char_rtn(half);\n"
33837"char __ovld __cnfn convert_char_rtz(half);\n"
33838"char __ovld __cnfn convert_char_sat(half);\n"
33839"char __ovld __cnfn convert_char_sat_rte(half);\n"
33840"char __ovld __cnfn convert_char_sat_rtp(half);\n"
33841"char __ovld __cnfn convert_char_sat_rtn(half);\n"
33842"char __ovld __cnfn convert_char_sat_rtz(half);\n"
33843"char2 __ovld __cnfn convert_char2(half2);\n"
33844"char2 __ovld __cnfn convert_char2_rte(half2);\n"
33845"char2 __ovld __cnfn convert_char2_rtp(half2);\n"
33846"char2 __ovld __cnfn convert_char2_rtn(half2);\n"
33847"char2 __ovld __cnfn convert_char2_rtz(half2);\n"
33848"char2 __ovld __cnfn convert_char2_sat(half2);\n"
33849"char2 __ovld __cnfn convert_char2_sat_rte(half2);\n"
33850"char2 __ovld __cnfn convert_char2_sat_rtp(half2);\n"
33851"char2 __ovld __cnfn convert_char2_sat_rtn(half2);\n"
33852"char2 __ovld __cnfn convert_char2_sat_rtz(half2);\n"
33853"char3 __ovld __cnfn convert_char3(half3);\n"
33854"char3 __ovld __cnfn convert_char3_rte(half3);\n"
33855"char3 __ovld __cnfn convert_char3_rtp(half3);\n"
33856"char3 __ovld __cnfn convert_char3_rtn(half3);\n"
33857"char3 __ovld __cnfn convert_char3_rtz(half3);\n"
33858"char3 __ovld __cnfn convert_char3_sat(half3);\n"
33859"char3 __ovld __cnfn convert_char3_sat_rte(half3);\n"
33860"char3 __ovld __cnfn convert_char3_sat_rtp(half3);\n"
33861"char3 __ovld __cnfn convert_char3_sat_rtn(half3);\n"
33862"char3 __ovld __cnfn convert_char3_sat_rtz(half3);\n"
33863"char4 __ovld __cnfn convert_char4(half4);\n"
33864"char4 __ovld __cnfn convert_char4_rte(half4);\n"
33865"char4 __ovld __cnfn convert_char4_rtp(half4);\n"
33866"char4 __ovld __cnfn convert_char4_rtn(half4);\n"
33867"char4 __ovld __cnfn convert_char4_rtz(half4);\n"
33868"char4 __ovld __cnfn convert_char4_sat(half4);\n"
33869"char4 __ovld __cnfn convert_char4_sat_rte(half4);\n"
33870"char4 __ovld __cnfn convert_char4_sat_rtp(half4);\n"
33871"char4 __ovld __cnfn convert_char4_sat_rtn(half4);\n"
33872"char4 __ovld __cnfn convert_char4_sat_rtz(half4);\n"
33873"char8 __ovld __cnfn convert_char8(half8);\n"
33874"char8 __ovld __cnfn convert_char8_rte(half8);\n"
33875"char8 __ovld __cnfn convert_char8_rtp(half8);\n"
33876"char8 __ovld __cnfn convert_char8_rtn(half8);\n"
33877"char8 __ovld __cnfn convert_char8_rtz(half8);\n"
33878"char8 __ovld __cnfn convert_char8_sat(half8);\n"
33879"char8 __ovld __cnfn convert_char8_sat_rte(half8);\n"
33880"char8 __ovld __cnfn convert_char8_sat_rtp(half8);\n"
33881"char8 __ovld __cnfn convert_char8_sat_rtn(half8);\n"
33882"char8 __ovld __cnfn convert_char8_sat_rtz(half8);\n"
33883"char16 __ovld __cnfn convert_char16(half16);\n"
33884"char16 __ovld __cnfn convert_char16_rte(half16);\n"
33885"char16 __ovld __cnfn convert_char16_rtp(half16);\n"
33886"char16 __ovld __cnfn convert_char16_rtn(half16);\n"
33887"char16 __ovld __cnfn convert_char16_rtz(half16);\n"
33888"char16 __ovld __cnfn convert_char16_sat(half16);\n"
33889"char16 __ovld __cnfn convert_char16_sat_rte(half16);\n"
33890"char16 __ovld __cnfn convert_char16_sat_rtp(half16);\n"
33891"char16 __ovld __cnfn convert_char16_sat_rtn(half16);\n"
33892"char16 __ovld __cnfn convert_char16_sat_rtz(half16);\n"
33893"short __ovld __cnfn convert_short(half);\n"
33894"short __ovld __cnfn convert_short_rte(half);\n"
33895"short __ovld __cnfn convert_short_rtp(half);\n"
33896"short __ovld __cnfn convert_short_rtn(half);\n"
33897"short __ovld __cnfn convert_short_rtz(half);\n"
33898"short __ovld __cnfn convert_short_sat(half);\n"
33899"short __ovld __cnfn convert_short_sat_rte(half);\n"
33900"short __ovld __cnfn convert_short_sat_rtp(half);\n"
33901"short __ovld __cnfn convert_short_sat_rtn(half);\n"
33902"short __ovld __cnfn convert_short_sat_rtz(half);\n"
33903"short2 __ovld __cnfn convert_short2(half2);\n"
33904"short2 __ovld __cnfn convert_short2_rte(half2);\n"
33905"short2 __ovld __cnfn convert_short2_rtp(half2);\n"
33906"short2 __ovld __cnfn convert_short2_rtn(half2);\n"
33907"short2 __ovld __cnfn convert_short2_rtz(half2);\n"
33908"short2 __ovld __cnfn convert_short2_sat(half2);\n"
33909"short2 __ovld __cnfn convert_short2_sat_rte(half2);\n"
33910"short2 __ovld __cnfn convert_short2_sat_rtp(half2);\n"
33911"short2 __ovld __cnfn convert_short2_sat_rtn(half2);\n"
33912"short2 __ovld __cnfn convert_short2_sat_rtz(half2);\n"
33913"short3 __ovld __cnfn convert_short3(half3);\n"
33914"short3 __ovld __cnfn convert_short3_rte(half3);\n"
33915"short3 __ovld __cnfn convert_short3_rtp(half3);\n"
33916"short3 __ovld __cnfn convert_short3_rtn(half3);\n"
33917"short3 __ovld __cnfn convert_short3_rtz(half3);\n"
33918"short3 __ovld __cnfn convert_short3_sat(half3);\n"
33919"short3 __ovld __cnfn convert_short3_sat_rte(half3);\n"
33920"short3 __ovld __cnfn convert_short3_sat_rtp(half3);\n"
33921"short3 __ovld __cnfn convert_short3_sat_rtn(half3);\n"
33922"short3 __ovld __cnfn convert_short3_sat_rtz(half3);\n"
33923"short4 __ovld __cnfn convert_short4(half4);\n"
33924"short4 __ovld __cnfn convert_short4_rte(half4);\n"
33925"short4 __ovld __cnfn convert_short4_rtp(half4);\n"
33926"short4 __ovld __cnfn convert_short4_rtn(half4);\n"
33927"short4 __ovld __cnfn convert_short4_rtz(half4);\n"
33928"short4 __ovld __cnfn convert_short4_sat(half4);\n"
33929"short4 __ovld __cnfn convert_short4_sat_rte(half4);\n"
33930"short4 __ovld __cnfn convert_short4_sat_rtp(half4);\n"
33931"short4 __ovld __cnfn convert_short4_sat_rtn(half4);\n"
33932"short4 __ovld __cnfn convert_short4_sat_rtz(half4);\n"
33933"short8 __ovld __cnfn convert_short8(half8);\n"
33934"short8 __ovld __cnfn convert_short8_rte(half8);\n"
33935"short8 __ovld __cnfn convert_short8_rtp(half8);\n"
33936"short8 __ovld __cnfn convert_short8_rtn(half8);\n"
33937"short8 __ovld __cnfn convert_short8_rtz(half8);\n"
33938"short8 __ovld __cnfn convert_short8_sat(half8);\n"
33939"short8 __ovld __cnfn convert_short8_sat_rte(half8);\n"
33940"short8 __ovld __cnfn convert_short8_sat_rtp(half8);\n"
33941"short8 __ovld __cnfn convert_short8_sat_rtn(half8);\n"
33942"short8 __ovld __cnfn convert_short8_sat_rtz(half8);\n"
33943"short16 __ovld __cnfn convert_short16(half16);\n"
33944"short16 __ovld __cnfn convert_short16_rte(half16);\n"
33945"short16 __ovld __cnfn convert_short16_rtp(half16);\n"
33946"short16 __ovld __cnfn convert_short16_rtn(half16);\n"
33947"short16 __ovld __cnfn convert_short16_rtz(half16);\n"
33948"short16 __ovld __cnfn convert_short16_sat(half16);\n"
33949"short16 __ovld __cnfn convert_short16_sat_rte(half16);\n"
33950"short16 __ovld __cnfn convert_short16_sat_rtp(half16);\n"
33951"short16 __ovld __cnfn convert_short16_sat_rtn(half16);\n"
33952"short16 __ovld __cnfn convert_short16_sat_rtz(half16);\n"
33953"int __ovld __cnfn convert_int(half);\n"
33954"int __ovld __cnfn convert_int_rte(half);\n"
33955"int __ovld __cnfn convert_int_rtp(half);\n"
33956"int __ovld __cnfn convert_int_rtn(half);\n"
33957"int __ovld __cnfn convert_int_rtz(half);\n"
33958"int __ovld __cnfn convert_int_sat(half);\n"
33959"int __ovld __cnfn convert_int_sat_rte(half);\n"
33960"int __ovld __cnfn convert_int_sat_rtp(half);\n"
33961"int __ovld __cnfn convert_int_sat_rtn(half);\n"
33962"int __ovld __cnfn convert_int_sat_rtz(half);\n"
33963"int2 __ovld __cnfn convert_int2(half2);\n"
33964"int2 __ovld __cnfn convert_int2_rte(half2);\n"
33965"int2 __ovld __cnfn convert_int2_rtp(half2);\n"
33966"int2 __ovld __cnfn convert_int2_rtn(half2);\n"
33967"int2 __ovld __cnfn convert_int2_rtz(half2);\n"
33968"int2 __ovld __cnfn convert_int2_sat(half2);\n"
33969"int2 __ovld __cnfn convert_int2_sat_rte(half2);\n"
33970"int2 __ovld __cnfn convert_int2_sat_rtp(half2);\n"
33971"int2 __ovld __cnfn convert_int2_sat_rtn(half2);\n"
33972"int2 __ovld __cnfn convert_int2_sat_rtz(half2);\n"
33973"int3 __ovld __cnfn convert_int3(half3);\n"
33974"int3 __ovld __cnfn convert_int3_rte(half3);\n"
33975"int3 __ovld __cnfn convert_int3_rtp(half3);\n"
33976"int3 __ovld __cnfn convert_int3_rtn(half3);\n"
33977"int3 __ovld __cnfn convert_int3_rtz(half3);\n"
33978"int3 __ovld __cnfn convert_int3_sat(half3);\n"
33979"int3 __ovld __cnfn convert_int3_sat_rte(half3);\n"
33980"int3 __ovld __cnfn convert_int3_sat_rtp(half3);\n"
33981"int3 __ovld __cnfn convert_int3_sat_rtn(half3);\n"
33982"int3 __ovld __cnfn convert_int3_sat_rtz(half3);\n"
33983"int4 __ovld __cnfn convert_int4(half4);\n"
33984"int4 __ovld __cnfn convert_int4_rte(half4);\n"
33985"int4 __ovld __cnfn convert_int4_rtp(half4);\n"
33986"int4 __ovld __cnfn convert_int4_rtn(half4);\n"
33987"int4 __ovld __cnfn convert_int4_rtz(half4);\n"
33988"int4 __ovld __cnfn convert_int4_sat(half4);\n"
33989"int4 __ovld __cnfn convert_int4_sat_rte(half4);\n"
33990"int4 __ovld __cnfn convert_int4_sat_rtp(half4);\n"
33991"int4 __ovld __cnfn convert_int4_sat_rtn(half4);\n"
33992"int4 __ovld __cnfn convert_int4_sat_rtz(half4);\n"
33993"int8 __ovld __cnfn convert_int8(half8);\n"
33994"int8 __ovld __cnfn convert_int8_rte(half8);\n"
33995"int8 __ovld __cnfn convert_int8_rtp(half8);\n"
33996"int8 __ovld __cnfn convert_int8_rtn(half8);\n"
33997"int8 __ovld __cnfn convert_int8_rtz(half8);\n"
33998"int8 __ovld __cnfn convert_int8_sat(half8);\n"
33999"int8 __ovld __cnfn convert_int8_sat_rte(half8);\n"
34000"int8 __ovld __cnfn convert_int8_sat_rtp(half8);\n"
34001"int8 __ovld __cnfn convert_int8_sat_rtn(half8);\n"
34002"int8 __ovld __cnfn convert_int8_sat_rtz(half8);\n"
34003"int16 __ovld __cnfn convert_int16(half16);\n"
34004"int16 __ovld __cnfn convert_int16_rte(half16);\n"
34005"int16 __ovld __cnfn convert_int16_rtp(half16);\n"
34006"int16 __ovld __cnfn convert_int16_rtn(half16);\n"
34007"int16 __ovld __cnfn convert_int16_rtz(half16);\n"
34008"int16 __ovld __cnfn convert_int16_sat(half16);\n"
34009"int16 __ovld __cnfn convert_int16_sat_rte(half16);\n"
34010"int16 __ovld __cnfn convert_int16_sat_rtp(half16);\n"
34011"int16 __ovld __cnfn convert_int16_sat_rtn(half16);\n"
34012"int16 __ovld __cnfn convert_int16_sat_rtz(half16);\n"
34013"long __ovld __cnfn convert_long(half);\n"
34014"long __ovld __cnfn convert_long_rte(half);\n"
34015"long __ovld __cnfn convert_long_rtp(half);\n"
34016"long __ovld __cnfn convert_long_rtn(half);\n"
34017"long __ovld __cnfn convert_long_rtz(half);\n"
34018"long __ovld __cnfn convert_long_sat(half);\n"
34019"long __ovld __cnfn convert_long_sat_rte(half);\n"
34020"long __ovld __cnfn convert_long_sat_rtp(half);\n"
34021"long __ovld __cnfn convert_long_sat_rtn(half);\n"
34022"long __ovld __cnfn convert_long_sat_rtz(half);\n"
34023"long2 __ovld __cnfn convert_long2(half2);\n"
34024"long2 __ovld __cnfn convert_long2_rte(half2);\n"
34025"long2 __ovld __cnfn convert_long2_rtp(half2);\n"
34026"long2 __ovld __cnfn convert_long2_rtn(half2);\n"
34027"long2 __ovld __cnfn convert_long2_rtz(half2);\n"
34028"long2 __ovld __cnfn convert_long2_sat(half2);\n"
34029"long2 __ovld __cnfn convert_long2_sat_rte(half2);\n"
34030"long2 __ovld __cnfn convert_long2_sat_rtp(half2);\n"
34031"long2 __ovld __cnfn convert_long2_sat_rtn(half2);\n"
34032"long2 __ovld __cnfn convert_long2_sat_rtz(half2);\n"
34033"long3 __ovld __cnfn convert_long3(half3);\n"
34034"long3 __ovld __cnfn convert_long3_rte(half3);\n"
34035"long3 __ovld __cnfn convert_long3_rtp(half3);\n"
34036"long3 __ovld __cnfn convert_long3_rtn(half3);\n"
34037"long3 __ovld __cnfn convert_long3_rtz(half3);\n"
34038"long3 __ovld __cnfn convert_long3_sat(half3);\n"
34039"long3 __ovld __cnfn convert_long3_sat_rte(half3);\n"
34040"long3 __ovld __cnfn convert_long3_sat_rtp(half3);\n"
34041"long3 __ovld __cnfn convert_long3_sat_rtn(half3);\n"
34042"long3 __ovld __cnfn convert_long3_sat_rtz(half3);\n"
34043"long4 __ovld __cnfn convert_long4(half4);\n"
34044"long4 __ovld __cnfn convert_long4_rte(half4);\n"
34045"long4 __ovld __cnfn convert_long4_rtp(half4);\n"
34046"long4 __ovld __cnfn convert_long4_rtn(half4);\n"
34047"long4 __ovld __cnfn convert_long4_rtz(half4);\n"
34048"long4 __ovld __cnfn convert_long4_sat(half4);\n"
34049"long4 __ovld __cnfn convert_long4_sat_rte(half4);\n"
34050"long4 __ovld __cnfn convert_long4_sat_rtp(half4);\n"
34051"long4 __ovld __cnfn convert_long4_sat_rtn(half4);\n"
34052"long4 __ovld __cnfn convert_long4_sat_rtz(half4);\n"
34053"long8 __ovld __cnfn convert_long8(half8);\n"
34054"long8 __ovld __cnfn convert_long8_rte(half8);\n"
34055"long8 __ovld __cnfn convert_long8_rtp(half8);\n"
34056"long8 __ovld __cnfn convert_long8_rtn(half8);\n"
34057"long8 __ovld __cnfn convert_long8_rtz(half8);\n"
34058"long8 __ovld __cnfn convert_long8_sat(half8);\n"
34059"long8 __ovld __cnfn convert_long8_sat_rte(half8);\n"
34060"long8 __ovld __cnfn convert_long8_sat_rtp(half8);\n"
34061"long8 __ovld __cnfn convert_long8_sat_rtn(half8);\n"
34062"long8 __ovld __cnfn convert_long8_sat_rtz(half8);\n"
34063"long16 __ovld __cnfn convert_long16(half16);\n"
34064"long16 __ovld __cnfn convert_long16_rte(half16);\n"
34065"long16 __ovld __cnfn convert_long16_rtp(half16);\n"
34066"long16 __ovld __cnfn convert_long16_rtn(half16);\n"
34067"long16 __ovld __cnfn convert_long16_rtz(half16);\n"
34068"long16 __ovld __cnfn convert_long16_sat(half16);\n"
34069"long16 __ovld __cnfn convert_long16_sat_rte(half16);\n"
34070"long16 __ovld __cnfn convert_long16_sat_rtp(half16);\n"
34071"long16 __ovld __cnfn convert_long16_sat_rtn(half16);\n"
34072"long16 __ovld __cnfn convert_long16_sat_rtz(half16);\n"
34073"float __ovld __cnfn convert_float(half);\n"
34074"float __ovld __cnfn convert_float_rte(half);\n"
34075"float __ovld __cnfn convert_float_rtp(half);\n"
34076"float __ovld __cnfn convert_float_rtn(half);\n"
34077"float __ovld __cnfn convert_float_rtz(half);\n"
34078"float2 __ovld __cnfn convert_float2(half2);\n"
34079"float2 __ovld __cnfn convert_float2_rte(half2);\n"
34080"float2 __ovld __cnfn convert_float2_rtp(half2);\n"
34081"float2 __ovld __cnfn convert_float2_rtn(half2);\n"
34082"float2 __ovld __cnfn convert_float2_rtz(half2);\n"
34083"float3 __ovld __cnfn convert_float3(half3);\n"
34084"float3 __ovld __cnfn convert_float3_rte(half3);\n"
34085"float3 __ovld __cnfn convert_float3_rtp(half3);\n"
34086"float3 __ovld __cnfn convert_float3_rtn(half3);\n"
34087"float3 __ovld __cnfn convert_float3_rtz(half3);\n"
34088"float4 __ovld __cnfn convert_float4(half4);\n"
34089"float4 __ovld __cnfn convert_float4_rte(half4);\n"
34090"float4 __ovld __cnfn convert_float4_rtp(half4);\n"
34091"float4 __ovld __cnfn convert_float4_rtn(half4);\n"
34092"float4 __ovld __cnfn convert_float4_rtz(half4);\n"
34093"float8 __ovld __cnfn convert_float8(half8);\n"
34094"float8 __ovld __cnfn convert_float8_rte(half8);\n"
34095"float8 __ovld __cnfn convert_float8_rtp(half8);\n"
34096"float8 __ovld __cnfn convert_float8_rtn(half8);\n"
34097"float8 __ovld __cnfn convert_float8_rtz(half8);\n"
34098"float16 __ovld __cnfn convert_float16(half16);\n"
34099"float16 __ovld __cnfn convert_float16_rte(half16);\n"
34100"float16 __ovld __cnfn convert_float16_rtp(half16);\n"
34101"float16 __ovld __cnfn convert_float16_rtn(half16);\n"
34102"float16 __ovld __cnfn convert_float16_rtz(half16);\n"
34103"\n"
34104"// Convert non-double types to half types.\n"
34105"half __ovld __cnfn convert_half(uchar);\n"
34106"half __ovld __cnfn convert_half(ushort);\n"
34107"half __ovld __cnfn convert_half(uint);\n"
34108"half __ovld __cnfn convert_half(ulong);\n"
34109"half __ovld __cnfn convert_half(char);\n"
34110"half __ovld __cnfn convert_half(short);\n"
34111"half __ovld __cnfn convert_half(int);\n"
34112"half __ovld __cnfn convert_half(long);\n"
34113"half __ovld __cnfn convert_half(float);\n"
34114"half __ovld __cnfn convert_half(half);\n"
34115"half __ovld __cnfn convert_half_rte(uchar);\n"
34116"half __ovld __cnfn convert_half_rte(ushort);\n"
34117"half __ovld __cnfn convert_half_rte(uint);\n"
34118"half __ovld __cnfn convert_half_rte(ulong);\n"
34119"half __ovld __cnfn convert_half_rte(char);\n"
34120"half __ovld __cnfn convert_half_rte(short);\n"
34121"half __ovld __cnfn convert_half_rte(int);\n"
34122"half __ovld __cnfn convert_half_rte(long);\n"
34123"half __ovld __cnfn convert_half_rte(float);\n"
34124"half __ovld __cnfn convert_half_rte(half);\n"
34125"half __ovld __cnfn convert_half_rtp(uchar);\n"
34126"half __ovld __cnfn convert_half_rtp(ushort);\n"
34127"half __ovld __cnfn convert_half_rtp(uint);\n"
34128"half __ovld __cnfn convert_half_rtp(ulong);\n"
34129"half __ovld __cnfn convert_half_rtp(char);\n"
34130"half __ovld __cnfn convert_half_rtp(short);\n"
34131"half __ovld __cnfn convert_half_rtp(int);\n"
34132"half __ovld __cnfn convert_half_rtp(long);\n"
34133"half __ovld __cnfn convert_half_rtp(float);\n"
34134"half __ovld __cnfn convert_half_rtp(half);\n"
34135"half __ovld __cnfn convert_half_rtn(uchar);\n"
34136"half __ovld __cnfn convert_half_rtn(ushort);\n"
34137"half __ovld __cnfn convert_half_rtn(uint);\n"
34138"half __ovld __cnfn convert_half_rtn(ulong);\n"
34139"half __ovld __cnfn convert_half_rtn(char);\n"
34140"half __ovld __cnfn convert_half_rtn(short);\n"
34141"half __ovld __cnfn convert_half_rtn(int);\n"
34142"half __ovld __cnfn convert_half_rtn(long);\n"
34143"half __ovld __cnfn convert_half_rtn(float);\n"
34144"half __ovld __cnfn convert_half_rtn(half);\n"
34145"half __ovld __cnfn convert_half_rtz(uchar);\n"
34146"half __ovld __cnfn convert_half_rtz(ushort);\n"
34147"half __ovld __cnfn convert_half_rtz(uint);\n"
34148"half __ovld __cnfn convert_half_rtz(ulong);\n"
34149"half __ovld __cnfn convert_half_rtz(char);\n"
34150"half __ovld __cnfn convert_half_rtz(short);\n"
34151"half __ovld __cnfn convert_half_rtz(int);\n"
34152"half __ovld __cnfn convert_half_rtz(long);\n"
34153"half __ovld __cnfn convert_half_rtz(float);\n"
34154"half __ovld __cnfn convert_half_rtz(half);\n"
34155"half2 __ovld __cnfn convert_half2(char2);\n"
34156"half2 __ovld __cnfn convert_half2(uchar2);\n"
34157"half2 __ovld __cnfn convert_half2(short2);\n"
34158"half2 __ovld __cnfn convert_half2(ushort2);\n"
34159"half2 __ovld __cnfn convert_half2(int2);\n"
34160"half2 __ovld __cnfn convert_half2(uint2);\n"
34161"half2 __ovld __cnfn convert_half2(long2);\n"
34162"half2 __ovld __cnfn convert_half2(ulong2);\n"
34163"half2 __ovld __cnfn convert_half2(float2);\n"
34164"half2 __ovld __cnfn convert_half2(half2);\n"
34165"half2 __ovld __cnfn convert_half2_rte(char2);\n"
34166"half2 __ovld __cnfn convert_half2_rte(uchar2);\n"
34167"half2 __ovld __cnfn convert_half2_rte(short2);\n"
34168"half2 __ovld __cnfn convert_half2_rte(ushort2);\n"
34169"half2 __ovld __cnfn convert_half2_rte(int2);\n"
34170"half2 __ovld __cnfn convert_half2_rte(uint2);\n"
34171"half2 __ovld __cnfn convert_half2_rte(long2);\n"
34172"half2 __ovld __cnfn convert_half2_rte(ulong2);\n"
34173"half2 __ovld __cnfn convert_half2_rte(float2);\n"
34174"half2 __ovld __cnfn convert_half2_rte(half2);\n"
34175"half2 __ovld __cnfn convert_half2_rtp(char2);\n"
34176"half2 __ovld __cnfn convert_half2_rtp(uchar2);\n"
34177"half2 __ovld __cnfn convert_half2_rtp(short2);\n"
34178"half2 __ovld __cnfn convert_half2_rtp(ushort2);\n"
34179"half2 __ovld __cnfn convert_half2_rtp(int2);\n"
34180"half2 __ovld __cnfn convert_half2_rtp(uint2);\n"
34181"half2 __ovld __cnfn convert_half2_rtp(long2);\n"
34182"half2 __ovld __cnfn convert_half2_rtp(ulong2);\n"
34183"half2 __ovld __cnfn convert_half2_rtp(float2);\n"
34184"half2 __ovld __cnfn convert_half2_rtp(half2);\n"
34185"half2 __ovld __cnfn convert_half2_rtn(char2);\n"
34186"half2 __ovld __cnfn convert_half2_rtn(uchar2);\n"
34187"half2 __ovld __cnfn convert_half2_rtn(short2);\n"
34188"half2 __ovld __cnfn convert_half2_rtn(ushort2);\n"
34189"half2 __ovld __cnfn convert_half2_rtn(int2);\n"
34190"half2 __ovld __cnfn convert_half2_rtn(uint2);\n"
34191"half2 __ovld __cnfn convert_half2_rtn(long2);\n"
34192"half2 __ovld __cnfn convert_half2_rtn(ulong2);\n"
34193"half2 __ovld __cnfn convert_half2_rtn(float2);\n"
34194"half2 __ovld __cnfn convert_half2_rtn(half2);\n"
34195"half2 __ovld __cnfn convert_half2_rtz(char2);\n"
34196"half2 __ovld __cnfn convert_half2_rtz(uchar2);\n"
34197"half2 __ovld __cnfn convert_half2_rtz(short2);\n"
34198"half2 __ovld __cnfn convert_half2_rtz(ushort2);\n"
34199"half2 __ovld __cnfn convert_half2_rtz(int2);\n"
34200"half2 __ovld __cnfn convert_half2_rtz(uint2);\n"
34201"half2 __ovld __cnfn convert_half2_rtz(long2);\n"
34202"half2 __ovld __cnfn convert_half2_rtz(ulong2);\n"
34203"half2 __ovld __cnfn convert_half2_rtz(float2);\n"
34204"half2 __ovld __cnfn convert_half2_rtz(half2);\n"
34205"half3 __ovld __cnfn convert_half3(char3);\n"
34206"half3 __ovld __cnfn convert_half3(uchar3);\n"
34207"half3 __ovld __cnfn convert_half3(short3);\n"
34208"half3 __ovld __cnfn convert_half3(ushort3);\n"
34209"half3 __ovld __cnfn convert_half3(int3);\n"
34210"half3 __ovld __cnfn convert_half3(uint3);\n"
34211"half3 __ovld __cnfn convert_half3(long3);\n"
34212"half3 __ovld __cnfn convert_half3(ulong3);\n"
34213"half3 __ovld __cnfn convert_half3(float3);\n"
34214"half3 __ovld __cnfn convert_half3(half3);\n"
34215"half3 __ovld __cnfn convert_half3_rte(char3);\n"
34216"half3 __ovld __cnfn convert_half3_rte(uchar3);\n"
34217"half3 __ovld __cnfn convert_half3_rte(short3);\n"
34218"half3 __ovld __cnfn convert_half3_rte(ushort3);\n"
34219"half3 __ovld __cnfn convert_half3_rte(int3);\n"
34220"half3 __ovld __cnfn convert_half3_rte(uint3);\n"
34221"half3 __ovld __cnfn convert_half3_rte(long3);\n"
34222"half3 __ovld __cnfn convert_half3_rte(ulong3);\n"
34223"half3 __ovld __cnfn convert_half3_rte(float3);\n"
34224"half3 __ovld __cnfn convert_half3_rte(half3);\n"
34225"half3 __ovld __cnfn convert_half3_rtp(char3);\n"
34226"half3 __ovld __cnfn convert_half3_rtp(uchar3);\n"
34227"half3 __ovld __cnfn convert_half3_rtp(short3);\n"
34228"half3 __ovld __cnfn convert_half3_rtp(ushort3);\n"
34229"half3 __ovld __cnfn convert_half3_rtp(int3);\n"
34230"half3 __ovld __cnfn convert_half3_rtp(uint3);\n"
34231"half3 __ovld __cnfn convert_half3_rtp(long3);\n"
34232"half3 __ovld __cnfn convert_half3_rtp(ulong3);\n"
34233"half3 __ovld __cnfn convert_half3_rtp(float3);\n"
34234"half3 __ovld __cnfn convert_half3_rtp(half3);\n"
34235"half3 __ovld __cnfn convert_half3_rtn(char3);\n"
34236"half3 __ovld __cnfn convert_half3_rtn(uchar3);\n"
34237"half3 __ovld __cnfn convert_half3_rtn(short3);\n"
34238"half3 __ovld __cnfn convert_half3_rtn(ushort3);\n"
34239"half3 __ovld __cnfn convert_half3_rtn(int3);\n"
34240"half3 __ovld __cnfn convert_half3_rtn(uint3);\n"
34241"half3 __ovld __cnfn convert_half3_rtn(long3);\n"
34242"half3 __ovld __cnfn convert_half3_rtn(ulong3);\n"
34243"half3 __ovld __cnfn convert_half3_rtn(float3);\n"
34244"half3 __ovld __cnfn convert_half3_rtn(half3);\n"
34245"half3 __ovld __cnfn convert_half3_rtz(char3);\n"
34246"half3 __ovld __cnfn convert_half3_rtz(uchar3);\n"
34247"half3 __ovld __cnfn convert_half3_rtz(short3);\n"
34248"half3 __ovld __cnfn convert_half3_rtz(ushort3);\n"
34249"half3 __ovld __cnfn convert_half3_rtz(int3);\n"
34250"half3 __ovld __cnfn convert_half3_rtz(uint3);\n"
34251"half3 __ovld __cnfn convert_half3_rtz(long3);\n"
34252"half3 __ovld __cnfn convert_half3_rtz(ulong3);\n"
34253"half3 __ovld __cnfn convert_half3_rtz(float3);\n"
34254"half3 __ovld __cnfn convert_half3_rtz(half3);\n"
34255"half4 __ovld __cnfn convert_half4(char4);\n"
34256"half4 __ovld __cnfn convert_half4(uchar4);\n"
34257"half4 __ovld __cnfn convert_half4(short4);\n"
34258"half4 __ovld __cnfn convert_half4(ushort4);\n"
34259"half4 __ovld __cnfn convert_half4(int4);\n"
34260"half4 __ovld __cnfn convert_half4(uint4);\n"
34261"half4 __ovld __cnfn convert_half4(long4);\n"
34262"half4 __ovld __cnfn convert_half4(ulong4);\n"
34263"half4 __ovld __cnfn convert_half4(float4);\n"
34264"half4 __ovld __cnfn convert_half4(half4);\n"
34265"half4 __ovld __cnfn convert_half4_rte(char4);\n"
34266"half4 __ovld __cnfn convert_half4_rte(uchar4);\n"
34267"half4 __ovld __cnfn convert_half4_rte(short4);\n"
34268"half4 __ovld __cnfn convert_half4_rte(ushort4);\n"
34269"half4 __ovld __cnfn convert_half4_rte(int4);\n"
34270"half4 __ovld __cnfn convert_half4_rte(uint4);\n"
34271"half4 __ovld __cnfn convert_half4_rte(long4);\n"
34272"half4 __ovld __cnfn convert_half4_rte(ulong4);\n"
34273"half4 __ovld __cnfn convert_half4_rte(float4);\n"
34274"half4 __ovld __cnfn convert_half4_rte(half4);\n"
34275"half4 __ovld __cnfn convert_half4_rtp(char4);\n"
34276"half4 __ovld __cnfn convert_half4_rtp(uchar4);\n"
34277"half4 __ovld __cnfn convert_half4_rtp(short4);\n"
34278"half4 __ovld __cnfn convert_half4_rtp(ushort4);\n"
34279"half4 __ovld __cnfn convert_half4_rtp(int4);\n"
34280"half4 __ovld __cnfn convert_half4_rtp(uint4);\n"
34281"half4 __ovld __cnfn convert_half4_rtp(long4);\n"
34282"half4 __ovld __cnfn convert_half4_rtp(ulong4);\n"
34283"half4 __ovld __cnfn convert_half4_rtp(float4);\n"
34284"half4 __ovld __cnfn convert_half4_rtp(half4);\n"
34285"half4 __ovld __cnfn convert_half4_rtn(char4);\n"
34286"half4 __ovld __cnfn convert_half4_rtn(uchar4);\n"
34287"half4 __ovld __cnfn convert_half4_rtn(short4);\n"
34288"half4 __ovld __cnfn convert_half4_rtn(ushort4);\n"
34289"half4 __ovld __cnfn convert_half4_rtn(int4);\n"
34290"half4 __ovld __cnfn convert_half4_rtn(uint4);\n"
34291"half4 __ovld __cnfn convert_half4_rtn(long4);\n"
34292"half4 __ovld __cnfn convert_half4_rtn(ulong4);\n"
34293"half4 __ovld __cnfn convert_half4_rtn(float4);\n"
34294"half4 __ovld __cnfn convert_half4_rtn(half4);\n"
34295"half4 __ovld __cnfn convert_half4_rtz(char4);\n"
34296"half4 __ovld __cnfn convert_half4_rtz(uchar4);\n"
34297"half4 __ovld __cnfn convert_half4_rtz(short4);\n"
34298"half4 __ovld __cnfn convert_half4_rtz(ushort4);\n"
34299"half4 __ovld __cnfn convert_half4_rtz(int4);\n"
34300"half4 __ovld __cnfn convert_half4_rtz(uint4);\n"
34301"half4 __ovld __cnfn convert_half4_rtz(long4);\n"
34302"half4 __ovld __cnfn convert_half4_rtz(ulong4);\n"
34303"half4 __ovld __cnfn convert_half4_rtz(float4);\n"
34304"half4 __ovld __cnfn convert_half4_rtz(half4);\n"
34305"half8 __ovld __cnfn convert_half8(char8);\n"
34306"half8 __ovld __cnfn convert_half8(uchar8);\n"
34307"half8 __ovld __cnfn convert_half8(short8);\n"
34308"half8 __ovld __cnfn convert_half8(ushort8);\n"
34309"half8 __ovld __cnfn convert_half8(int8);\n"
34310"half8 __ovld __cnfn convert_half8(uint8);\n"
34311"half8 __ovld __cnfn convert_half8(long8);\n"
34312"half8 __ovld __cnfn convert_half8(ulong8);\n"
34313"half8 __ovld __cnfn convert_half8(float8);\n"
34314"half8 __ovld __cnfn convert_half8(half8);\n"
34315"half8 __ovld __cnfn convert_half8_rte(char8);\n"
34316"half8 __ovld __cnfn convert_half8_rte(uchar8);\n"
34317"half8 __ovld __cnfn convert_half8_rte(short8);\n"
34318"half8 __ovld __cnfn convert_half8_rte(ushort8);\n"
34319"half8 __ovld __cnfn convert_half8_rte(int8);\n"
34320"half8 __ovld __cnfn convert_half8_rte(uint8);\n"
34321"half8 __ovld __cnfn convert_half8_rte(long8);\n"
34322"half8 __ovld __cnfn convert_half8_rte(ulong8);\n"
34323"half8 __ovld __cnfn convert_half8_rte(float8);\n"
34324"half8 __ovld __cnfn convert_half8_rte(half8);\n"
34325"half8 __ovld __cnfn convert_half8_rtp(char8);\n"
34326"half8 __ovld __cnfn convert_half8_rtp(uchar8);\n"
34327"half8 __ovld __cnfn convert_half8_rtp(short8);\n"
34328"half8 __ovld __cnfn convert_half8_rtp(ushort8);\n"
34329"half8 __ovld __cnfn convert_half8_rtp(int8);\n"
34330"half8 __ovld __cnfn convert_half8_rtp(uint8);\n"
34331"half8 __ovld __cnfn convert_half8_rtp(long8);\n"
34332"half8 __ovld __cnfn convert_half8_rtp(ulong8);\n"
34333"half8 __ovld __cnfn convert_half8_rtp(float8);\n"
34334"half8 __ovld __cnfn convert_half8_rtp(half8);\n"
34335"half8 __ovld __cnfn convert_half8_rtn(char8);\n"
34336"half8 __ovld __cnfn convert_half8_rtn(uchar8);\n"
34337"half8 __ovld __cnfn convert_half8_rtn(short8);\n"
34338"half8 __ovld __cnfn convert_half8_rtn(ushort8);\n"
34339"half8 __ovld __cnfn convert_half8_rtn(int8);\n"
34340"half8 __ovld __cnfn convert_half8_rtn(uint8);\n"
34341"half8 __ovld __cnfn convert_half8_rtn(long8);\n"
34342"half8 __ovld __cnfn convert_half8_rtn(ulong8);\n"
34343"half8 __ovld __cnfn convert_half8_rtn(float8);\n"
34344"half8 __ovld __cnfn convert_half8_rtn(half8);\n"
34345"half8 __ovld __cnfn convert_half8_rtz(char8);\n"
34346"half8 __ovld __cnfn convert_half8_rtz(uchar8);\n"
34347"half8 __ovld __cnfn convert_half8_rtz(short8);\n"
34348"half8 __ovld __cnfn convert_half8_rtz(ushort8);\n"
34349"half8 __ovld __cnfn convert_half8_rtz(int8);\n"
34350"half8 __ovld __cnfn convert_half8_rtz(uint8);\n"
34351"half8 __ovld __cnfn convert_half8_rtz(long8);\n"
34352"half8 __ovld __cnfn convert_half8_rtz(ulong8);\n"
34353"half8 __ovld __cnfn convert_half8_rtz(float8);\n"
34354"half8 __ovld __cnfn convert_half8_rtz(half8);\n"
34355"half16 __ovld __cnfn convert_half16(char16);\n"
34356"half16 __ovld __cnfn convert_half16(uchar16);\n"
34357"half16 __ovld __cnfn convert_half16(short16);\n"
34358"half16 __ovld __cnfn convert_half16(ushort16);\n"
34359"half16 __ovld __cnfn convert_half16(int16);\n"
34360"half16 __ovld __cnfn convert_half16(uint16);\n"
34361"half16 __ovld __cnfn convert_half16(long16);\n"
34362"half16 __ovld __cnfn convert_half16(ulong16);\n"
34363"half16 __ovld __cnfn convert_half16(float16);\n"
34364"half16 __ovld __cnfn convert_half16(half16);\n"
34365"half16 __ovld __cnfn convert_half16_rte(char16);\n"
34366"half16 __ovld __cnfn convert_half16_rte(uchar16);\n"
34367"half16 __ovld __cnfn convert_half16_rte(short16);\n"
34368"half16 __ovld __cnfn convert_half16_rte(ushort16);\n"
34369"half16 __ovld __cnfn convert_half16_rte(int16);\n"
34370"half16 __ovld __cnfn convert_half16_rte(uint16);\n"
34371"half16 __ovld __cnfn convert_half16_rte(long16);\n"
34372"half16 __ovld __cnfn convert_half16_rte(ulong16);\n"
34373"half16 __ovld __cnfn convert_half16_rte(float16);\n"
34374"half16 __ovld __cnfn convert_half16_rte(half16);\n"
34375"half16 __ovld __cnfn convert_half16_rtp(char16);\n"
34376"half16 __ovld __cnfn convert_half16_rtp(uchar16);\n"
34377"half16 __ovld __cnfn convert_half16_rtp(short16);\n"
34378"half16 __ovld __cnfn convert_half16_rtp(ushort16);\n"
34379"half16 __ovld __cnfn convert_half16_rtp(int16);\n"
34380"half16 __ovld __cnfn convert_half16_rtp(uint16);\n"
34381"half16 __ovld __cnfn convert_half16_rtp(long16);\n"
34382"half16 __ovld __cnfn convert_half16_rtp(ulong16);\n"
34383"half16 __ovld __cnfn convert_half16_rtp(float16);\n"
34384"half16 __ovld __cnfn convert_half16_rtp(half16);\n"
34385"half16 __ovld __cnfn convert_half16_rtn(char16);\n"
34386"half16 __ovld __cnfn convert_half16_rtn(uchar16);\n"
34387"half16 __ovld __cnfn convert_half16_rtn(short16);\n"
34388"half16 __ovld __cnfn convert_half16_rtn(ushort16);\n"
34389"half16 __ovld __cnfn convert_half16_rtn(int16);\n"
34390"half16 __ovld __cnfn convert_half16_rtn(uint16);\n"
34391"half16 __ovld __cnfn convert_half16_rtn(long16);\n"
34392"half16 __ovld __cnfn convert_half16_rtn(ulong16);\n"
34393"half16 __ovld __cnfn convert_half16_rtn(float16);\n"
34394"half16 __ovld __cnfn convert_half16_rtn(half16);\n"
34395"half16 __ovld __cnfn convert_half16_rtz(char16);\n"
34396"half16 __ovld __cnfn convert_half16_rtz(uchar16);\n"
34397"half16 __ovld __cnfn convert_half16_rtz(short16);\n"
34398"half16 __ovld __cnfn convert_half16_rtz(ushort16);\n"
34399"half16 __ovld __cnfn convert_half16_rtz(int16);\n"
34400"half16 __ovld __cnfn convert_half16_rtz(uint16);\n"
34401"half16 __ovld __cnfn convert_half16_rtz(long16);\n"
34402"half16 __ovld __cnfn convert_half16_rtz(ulong16);\n"
34403"half16 __ovld __cnfn convert_half16_rtz(float16);\n"
34404"half16 __ovld __cnfn convert_half16_rtz(half16);\n"
34405"\n"
34406"// Convert half types to double types.\n"
34407"#ifdef cl_khr_fp64\n"
34408"double __ovld __cnfn convert_double(half);\n"
34409"double __ovld __cnfn convert_double_rte(half);\n"
34410"double __ovld __cnfn convert_double_rtp(half);\n"
34411"double __ovld __cnfn convert_double_rtn(half);\n"
34412"double __ovld __cnfn convert_double_rtz(half);\n"
34413"double2 __ovld __cnfn convert_double2(half2);\n"
34414"double2 __ovld __cnfn convert_double2_rte(half2);\n"
34415"double2 __ovld __cnfn convert_double2_rtp(half2);\n"
34416"double2 __ovld __cnfn convert_double2_rtn(half2);\n"
34417"double2 __ovld __cnfn convert_double2_rtz(half2);\n"
34418"double3 __ovld __cnfn convert_double3(half3);\n"
34419"double3 __ovld __cnfn convert_double3_rte(half3);\n"
34420"double3 __ovld __cnfn convert_double3_rtp(half3);\n"
34421"double3 __ovld __cnfn convert_double3_rtn(half3);\n"
34422"double3 __ovld __cnfn convert_double3_rtz(half3);\n"
34423"double4 __ovld __cnfn convert_double4(half4);\n"
34424"double4 __ovld __cnfn convert_double4_rte(half4);\n"
34425"double4 __ovld __cnfn convert_double4_rtp(half4);\n"
34426"double4 __ovld __cnfn convert_double4_rtn(half4);\n"
34427"double4 __ovld __cnfn convert_double4_rtz(half4);\n"
34428"double8 __ovld __cnfn convert_double8(half8);\n"
34429"double8 __ovld __cnfn convert_double8_rte(half8);\n"
34430"double8 __ovld __cnfn convert_double8_rtp(half8);\n"
34431"double8 __ovld __cnfn convert_double8_rtn(half8);\n"
34432"double8 __ovld __cnfn convert_double8_rtz(half8);\n"
34433"double16 __ovld __cnfn convert_double16(half16);\n"
34434"double16 __ovld __cnfn convert_double16_rte(half16);\n"
34435"double16 __ovld __cnfn convert_double16_rtp(half16);\n"
34436"double16 __ovld __cnfn convert_double16_rtn(half16);\n"
34437"double16 __ovld __cnfn convert_double16_rtz(half16);\n"
34438"\n"
34439"// Convert double types to half types.\n"
34440"half __ovld __cnfn convert_half(double);\n"
34441"half __ovld __cnfn convert_half_rte(double);\n"
34442"half __ovld __cnfn convert_half_rtp(double);\n"
34443"half __ovld __cnfn convert_half_rtn(double);\n"
34444"half __ovld __cnfn convert_half_rtz(double);\n"
34445"half2 __ovld __cnfn convert_half2(double2);\n"
34446"half2 __ovld __cnfn convert_half2_rte(double2);\n"
34447"half2 __ovld __cnfn convert_half2_rtp(double2);\n"
34448"half2 __ovld __cnfn convert_half2_rtn(double2);\n"
34449"half2 __ovld __cnfn convert_half2_rtz(double2);\n"
34450"half3 __ovld __cnfn convert_half3(double3);\n"
34451"half3 __ovld __cnfn convert_half3_rte(double3);\n"
34452"half3 __ovld __cnfn convert_half3_rtp(double3);\n"
34453"half3 __ovld __cnfn convert_half3_rtn(double3);\n"
34454"half3 __ovld __cnfn convert_half3_rtz(double3);\n"
34455"half4 __ovld __cnfn convert_half4(double4);\n"
34456"half4 __ovld __cnfn convert_half4_rte(double4);\n"
34457"half4 __ovld __cnfn convert_half4_rtp(double4);\n"
34458"half4 __ovld __cnfn convert_half4_rtn(double4);\n"
34459"half4 __ovld __cnfn convert_half4_rtz(double4);\n"
34460"half8 __ovld __cnfn convert_half8(double8);\n"
34461"half8 __ovld __cnfn convert_half8_rte(double8);\n"
34462"half8 __ovld __cnfn convert_half8_rtp(double8);\n"
34463"half8 __ovld __cnfn convert_half8_rtn(double8);\n"
34464"half8 __ovld __cnfn convert_half8_rtz(double8);\n"
34465"half16 __ovld __cnfn convert_half16(double16);\n"
34466"half16 __ovld __cnfn convert_half16_rte(double16);\n"
34467"half16 __ovld __cnfn convert_half16_rtp(double16);\n"
34468"half16 __ovld __cnfn convert_half16_rtn(double16);\n"
34469"half16 __ovld __cnfn convert_half16_rtz(double16);\n"
34470"#endif //cl_khr_fp64\n"
34471"\n"
34472"#endif // cl_khr_fp16\n"
34473"\n"
34474"/**\n"
34475" * OpenCL v1.1/1.2/2.0 s6.2.4.2 - as_type operators\n"
34476" * Reinterprets a data type as another data type of the same size\n"
34477" */\n"
34478"#define as_char(x) __builtin_astype((x), char)\n"
34479"#define as_char2(x) __builtin_astype((x), char2)\n"
34480"#define as_char3(x) __builtin_astype((x), char3)\n"
34481"#define as_char4(x) __builtin_astype((x), char4)\n"
34482"#define as_char8(x) __builtin_astype((x), char8)\n"
34483"#define as_char16(x) __builtin_astype((x), char16)\n"
34484"\n"
34485"#define as_uchar(x) __builtin_astype((x), uchar)\n"
34486"#define as_uchar2(x) __builtin_astype((x), uchar2)\n"
34487"#define as_uchar3(x) __builtin_astype((x), uchar3)\n"
34488"#define as_uchar4(x) __builtin_astype((x), uchar4)\n"
34489"#define as_uchar8(x) __builtin_astype((x), uchar8)\n"
34490"#define as_uchar16(x) __builtin_astype((x), uchar16)\n"
34491"\n"
34492"#define as_short(x) __builtin_astype((x), short)\n"
34493"#define as_short2(x) __builtin_astype((x), short2)\n"
34494"#define as_short3(x) __builtin_astype((x), short3)\n"
34495"#define as_short4(x) __builtin_astype((x), short4)\n"
34496"#define as_short8(x) __builtin_astype((x), short8)\n"
34497"#define as_short16(x) __builtin_astype((x), short16)\n"
34498"\n"
34499"#define as_ushort(x) __builtin_astype((x), ushort)\n"
34500"#define as_ushort2(x) __builtin_astype((x), ushort2)\n"
34501"#define as_ushort3(x) __builtin_astype((x), ushort3)\n"
34502"#define as_ushort4(x) __builtin_astype((x), ushort4)\n"
34503"#define as_ushort8(x) __builtin_astype((x), ushort8)\n"
34504"#define as_ushort16(x) __builtin_astype((x), ushort16)\n"
34505"\n"
34506"#define as_int(x) __builtin_astype((x), int)\n"
34507"#define as_int2(x) __builtin_astype((x), int2)\n"
34508"#define as_int3(x) __builtin_astype((x), int3)\n"
34509"#define as_int4(x) __builtin_astype((x), int4)\n"
34510"#define as_int8(x) __builtin_astype((x), int8)\n"
34511"#define as_int16(x) __builtin_astype((x), int16)\n"
34512"\n"
34513"#define as_uint(x) __builtin_astype((x), uint)\n"
34514"#define as_uint2(x) __builtin_astype((x), uint2)\n"
34515"#define as_uint3(x) __builtin_astype((x), uint3)\n"
34516"#define as_uint4(x) __builtin_astype((x), uint4)\n"
34517"#define as_uint8(x) __builtin_astype((x), uint8)\n"
34518"#define as_uint16(x) __builtin_astype((x), uint16)\n"
34519"\n"
34520"#define as_long(x) __builtin_astype((x), long)\n"
34521"#define as_long2(x) __builtin_astype((x), long2)\n"
34522"#define as_long3(x) __builtin_astype((x), long3)\n"
34523"#define as_long4(x) __builtin_astype((x), long4)\n"
34524"#define as_long8(x) __builtin_astype((x), long8)\n"
34525"#define as_long16(x) __builtin_astype((x), long16)\n"
34526"\n"
34527"#define as_ulong(x) __builtin_astype((x), ulong)\n"
34528"#define as_ulong2(x) __builtin_astype((x), ulong2)\n"
34529"#define as_ulong3(x) __builtin_astype((x), ulong3)\n"
34530"#define as_ulong4(x) __builtin_astype((x), ulong4)\n"
34531"#define as_ulong8(x) __builtin_astype((x), ulong8)\n"
34532"#define as_ulong16(x) __builtin_astype((x), ulong16)\n"
34533"\n"
34534"#define as_float(x) __builtin_astype((x), float)\n"
34535"#define as_float2(x) __builtin_astype((x), float2)\n"
34536"#define as_float3(x) __builtin_astype((x), float3)\n"
34537"#define as_float4(x) __builtin_astype((x), float4)\n"
34538"#define as_float8(x) __builtin_astype((x), float8)\n"
34539"#define as_float16(x) __builtin_astype((x), float16)\n"
34540"\n"
34541"#ifdef cl_khr_fp64\n"
34542"#define as_double(x) __builtin_astype((x), double)\n"
34543"#define as_double2(x) __builtin_astype((x), double2)\n"
34544"#define as_double3(x) __builtin_astype((x), double3)\n"
34545"#define as_double4(x) __builtin_astype((x), double4)\n"
34546"#define as_double8(x) __builtin_astype((x), double8)\n"
34547"#define as_double16(x) __builtin_astype((x), double16)\n"
34548"#endif //cl_khr_fp64\n"
34549"\n"
34550"#ifdef cl_khr_fp16\n"
34551"#define as_half(x) __builtin_astype((x), half)\n"
34552"#define as_half2(x) __builtin_astype((x), half2)\n"
34553"#define as_half3(x) __builtin_astype((x), half3)\n"
34554"#define as_half4(x) __builtin_astype((x), half4)\n"
34555"#define as_half8(x) __builtin_astype((x), half8)\n"
34556"#define as_half16(x) __builtin_astype((x), half16)\n"
34557"#endif //cl_khr_fp16\n"
34558"\n"
34559"// OpenCL v1.1 s6.9, v1.2/2.0 s6.10 - Function qualifiers\n"
34560"\n"
34561"#define __kernel_exec(X, typen) __kernel \\\n"
34562" __attribute__((work_group_size_hint(X, 1, 1))) \\\n"
34563" __attribute__((vec_type_hint(typen)))\n"
34564"\n"
34565"#define kernel_exec(X, typen) __kernel \\\n"
34566" __attribute__((work_group_size_hint(X, 1, 1))) \\\n"
34567" __attribute__((vec_type_hint(typen)))\n"
34568"\n"
34569"// OpenCL v1.1 s6.11.1, v1.2 s6.12.1, v2.0 s6.13.1 - Work-item Functions\n"
34570"\n"
34571"/**\n"
34572" * Returns the number of dimensions in use. This is the\n"
34573" * value given to the work_dim argument specified in\n"
34574" * clEnqueueNDRangeKernel.\n"
34575" * For clEnqueueTask, this returns 1.\n"
34576" */\n"
34577"uint __ovld __cnfn get_work_dim(void);\n"
34578"\n"
34579"/**\n"
34580" * Returns the number of global work-items specified for\n"
34581" * dimension identified by dimindx. This value is given by\n"
34582" * the global_work_size argument to\n"
34583" * clEnqueueNDRangeKernel. Valid values of dimindx\n"
34584" * are 0 to get_work_dim() - 1. For other values of\n"
34585" * dimindx, get_global_size() returns 1.\n"
34586" * For clEnqueueTask, this always returns 1.\n"
34587" */\n"
34588"size_t __ovld __cnfn get_global_size(uint dimindx);\n"
34589"\n"
34590"/**\n"
34591" * Returns the unique global work-item ID value for\n"
34592" * dimension identified by dimindx. The global work-item\n"
34593" * ID specifies the work-item ID based on the number of\n"
34594" * global work-items specified to execute the kernel. Valid\n"
34595" * values of dimindx are 0 to get_work_dim() - 1. For\n"
34596" * other values of dimindx, get_global_id() returns 0.\n"
34597" * For clEnqueueTask, this returns 0.\n"
34598" */\n"
34599"size_t __ovld __cnfn get_global_id(uint dimindx);\n"
34600"\n"
34601"/**\n"
34602" * Returns the number of local work-items specified in\n"
34603" * dimension identified by dimindx. This value is given by\n"
34604" * the local_work_size argument to\n"
34605" * clEnqueueNDRangeKernel if local_work_size is not\n"
34606" * NULL; otherwise the OpenCL implementation chooses\n"
34607" * an appropriate local_work_size value which is returned\n"
34608" * by this function. Valid values of dimindx are 0 to\n"
34609" * get_work_dim() - 1. For other values of dimindx,\n"
34610" * get_local_size() returns 1.\n"
34611" * For clEnqueueTask, this always returns 1.\n"
34612" */\n"
34613"size_t __ovld __cnfn get_local_size(uint dimindx);\n"
34614"\n"
34615"/**\n"
34616" * Returns the unique local work-item ID i.e. a work-item\n"
34617" * within a specific work-group for dimension identified by\n"
34618" * dimindx. Valid values of dimindx are 0 to\n"
34619" * get_work_dim() - 1. For other values of dimindx,\n"
34620" * get_local_id() returns 0.\n"
34621" * For clEnqueueTask, this returns 0.\n"
34622" */\n"
34623"size_t __ovld __cnfn get_local_id(uint dimindx);\n"
34624"\n"
34625"/**\n"
34626" * Returns the number of work-groups that will execute a\n"
34627" * kernel for dimension identified by dimindx.\n"
34628" * Valid values of dimindx are 0 to get_work_dim() - 1.\n"
34629" * For other values of dimindx, get_num_groups () returns\n"
34630" * 1.\n"
34631" * For clEnqueueTask, this always returns 1.\n"
34632" */\n"
34633"size_t __ovld __cnfn get_num_groups(uint dimindx);\n"
34634"\n"
34635"/**\n"
34636" * get_group_id returns the work-group ID which is a\n"
34637" * number from 0 .. get_num_groups(dimindx) - 1.\n"
34638" * Valid values of dimindx are 0 to get_work_dim() - 1.\n"
34639" * For other values, get_group_id() returns 0.\n"
34640" * For clEnqueueTask, this returns 0.\n"
34641" */\n"
34642"size_t __ovld __cnfn get_group_id(uint dimindx);\n"
34643"\n"
34644"/**\n"
34645" * get_global_offset returns the offset values specified in\n"
34646" * global_work_offset argument to\n"
34647" * clEnqueueNDRangeKernel.\n"
34648" * Valid values of dimindx are 0 to get_work_dim() - 1.\n"
34649" * For other values, get_global_offset() returns 0.\n"
34650" * For clEnqueueTask, this returns 0.\n"
34651" */\n"
34652"size_t __ovld __cnfn get_global_offset(uint dimindx);\n"
34653"\n"
34654"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
34655"size_t __ovld get_enqueued_local_size(uint dimindx);\n"
34656"size_t __ovld get_global_linear_id(void);\n"
34657"size_t __ovld get_local_linear_id(void);\n"
34658"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
34659"\n"
34660"// OpenCL v1.1 s6.11.2, v1.2 s6.12.2, v2.0 s6.13.2 - Math functions\n"
34661"\n"
34662"/**\n"
34663" * Arc cosine function.\n"
34664" */\n"
34665"float __ovld __cnfn acos(float);\n"
34666"float2 __ovld __cnfn acos(float2);\n"
34667"float3 __ovld __cnfn acos(float3);\n"
34668"float4 __ovld __cnfn acos(float4);\n"
34669"float8 __ovld __cnfn acos(float8);\n"
34670"float16 __ovld __cnfn acos(float16);\n"
34671"#ifdef cl_khr_fp64\n"
34672"double __ovld __cnfn acos(double);\n"
34673"double2 __ovld __cnfn acos(double2);\n"
34674"double3 __ovld __cnfn acos(double3);\n"
34675"double4 __ovld __cnfn acos(double4);\n"
34676"double8 __ovld __cnfn acos(double8);\n"
34677"double16 __ovld __cnfn acos(double16);\n"
34678"#endif //cl_khr_fp64\n"
34679"#ifdef cl_khr_fp16\n"
34680"half __ovld __cnfn acos(half);\n"
34681"half2 __ovld __cnfn acos(half2);\n"
34682"half3 __ovld __cnfn acos(half3);\n"
34683"half4 __ovld __cnfn acos(half4);\n"
34684"half8 __ovld __cnfn acos(half8);\n"
34685"half16 __ovld __cnfn acos(half16);\n"
34686"#endif //cl_khr_fp16\n"
34687"\n"
34688"/**\n"
34689" * Inverse hyperbolic cosine.\n"
34690" */\n"
34691"float __ovld __cnfn acosh(float);\n"
34692"float2 __ovld __cnfn acosh(float2);\n"
34693"float3 __ovld __cnfn acosh(float3);\n"
34694"float4 __ovld __cnfn acosh(float4);\n"
34695"float8 __ovld __cnfn acosh(float8);\n"
34696"float16 __ovld __cnfn acosh(float16);\n"
34697"#ifdef cl_khr_fp64\n"
34698"double __ovld __cnfn acosh(double);\n"
34699"double2 __ovld __cnfn acosh(double2);\n"
34700"double3 __ovld __cnfn acosh(double3);\n"
34701"double4 __ovld __cnfn acosh(double4);\n"
34702"double8 __ovld __cnfn acosh(double8);\n"
34703"double16 __ovld __cnfn acosh(double16);\n"
34704"#endif //cl_khr_fp64\n"
34705"#ifdef cl_khr_fp16\n"
34706"half __ovld __cnfn acosh(half);\n"
34707"half2 __ovld __cnfn acosh(half2);\n"
34708"half3 __ovld __cnfn acosh(half3);\n"
34709"half4 __ovld __cnfn acosh(half4);\n"
34710"half8 __ovld __cnfn acosh(half8);\n"
34711"half16 __ovld __cnfn acosh(half16);\n"
34712"#endif //cl_khr_fp16\n"
34713"\n"
34714"/**\n"
34715" * Compute acos (x) / PI.\n"
34716" */\n"
34717"float __ovld __cnfn acospi(float x);\n"
34718"float2 __ovld __cnfn acospi(float2 x);\n"
34719"float3 __ovld __cnfn acospi(float3 x);\n"
34720"float4 __ovld __cnfn acospi(float4 x);\n"
34721"float8 __ovld __cnfn acospi(float8 x);\n"
34722"float16 __ovld __cnfn acospi(float16 x);\n"
34723"#ifdef cl_khr_fp64\n"
34724"double __ovld __cnfn acospi(double x);\n"
34725"double2 __ovld __cnfn acospi(double2 x);\n"
34726"double3 __ovld __cnfn acospi(double3 x);\n"
34727"double4 __ovld __cnfn acospi(double4 x);\n"
34728"double8 __ovld __cnfn acospi(double8 x);\n"
34729"double16 __ovld __cnfn acospi(double16 x);\n"
34730"#endif //cl_khr_fp64\n"
34731"#ifdef cl_khr_fp16\n"
34732"half __ovld __cnfn acospi(half x);\n"
34733"half2 __ovld __cnfn acospi(half2 x);\n"
34734"half3 __ovld __cnfn acospi(half3 x);\n"
34735"half4 __ovld __cnfn acospi(half4 x);\n"
34736"half8 __ovld __cnfn acospi(half8 x);\n"
34737"half16 __ovld __cnfn acospi(half16 x);\n"
34738"#endif //cl_khr_fp16\n"
34739"\n"
34740"/**\n"
34741" * Arc sine function.\n"
34742" */\n"
34743"float __ovld __cnfn asin(float);\n"
34744"float2 __ovld __cnfn asin(float2);\n"
34745"float3 __ovld __cnfn asin(float3);\n"
34746"float4 __ovld __cnfn asin(float4);\n"
34747"float8 __ovld __cnfn asin(float8);\n"
34748"float16 __ovld __cnfn asin(float16);\n"
34749"#ifdef cl_khr_fp64\n"
34750"double __ovld __cnfn asin(double);\n"
34751"double2 __ovld __cnfn asin(double2);\n"
34752"double3 __ovld __cnfn asin(double3);\n"
34753"double4 __ovld __cnfn asin(double4);\n"
34754"double8 __ovld __cnfn asin(double8);\n"
34755"double16 __ovld __cnfn asin(double16);\n"
34756"#endif //cl_khr_fp64\n"
34757"#ifdef cl_khr_fp16\n"
34758"half __ovld __cnfn asin(half);\n"
34759"half2 __ovld __cnfn asin(half2);\n"
34760"half3 __ovld __cnfn asin(half3);\n"
34761"half4 __ovld __cnfn asin(half4);\n"
34762"half8 __ovld __cnfn asin(half8);\n"
34763"half16 __ovld __cnfn asin(half16);\n"
34764"#endif //cl_khr_fp16\n"
34765"\n"
34766"/**\n"
34767" * Inverse hyperbolic sine.\n"
34768" */\n"
34769"float __ovld __cnfn asinh(float);\n"
34770"float2 __ovld __cnfn asinh(float2);\n"
34771"float3 __ovld __cnfn asinh(float3);\n"
34772"float4 __ovld __cnfn asinh(float4);\n"
34773"float8 __ovld __cnfn asinh(float8);\n"
34774"float16 __ovld __cnfn asinh(float16);\n"
34775"#ifdef cl_khr_fp64\n"
34776"double __ovld __cnfn asinh(double);\n"
34777"double2 __ovld __cnfn asinh(double2);\n"
34778"double3 __ovld __cnfn asinh(double3);\n"
34779"double4 __ovld __cnfn asinh(double4);\n"
34780"double8 __ovld __cnfn asinh(double8);\n"
34781"double16 __ovld __cnfn asinh(double16);\n"
34782"#endif //cl_khr_fp64\n"
34783"#ifdef cl_khr_fp16\n"
34784"half __ovld __cnfn asinh(half);\n"
34785"half2 __ovld __cnfn asinh(half2);\n"
34786"half3 __ovld __cnfn asinh(half3);\n"
34787"half4 __ovld __cnfn asinh(half4);\n"
34788"half8 __ovld __cnfn asinh(half8);\n"
34789"half16 __ovld __cnfn asinh(half16);\n"
34790"#endif //cl_khr_fp16\n"
34791"\n"
34792"/**\n"
34793" * Compute asin (x) / PI.\n"
34794" */\n"
34795"float __ovld __cnfn asinpi(float x);\n"
34796"float2 __ovld __cnfn asinpi(float2 x);\n"
34797"float3 __ovld __cnfn asinpi(float3 x);\n"
34798"float4 __ovld __cnfn asinpi(float4 x);\n"
34799"float8 __ovld __cnfn asinpi(float8 x);\n"
34800"float16 __ovld __cnfn asinpi(float16 x);\n"
34801"#ifdef cl_khr_fp64\n"
34802"double __ovld __cnfn asinpi(double x);\n"
34803"double2 __ovld __cnfn asinpi(double2 x);\n"
34804"double3 __ovld __cnfn asinpi(double3 x);\n"
34805"double4 __ovld __cnfn asinpi(double4 x);\n"
34806"double8 __ovld __cnfn asinpi(double8 x);\n"
34807"double16 __ovld __cnfn asinpi(double16 x);\n"
34808"#endif //cl_khr_fp64\n"
34809"#ifdef cl_khr_fp16\n"
34810"half __ovld __cnfn asinpi(half x);\n"
34811"half2 __ovld __cnfn asinpi(half2 x);\n"
34812"half3 __ovld __cnfn asinpi(half3 x);\n"
34813"half4 __ovld __cnfn asinpi(half4 x);\n"
34814"half8 __ovld __cnfn asinpi(half8 x);\n"
34815"half16 __ovld __cnfn asinpi(half16 x);\n"
34816"#endif //cl_khr_fp16\n"
34817"\n"
34818"/**\n"
34819" * Arc tangent function.\n"
34820" */\n"
34821"float __ovld __cnfn atan(float y_over_x);\n"
34822"float2 __ovld __cnfn atan(float2 y_over_x);\n"
34823"float3 __ovld __cnfn atan(float3 y_over_x);\n"
34824"float4 __ovld __cnfn atan(float4 y_over_x);\n"
34825"float8 __ovld __cnfn atan(float8 y_over_x);\n"
34826"float16 __ovld __cnfn atan(float16 y_over_x);\n"
34827"#ifdef cl_khr_fp64\n"
34828"double __ovld __cnfn atan(double y_over_x);\n"
34829"double2 __ovld __cnfn atan(double2 y_over_x);\n"
34830"double3 __ovld __cnfn atan(double3 y_over_x);\n"
34831"double4 __ovld __cnfn atan(double4 y_over_x);\n"
34832"double8 __ovld __cnfn atan(double8 y_over_x);\n"
34833"double16 __ovld __cnfn atan(double16 y_over_x);\n"
34834"#endif //cl_khr_fp64\n"
34835"#ifdef cl_khr_fp16\n"
34836"half __ovld __cnfn atan(half y_over_x);\n"
34837"half2 __ovld __cnfn atan(half2 y_over_x);\n"
34838"half3 __ovld __cnfn atan(half3 y_over_x);\n"
34839"half4 __ovld __cnfn atan(half4 y_over_x);\n"
34840"half8 __ovld __cnfn atan(half8 y_over_x);\n"
34841"half16 __ovld __cnfn atan(half16 y_over_x);\n"
34842"#endif //cl_khr_fp16\n"
34843"\n"
34844"/**\n"
34845" * Arc tangent of y / x.\n"
34846" */\n"
34847"float __ovld __cnfn atan2(float y, float x);\n"
34848"float2 __ovld __cnfn atan2(float2 y, float2 x);\n"
34849"float3 __ovld __cnfn atan2(float3 y, float3 x);\n"
34850"float4 __ovld __cnfn atan2(float4 y, float4 x);\n"
34851"float8 __ovld __cnfn atan2(float8 y, float8 x);\n"
34852"float16 __ovld __cnfn atan2(float16 y, float16 x);\n"
34853"#ifdef cl_khr_fp64\n"
34854"double __ovld __cnfn atan2(double y, double x);\n"
34855"double2 __ovld __cnfn atan2(double2 y, double2 x);\n"
34856"double3 __ovld __cnfn atan2(double3 y, double3 x);\n"
34857"double4 __ovld __cnfn atan2(double4 y, double4 x);\n"
34858"double8 __ovld __cnfn atan2(double8 y, double8 x);\n"
34859"double16 __ovld __cnfn atan2(double16 y, double16 x);\n"
34860"#endif //cl_khr_fp64\n"
34861"#ifdef cl_khr_fp16\n"
34862"half __ovld __cnfn atan2(half y, half x);\n"
34863"half2 __ovld __cnfn atan2(half2 y, half2 x);\n"
34864"half3 __ovld __cnfn atan2(half3 y, half3 x);\n"
34865"half4 __ovld __cnfn atan2(half4 y, half4 x);\n"
34866"half8 __ovld __cnfn atan2(half8 y, half8 x);\n"
34867"half16 __ovld __cnfn atan2(half16 y, half16 x);\n"
34868"#endif //cl_khr_fp16\n"
34869"\n"
34870"/**\n"
34871" * Hyperbolic arc tangent.\n"
34872" */\n"
34873"float __ovld __cnfn atanh(float);\n"
34874"float2 __ovld __cnfn atanh(float2);\n"
34875"float3 __ovld __cnfn atanh(float3);\n"
34876"float4 __ovld __cnfn atanh(float4);\n"
34877"float8 __ovld __cnfn atanh(float8);\n"
34878"float16 __ovld __cnfn atanh(float16);\n"
34879"#ifdef cl_khr_fp64\n"
34880"double __ovld __cnfn atanh(double);\n"
34881"double2 __ovld __cnfn atanh(double2);\n"
34882"double3 __ovld __cnfn atanh(double3);\n"
34883"double4 __ovld __cnfn atanh(double4);\n"
34884"double8 __ovld __cnfn atanh(double8);\n"
34885"double16 __ovld __cnfn atanh(double16);\n"
34886"#endif //cl_khr_fp64\n"
34887"#ifdef cl_khr_fp16\n"
34888"half __ovld __cnfn atanh(half);\n"
34889"half2 __ovld __cnfn atanh(half2);\n"
34890"half3 __ovld __cnfn atanh(half3);\n"
34891"half4 __ovld __cnfn atanh(half4);\n"
34892"half8 __ovld __cnfn atanh(half8);\n"
34893"half16 __ovld __cnfn atanh(half16);\n"
34894"#endif //cl_khr_fp16\n"
34895"\n"
34896"/**\n"
34897" * Compute atan (x) / PI.\n"
34898" */\n"
34899"float __ovld __cnfn atanpi(float x);\n"
34900"float2 __ovld __cnfn atanpi(float2 x);\n"
34901"float3 __ovld __cnfn atanpi(float3 x);\n"
34902"float4 __ovld __cnfn atanpi(float4 x);\n"
34903"float8 __ovld __cnfn atanpi(float8 x);\n"
34904"float16 __ovld __cnfn atanpi(float16 x);\n"
34905"#ifdef cl_khr_fp64\n"
34906"double __ovld __cnfn atanpi(double x);\n"
34907"double2 __ovld __cnfn atanpi(double2 x);\n"
34908"double3 __ovld __cnfn atanpi(double3 x);\n"
34909"double4 __ovld __cnfn atanpi(double4 x);\n"
34910"double8 __ovld __cnfn atanpi(double8 x);\n"
34911"double16 __ovld __cnfn atanpi(double16 x);\n"
34912"#endif //cl_khr_fp64\n"
34913"#ifdef cl_khr_fp16\n"
34914"half __ovld __cnfn atanpi(half x);\n"
34915"half2 __ovld __cnfn atanpi(half2 x);\n"
34916"half3 __ovld __cnfn atanpi(half3 x);\n"
34917"half4 __ovld __cnfn atanpi(half4 x);\n"
34918"half8 __ovld __cnfn atanpi(half8 x);\n"
34919"half16 __ovld __cnfn atanpi(half16 x);\n"
34920"#endif //cl_khr_fp16\n"
34921"\n"
34922"/**\n"
34923" * Compute atan2 (y, x) / PI.\n"
34924" */\n"
34925"float __ovld __cnfn atan2pi(float y, float x);\n"
34926"float2 __ovld __cnfn atan2pi(float2 y, float2 x);\n"
34927"float3 __ovld __cnfn atan2pi(float3 y, float3 x);\n"
34928"float4 __ovld __cnfn atan2pi(float4 y, float4 x);\n"
34929"float8 __ovld __cnfn atan2pi(float8 y, float8 x);\n"
34930"float16 __ovld __cnfn atan2pi(float16 y, float16 x);\n"
34931"#ifdef cl_khr_fp64\n"
34932"double __ovld __cnfn atan2pi(double y, double x);\n"
34933"double2 __ovld __cnfn atan2pi(double2 y, double2 x);\n"
34934"double3 __ovld __cnfn atan2pi(double3 y, double3 x);\n"
34935"double4 __ovld __cnfn atan2pi(double4 y, double4 x);\n"
34936"double8 __ovld __cnfn atan2pi(double8 y, double8 x);\n"
34937"double16 __ovld __cnfn atan2pi(double16 y, double16 x);\n"
34938"#endif //cl_khr_fp64\n"
34939"#ifdef cl_khr_fp16\n"
34940"half __ovld __cnfn atan2pi(half y, half x);\n"
34941"half2 __ovld __cnfn atan2pi(half2 y, half2 x);\n"
34942"half3 __ovld __cnfn atan2pi(half3 y, half3 x);\n"
34943"half4 __ovld __cnfn atan2pi(half4 y, half4 x);\n"
34944"half8 __ovld __cnfn atan2pi(half8 y, half8 x);\n"
34945"half16 __ovld __cnfn atan2pi(half16 y, half16 x);\n"
34946"#endif //cl_khr_fp16\n"
34947"\n"
34948"/**\n"
34949" * Compute cube-root.\n"
34950" */\n"
34951"float __ovld __cnfn cbrt(float);\n"
34952"float2 __ovld __cnfn cbrt(float2);\n"
34953"float3 __ovld __cnfn cbrt(float3);\n"
34954"float4 __ovld __cnfn cbrt(float4);\n"
34955"float8 __ovld __cnfn cbrt(float8);\n"
34956"float16 __ovld __cnfn cbrt(float16);\n"
34957"#ifdef cl_khr_fp64\n"
34958"double __ovld __cnfn cbrt(double);\n"
34959"double2 __ovld __cnfn cbrt(double2);\n"
34960"double3 __ovld __cnfn cbrt(double3);\n"
34961"double4 __ovld __cnfn cbrt(double4);\n"
34962"double8 __ovld __cnfn cbrt(double8);\n"
34963"double16 __ovld __cnfn cbrt(double16);\n"
34964"#endif //cl_khr_fp64\n"
34965"#ifdef cl_khr_fp16\n"
34966"half __ovld __cnfn cbrt(half);\n"
34967"half2 __ovld __cnfn cbrt(half2);\n"
34968"half3 __ovld __cnfn cbrt(half3);\n"
34969"half4 __ovld __cnfn cbrt(half4);\n"
34970"half8 __ovld __cnfn cbrt(half8);\n"
34971"half16 __ovld __cnfn cbrt(half16);\n"
34972"#endif //cl_khr_fp16\n"
34973"\n"
34974"/**\n"
34975" * Round to integral value using the round to positive\n"
34976" * infinity rounding mode.\n"
34977" */\n"
34978"float __ovld __cnfn ceil(float);\n"
34979"float2 __ovld __cnfn ceil(float2);\n"
34980"float3 __ovld __cnfn ceil(float3);\n"
34981"float4 __ovld __cnfn ceil(float4);\n"
34982"float8 __ovld __cnfn ceil(float8);\n"
34983"float16 __ovld __cnfn ceil(float16);\n"
34984"#ifdef cl_khr_fp64\n"
34985"double __ovld __cnfn ceil(double);\n"
34986"double2 __ovld __cnfn ceil(double2);\n"
34987"double3 __ovld __cnfn ceil(double3);\n"
34988"double4 __ovld __cnfn ceil(double4);\n"
34989"double8 __ovld __cnfn ceil(double8);\n"
34990"double16 __ovld __cnfn ceil(double16);\n"
34991"#endif //cl_khr_fp64\n"
34992"#ifdef cl_khr_fp16\n"
34993"half __ovld __cnfn ceil(half);\n"
34994"half2 __ovld __cnfn ceil(half2);\n"
34995"half3 __ovld __cnfn ceil(half3);\n"
34996"half4 __ovld __cnfn ceil(half4);\n"
34997"half8 __ovld __cnfn ceil(half8);\n"
34998"half16 __ovld __cnfn ceil(half16);\n"
34999"#endif //cl_khr_fp16\n"
35000"\n"
35001"/**\n"
35002" * Returns x with its sign changed to match the sign of y.\n"
35003" */\n"
35004"float __ovld __cnfn copysign(float x, float y);\n"
35005"float2 __ovld __cnfn copysign(float2 x, float2 y);\n"
35006"float3 __ovld __cnfn copysign(float3 x, float3 y);\n"
35007"float4 __ovld __cnfn copysign(float4 x, float4 y);\n"
35008"float8 __ovld __cnfn copysign(float8 x, float8 y);\n"
35009"float16 __ovld __cnfn copysign(float16 x, float16 y);\n"
35010"#ifdef cl_khr_fp64\n"
35011"double __ovld __cnfn copysign(double x, double y);\n"
35012"double2 __ovld __cnfn copysign(double2 x, double2 y);\n"
35013"double3 __ovld __cnfn copysign(double3 x, double3 y);\n"
35014"double4 __ovld __cnfn copysign(double4 x, double4 y);\n"
35015"double8 __ovld __cnfn copysign(double8 x, double8 y);\n"
35016"double16 __ovld __cnfn copysign(double16 x, double16 y);\n"
35017"#endif //cl_khr_fp64\n"
35018"#ifdef cl_khr_fp16\n"
35019"half __ovld __cnfn copysign(half x, half y);\n"
35020"half2 __ovld __cnfn copysign(half2 x, half2 y);\n"
35021"half3 __ovld __cnfn copysign(half3 x, half3 y);\n"
35022"half4 __ovld __cnfn copysign(half4 x, half4 y);\n"
35023"half8 __ovld __cnfn copysign(half8 x, half8 y);\n"
35024"half16 __ovld __cnfn copysign(half16 x, half16 y);\n"
35025"#endif //cl_khr_fp16\n"
35026"\n"
35027"/**\n"
35028" * Compute cosine.\n"
35029" */\n"
35030"float __ovld __cnfn cos(float);\n"
35031"float2 __ovld __cnfn cos(float2);\n"
35032"float3 __ovld __cnfn cos(float3);\n"
35033"float4 __ovld __cnfn cos(float4);\n"
35034"float8 __ovld __cnfn cos(float8);\n"
35035"float16 __ovld __cnfn cos(float16);\n"
35036"#ifdef cl_khr_fp64\n"
35037"double __ovld __cnfn cos(double);\n"
35038"double2 __ovld __cnfn cos(double2);\n"
35039"double3 __ovld __cnfn cos(double3);\n"
35040"double4 __ovld __cnfn cos(double4);\n"
35041"double8 __ovld __cnfn cos(double8);\n"
35042"double16 __ovld __cnfn cos(double16);\n"
35043"#endif //cl_khr_fp64\n"
35044"#ifdef cl_khr_fp16\n"
35045"half __ovld __cnfn cos(half);\n"
35046"half2 __ovld __cnfn cos(half2);\n"
35047"half3 __ovld __cnfn cos(half3);\n"
35048"half4 __ovld __cnfn cos(half4);\n"
35049"half8 __ovld __cnfn cos(half8);\n"
35050"half16 __ovld __cnfn cos(half16);\n"
35051"#endif //cl_khr_fp16\n"
35052"\n"
35053"/**\n"
35054" * Compute hyperbolic cosine.\n"
35055" */\n"
35056"float __ovld __cnfn cosh(float);\n"
35057"float2 __ovld __cnfn cosh(float2);\n"
35058"float3 __ovld __cnfn cosh(float3);\n"
35059"float4 __ovld __cnfn cosh(float4);\n"
35060"float8 __ovld __cnfn cosh(float8);\n"
35061"float16 __ovld __cnfn cosh(float16);\n"
35062"#ifdef cl_khr_fp64\n"
35063"double __ovld __cnfn cosh(double);\n"
35064"double2 __ovld __cnfn cosh(double2);\n"
35065"double3 __ovld __cnfn cosh(double3);\n"
35066"double4 __ovld __cnfn cosh(double4);\n"
35067"double8 __ovld __cnfn cosh(double8);\n"
35068"double16 __ovld __cnfn cosh(double16);\n"
35069"#endif //cl_khr_fp64\n"
35070"#ifdef cl_khr_fp16\n"
35071"half __ovld __cnfn cosh(half);\n"
35072"half2 __ovld __cnfn cosh(half2);\n"
35073"half3 __ovld __cnfn cosh(half3);\n"
35074"half4 __ovld __cnfn cosh(half4);\n"
35075"half8 __ovld __cnfn cosh(half8);\n"
35076"half16 __ovld __cnfn cosh(half16);\n"
35077"#endif //cl_khr_fp16\n"
35078"\n"
35079"/**\n"
35080" * Compute cos (PI * x).\n"
35081" */\n"
35082"float __ovld __cnfn cospi(float x);\n"
35083"float2 __ovld __cnfn cospi(float2 x);\n"
35084"float3 __ovld __cnfn cospi(float3 x);\n"
35085"float4 __ovld __cnfn cospi(float4 x);\n"
35086"float8 __ovld __cnfn cospi(float8 x);\n"
35087"float16 __ovld __cnfn cospi(float16 x);\n"
35088"#ifdef cl_khr_fp64\n"
35089"double __ovld __cnfn cospi(double x);\n"
35090"double2 __ovld __cnfn cospi(double2 x);\n"
35091"double3 __ovld __cnfn cospi(double3 x);\n"
35092"double4 __ovld __cnfn cospi(double4 x);\n"
35093"double8 __ovld __cnfn cospi(double8 x);\n"
35094"double16 __ovld __cnfn cospi(double16 x);\n"
35095"#endif //cl_khr_fp64\n"
35096"#ifdef cl_khr_fp16\n"
35097"half __ovld __cnfn cospi(half x);\n"
35098"half2 __ovld __cnfn cospi(half2 x);\n"
35099"half3 __ovld __cnfn cospi(half3 x);\n"
35100"half4 __ovld __cnfn cospi(half4 x);\n"
35101"half8 __ovld __cnfn cospi(half8 x);\n"
35102"half16 __ovld __cnfn cospi(half16 x);\n"
35103"#endif //cl_khr_fp16\n"
35104"\n"
35105"/**\n"
35106" * Complementary error function.\n"
35107" */\n"
35108"float __ovld __cnfn erfc(float);\n"
35109"float2 __ovld __cnfn erfc(float2);\n"
35110"float3 __ovld __cnfn erfc(float3);\n"
35111"float4 __ovld __cnfn erfc(float4);\n"
35112"float8 __ovld __cnfn erfc(float8);\n"
35113"float16 __ovld __cnfn erfc(float16);\n"
35114"#ifdef cl_khr_fp64\n"
35115"double __ovld __cnfn erfc(double);\n"
35116"double2 __ovld __cnfn erfc(double2);\n"
35117"double3 __ovld __cnfn erfc(double3);\n"
35118"double4 __ovld __cnfn erfc(double4);\n"
35119"double8 __ovld __cnfn erfc(double8);\n"
35120"double16 __ovld __cnfn erfc(double16);\n"
35121"#endif //cl_khr_fp64\n"
35122"#ifdef cl_khr_fp16\n"
35123"half __ovld __cnfn erfc(half);\n"
35124"half2 __ovld __cnfn erfc(half2);\n"
35125"half3 __ovld __cnfn erfc(half3);\n"
35126"half4 __ovld __cnfn erfc(half4);\n"
35127"half8 __ovld __cnfn erfc(half8);\n"
35128"half16 __ovld __cnfn erfc(half16);\n"
35129"#endif //cl_khr_fp16\n"
35130"\n"
35131"/**\n"
35132" * Error function encountered in integrating the\n"
35133" * normal distribution.\n"
35134" */\n"
35135"float __ovld __cnfn erf(float);\n"
35136"float2 __ovld __cnfn erf(float2);\n"
35137"float3 __ovld __cnfn erf(float3);\n"
35138"float4 __ovld __cnfn erf(float4);\n"
35139"float8 __ovld __cnfn erf(float8);\n"
35140"float16 __ovld __cnfn erf(float16);\n"
35141"#ifdef cl_khr_fp64\n"
35142"double __ovld __cnfn erf(double);\n"
35143"double2 __ovld __cnfn erf(double2);\n"
35144"double3 __ovld __cnfn erf(double3);\n"
35145"double4 __ovld __cnfn erf(double4);\n"
35146"double8 __ovld __cnfn erf(double8);\n"
35147"double16 __ovld __cnfn erf(double16);\n"
35148"#endif //cl_khr_fp64\n"
35149"#ifdef cl_khr_fp16\n"
35150"half __ovld __cnfn erf(half);\n"
35151"half2 __ovld __cnfn erf(half2);\n"
35152"half3 __ovld __cnfn erf(half3);\n"
35153"half4 __ovld __cnfn erf(half4);\n"
35154"half8 __ovld __cnfn erf(half8);\n"
35155"half16 __ovld __cnfn erf(half16);\n"
35156"#endif //cl_khr_fp16\n"
35157"\n"
35158"/**\n"
35159" * Compute the base e exponential function of x.\n"
35160" */\n"
35161"float __ovld __cnfn exp(float x);\n"
35162"float2 __ovld __cnfn exp(float2 x);\n"
35163"float3 __ovld __cnfn exp(float3 x);\n"
35164"float4 __ovld __cnfn exp(float4 x);\n"
35165"float8 __ovld __cnfn exp(float8 x);\n"
35166"float16 __ovld __cnfn exp(float16 x);\n"
35167"#ifdef cl_khr_fp64\n"
35168"double __ovld __cnfn exp(double x);\n"
35169"double2 __ovld __cnfn exp(double2 x);\n"
35170"double3 __ovld __cnfn exp(double3 x);\n"
35171"double4 __ovld __cnfn exp(double4 x);\n"
35172"double8 __ovld __cnfn exp(double8 x);\n"
35173"double16 __ovld __cnfn exp(double16 x);\n"
35174"#endif //cl_khr_fp64\n"
35175"#ifdef cl_khr_fp16\n"
35176"half __ovld __cnfn exp(half x);\n"
35177"half2 __ovld __cnfn exp(half2 x);\n"
35178"half3 __ovld __cnfn exp(half3 x);\n"
35179"half4 __ovld __cnfn exp(half4 x);\n"
35180"half8 __ovld __cnfn exp(half8 x);\n"
35181"half16 __ovld __cnfn exp(half16 x);\n"
35182"#endif //cl_khr_fp16\n"
35183"\n"
35184"/**\n"
35185" * Exponential base 2 function.\n"
35186" */\n"
35187"float __ovld __cnfn exp2(float);\n"
35188"float2 __ovld __cnfn exp2(float2);\n"
35189"float3 __ovld __cnfn exp2(float3);\n"
35190"float4 __ovld __cnfn exp2(float4);\n"
35191"float8 __ovld __cnfn exp2(float8);\n"
35192"float16 __ovld __cnfn exp2(float16);\n"
35193"#ifdef cl_khr_fp64\n"
35194"double __ovld __cnfn exp2(double);\n"
35195"double2 __ovld __cnfn exp2(double2);\n"
35196"double3 __ovld __cnfn exp2(double3);\n"
35197"double4 __ovld __cnfn exp2(double4);\n"
35198"double8 __ovld __cnfn exp2(double8);\n"
35199"double16 __ovld __cnfn exp2(double16);\n"
35200"#endif //cl_khr_fp64\n"
35201"#ifdef cl_khr_fp16\n"
35202"half __ovld __cnfn exp2(half);\n"
35203"half2 __ovld __cnfn exp2(half2);\n"
35204"half3 __ovld __cnfn exp2(half3);\n"
35205"half4 __ovld __cnfn exp2(half4);\n"
35206"half8 __ovld __cnfn exp2(half8);\n"
35207"half16 __ovld __cnfn exp2(half16);\n"
35208"#endif //cl_khr_fp16\n"
35209"\n"
35210"/**\n"
35211" * Exponential base 10 function.\n"
35212" */\n"
35213"float __ovld __cnfn exp10(float);\n"
35214"float2 __ovld __cnfn exp10(float2);\n"
35215"float3 __ovld __cnfn exp10(float3);\n"
35216"float4 __ovld __cnfn exp10(float4);\n"
35217"float8 __ovld __cnfn exp10(float8);\n"
35218"float16 __ovld __cnfn exp10(float16);\n"
35219"#ifdef cl_khr_fp64\n"
35220"double __ovld __cnfn exp10(double);\n"
35221"double2 __ovld __cnfn exp10(double2);\n"
35222"double3 __ovld __cnfn exp10(double3);\n"
35223"double4 __ovld __cnfn exp10(double4);\n"
35224"double8 __ovld __cnfn exp10(double8);\n"
35225"double16 __ovld __cnfn exp10(double16);\n"
35226"#endif //cl_khr_fp64\n"
35227"#ifdef cl_khr_fp16\n"
35228"half __ovld __cnfn exp10(half);\n"
35229"half2 __ovld __cnfn exp10(half2);\n"
35230"half3 __ovld __cnfn exp10(half3);\n"
35231"half4 __ovld __cnfn exp10(half4);\n"
35232"half8 __ovld __cnfn exp10(half8);\n"
35233"half16 __ovld __cnfn exp10(half16);\n"
35234"#endif //cl_khr_fp16\n"
35235"\n"
35236"/**\n"
35237" * Compute e^x- 1.0.\n"
35238" */\n"
35239"float __ovld __cnfn expm1(float x);\n"
35240"float2 __ovld __cnfn expm1(float2 x);\n"
35241"float3 __ovld __cnfn expm1(float3 x);\n"
35242"float4 __ovld __cnfn expm1(float4 x);\n"
35243"float8 __ovld __cnfn expm1(float8 x);\n"
35244"float16 __ovld __cnfn expm1(float16 x);\n"
35245"#ifdef cl_khr_fp64\n"
35246"double __ovld __cnfn expm1(double x);\n"
35247"double2 __ovld __cnfn expm1(double2 x);\n"
35248"double3 __ovld __cnfn expm1(double3 x);\n"
35249"double4 __ovld __cnfn expm1(double4 x);\n"
35250"double8 __ovld __cnfn expm1(double8 x);\n"
35251"double16 __ovld __cnfn expm1(double16 x);\n"
35252"#endif //cl_khr_fp64\n"
35253"#ifdef cl_khr_fp16\n"
35254"half __ovld __cnfn expm1(half x);\n"
35255"half2 __ovld __cnfn expm1(half2 x);\n"
35256"half3 __ovld __cnfn expm1(half3 x);\n"
35257"half4 __ovld __cnfn expm1(half4 x);\n"
35258"half8 __ovld __cnfn expm1(half8 x);\n"
35259"half16 __ovld __cnfn expm1(half16 x);\n"
35260"#endif //cl_khr_fp16\n"
35261"\n"
35262"/**\n"
35263" * Compute absolute value of a floating-point number.\n"
35264" */\n"
35265"float __ovld __cnfn fabs(float);\n"
35266"float2 __ovld __cnfn fabs(float2);\n"
35267"float3 __ovld __cnfn fabs(float3);\n"
35268"float4 __ovld __cnfn fabs(float4);\n"
35269"float8 __ovld __cnfn fabs(float8);\n"
35270"float16 __ovld __cnfn fabs(float16);\n"
35271"#ifdef cl_khr_fp64\n"
35272"double __ovld __cnfn fabs(double);\n"
35273"double2 __ovld __cnfn fabs(double2);\n"
35274"double3 __ovld __cnfn fabs(double3);\n"
35275"double4 __ovld __cnfn fabs(double4);\n"
35276"double8 __ovld __cnfn fabs(double8);\n"
35277"double16 __ovld __cnfn fabs(double16);\n"
35278"#endif //cl_khr_fp64\n"
35279"#ifdef cl_khr_fp16\n"
35280"half __ovld __cnfn fabs(half);\n"
35281"half2 __ovld __cnfn fabs(half2);\n"
35282"half3 __ovld __cnfn fabs(half3);\n"
35283"half4 __ovld __cnfn fabs(half4);\n"
35284"half8 __ovld __cnfn fabs(half8);\n"
35285"half16 __ovld __cnfn fabs(half16);\n"
35286"#endif //cl_khr_fp16\n"
35287"\n"
35288"/**\n"
35289" * x - y if x > y, +0 if x is less than or equal to y.\n"
35290" */\n"
35291"float __ovld __cnfn fdim(float x, float y);\n"
35292"float2 __ovld __cnfn fdim(float2 x, float2 y);\n"
35293"float3 __ovld __cnfn fdim(float3 x, float3 y);\n"
35294"float4 __ovld __cnfn fdim(float4 x, float4 y);\n"
35295"float8 __ovld __cnfn fdim(float8 x, float8 y);\n"
35296"float16 __ovld __cnfn fdim(float16 x, float16 y);\n"
35297"#ifdef cl_khr_fp64\n"
35298"double __ovld __cnfn fdim(double x, double y);\n"
35299"double2 __ovld __cnfn fdim(double2 x, double2 y);\n"
35300"double3 __ovld __cnfn fdim(double3 x, double3 y);\n"
35301"double4 __ovld __cnfn fdim(double4 x, double4 y);\n"
35302"double8 __ovld __cnfn fdim(double8 x, double8 y);\n"
35303"double16 __ovld __cnfn fdim(double16 x, double16 y);\n"
35304"#endif //cl_khr_fp64\n"
35305"#ifdef cl_khr_fp16\n"
35306"half __ovld __cnfn fdim(half x, half y);\n"
35307"half2 __ovld __cnfn fdim(half2 x, half2 y);\n"
35308"half3 __ovld __cnfn fdim(half3 x, half3 y);\n"
35309"half4 __ovld __cnfn fdim(half4 x, half4 y);\n"
35310"half8 __ovld __cnfn fdim(half8 x, half8 y);\n"
35311"half16 __ovld __cnfn fdim(half16 x, half16 y);\n"
35312"#endif //cl_khr_fp16\n"
35313"\n"
35314"/**\n"
35315" * Round to integral value using the round to -ve\n"
35316" * infinity rounding mode.\n"
35317" */\n"
35318"float __ovld __cnfn floor(float);\n"
35319"float2 __ovld __cnfn floor(float2);\n"
35320"float3 __ovld __cnfn floor(float3);\n"
35321"float4 __ovld __cnfn floor(float4);\n"
35322"float8 __ovld __cnfn floor(float8);\n"
35323"float16 __ovld __cnfn floor(float16);\n"
35324"#ifdef cl_khr_fp64\n"
35325"double __ovld __cnfn floor(double);\n"
35326"double2 __ovld __cnfn floor(double2);\n"
35327"double3 __ovld __cnfn floor(double3);\n"
35328"double4 __ovld __cnfn floor(double4);\n"
35329"double8 __ovld __cnfn floor(double8);\n"
35330"double16 __ovld __cnfn floor(double16);\n"
35331"#endif //cl_khr_fp64\n"
35332"#ifdef cl_khr_fp16\n"
35333"half __ovld __cnfn floor(half);\n"
35334"half2 __ovld __cnfn floor(half2);\n"
35335"half3 __ovld __cnfn floor(half3);\n"
35336"half4 __ovld __cnfn floor(half4);\n"
35337"half8 __ovld __cnfn floor(half8);\n"
35338"half16 __ovld __cnfn floor(half16);\n"
35339"#endif //cl_khr_fp16\n"
35340"\n"
35341"/**\n"
35342" * Returns the correctly rounded floating-point\n"
35343" * representation of the sum of c with the infinitely\n"
35344" * precise product of a and b. Rounding of\n"
35345" * intermediate products shall not occur. Edge case\n"
35346" * behavior is per the IEEE 754-2008 standard.\n"
35347" */\n"
35348"float __ovld __cnfn fma(float a, float b, float c);\n"
35349"float2 __ovld __cnfn fma(float2 a, float2 b, float2 c);\n"
35350"float3 __ovld __cnfn fma(float3 a, float3 b, float3 c);\n"
35351"float4 __ovld __cnfn fma(float4 a, float4 b, float4 c);\n"
35352"float8 __ovld __cnfn fma(float8 a, float8 b, float8 c);\n"
35353"float16 __ovld __cnfn fma(float16 a, float16 b, float16 c);\n"
35354"#ifdef cl_khr_fp64\n"
35355"double __ovld __cnfn fma(double a, double b, double c);\n"
35356"double2 __ovld __cnfn fma(double2 a, double2 b, double2 c);\n"
35357"double3 __ovld __cnfn fma(double3 a, double3 b, double3 c);\n"
35358"double4 __ovld __cnfn fma(double4 a, double4 b, double4 c);\n"
35359"double8 __ovld __cnfn fma(double8 a, double8 b, double8 c);\n"
35360"double16 __ovld __cnfn fma(double16 a, double16 b, double16 c);\n"
35361"#endif //cl_khr_fp64\n"
35362"#ifdef cl_khr_fp16\n"
35363"half __ovld __cnfn fma(half a, half b, half c);\n"
35364"half2 __ovld __cnfn fma(half2 a, half2 b, half2 c);\n"
35365"half3 __ovld __cnfn fma(half3 a, half3 b, half3 c);\n"
35366"half4 __ovld __cnfn fma(half4 a, half4 b, half4 c);\n"
35367"half8 __ovld __cnfn fma(half8 a, half8 b, half8 c);\n"
35368"half16 __ovld __cnfn fma(half16 a, half16 b, half16 c);\n"
35369"#endif //cl_khr_fp16\n"
35370"\n"
35371"/**\n"
35372" * Returns y if x < y, otherwise it returns x. If one\n"
35373" * argument is a NaN, fmax() returns the other\n"
35374" * argument. If both arguments are NaNs, fmax()\n"
35375" * returns a NaN.\n"
35376" */\n"
35377"float __ovld __cnfn fmax(float x, float y);\n"
35378"float2 __ovld __cnfn fmax(float2 x, float2 y);\n"
35379"float3 __ovld __cnfn fmax(float3 x, float3 y);\n"
35380"float4 __ovld __cnfn fmax(float4 x, float4 y);\n"
35381"float8 __ovld __cnfn fmax(float8 x, float8 y);\n"
35382"float16 __ovld __cnfn fmax(float16 x, float16 y);\n"
35383"float2 __ovld __cnfn fmax(float2 x, float y);\n"
35384"float3 __ovld __cnfn fmax(float3 x, float y);\n"
35385"float4 __ovld __cnfn fmax(float4 x, float y);\n"
35386"float8 __ovld __cnfn fmax(float8 x, float y);\n"
35387"float16 __ovld __cnfn fmax(float16 x, float y);\n"
35388"#ifdef cl_khr_fp64\n"
35389"double __ovld __cnfn fmax(double x, double y);\n"
35390"double2 __ovld __cnfn fmax(double2 x, double2 y);\n"
35391"double3 __ovld __cnfn fmax(double3 x, double3 y);\n"
35392"double4 __ovld __cnfn fmax(double4 x, double4 y);\n"
35393"double8 __ovld __cnfn fmax(double8 x, double8 y);\n"
35394"double16 __ovld __cnfn fmax(double16 x, double16 y);\n"
35395"double2 __ovld __cnfn fmax(double2 x, double y);\n"
35396"double3 __ovld __cnfn fmax(double3 x, double y);\n"
35397"double4 __ovld __cnfn fmax(double4 x, double y);\n"
35398"double8 __ovld __cnfn fmax(double8 x, double y);\n"
35399"double16 __ovld __cnfn fmax(double16 x, double y);\n"
35400"#endif //cl_khr_fp64\n"
35401"#ifdef cl_khr_fp16\n"
35402"half __ovld __cnfn fmax(half x, half y);\n"
35403"half2 __ovld __cnfn fmax(half2 x, half2 y);\n"
35404"half3 __ovld __cnfn fmax(half3 x, half3 y);\n"
35405"half4 __ovld __cnfn fmax(half4 x, half4 y);\n"
35406"half8 __ovld __cnfn fmax(half8 x, half8 y);\n"
35407"half16 __ovld __cnfn fmax(half16 x, half16 y);\n"
35408"half2 __ovld __cnfn fmax(half2 x, half y);\n"
35409"half3 __ovld __cnfn fmax(half3 x, half y);\n"
35410"half4 __ovld __cnfn fmax(half4 x, half y);\n"
35411"half8 __ovld __cnfn fmax(half8 x, half y);\n"
35412"half16 __ovld __cnfn fmax(half16 x, half y);\n"
35413"#endif //cl_khr_fp16\n"
35414"\n"
35415"/**\n"
35416" * Returns y if y < x, otherwise it returns x. If one\n"
35417" * argument is a NaN, fmin() returns the other\n"
35418" * argument. If both arguments are NaNs, fmin()\n"
35419" * returns a NaN.\n"
35420" */\n"
35421"float __ovld __cnfn fmin(float x, float y);\n"
35422"float2 __ovld __cnfn fmin(float2 x, float2 y);\n"
35423"float3 __ovld __cnfn fmin(float3 x, float3 y);\n"
35424"float4 __ovld __cnfn fmin(float4 x, float4 y);\n"
35425"float8 __ovld __cnfn fmin(float8 x, float8 y);\n"
35426"float16 __ovld __cnfn fmin(float16 x, float16 y);\n"
35427"float2 __ovld __cnfn fmin(float2 x, float y);\n"
35428"float3 __ovld __cnfn fmin(float3 x, float y);\n"
35429"float4 __ovld __cnfn fmin(float4 x, float y);\n"
35430"float8 __ovld __cnfn fmin(float8 x, float y);\n"
35431"float16 __ovld __cnfn fmin(float16 x, float y);\n"
35432"#ifdef cl_khr_fp64\n"
35433"double __ovld __cnfn fmin(double x, double y);\n"
35434"double2 __ovld __cnfn fmin(double2 x, double2 y);\n"
35435"double3 __ovld __cnfn fmin(double3 x, double3 y);\n"
35436"double4 __ovld __cnfn fmin(double4 x, double4 y);\n"
35437"double8 __ovld __cnfn fmin(double8 x, double8 y);\n"
35438"double16 __ovld __cnfn fmin(double16 x, double16 y);\n"
35439"double2 __ovld __cnfn fmin(double2 x, double y);\n"
35440"double3 __ovld __cnfn fmin(double3 x, double y);\n"
35441"double4 __ovld __cnfn fmin(double4 x, double y);\n"
35442"double8 __ovld __cnfn fmin(double8 x, double y);\n"
35443"double16 __ovld __cnfn fmin(double16 x, double y);\n"
35444"#endif //cl_khr_fp64\n"
35445"#ifdef cl_khr_fp16\n"
35446"half __ovld __cnfn fmin(half x, half y);\n"
35447"half2 __ovld __cnfn fmin(half2 x, half2 y);\n"
35448"half3 __ovld __cnfn fmin(half3 x, half3 y);\n"
35449"half4 __ovld __cnfn fmin(half4 x, half4 y);\n"
35450"half8 __ovld __cnfn fmin(half8 x, half8 y);\n"
35451"half16 __ovld __cnfn fmin(half16 x, half16 y);\n"
35452"half2 __ovld __cnfn fmin(half2 x, half y);\n"
35453"half3 __ovld __cnfn fmin(half3 x, half y);\n"
35454"half4 __ovld __cnfn fmin(half4 x, half y);\n"
35455"half8 __ovld __cnfn fmin(half8 x, half y);\n"
35456"half16 __ovld __cnfn fmin(half16 x, half y);\n"
35457"#endif //cl_khr_fp16\n"
35458"\n"
35459"/**\n"
35460" * Modulus. Returns x - y * trunc (x/y).\n"
35461" */\n"
35462"float __ovld __cnfn fmod(float x, float y);\n"
35463"float2 __ovld __cnfn fmod(float2 x, float2 y);\n"
35464"float3 __ovld __cnfn fmod(float3 x, float3 y);\n"
35465"float4 __ovld __cnfn fmod(float4 x, float4 y);\n"
35466"float8 __ovld __cnfn fmod(float8 x, float8 y);\n"
35467"float16 __ovld __cnfn fmod(float16 x, float16 y);\n"
35468"#ifdef cl_khr_fp64\n"
35469"double __ovld __cnfn fmod(double x, double y);\n"
35470"double2 __ovld __cnfn fmod(double2 x, double2 y);\n"
35471"double3 __ovld __cnfn fmod(double3 x, double3 y);\n"
35472"double4 __ovld __cnfn fmod(double4 x, double4 y);\n"
35473"double8 __ovld __cnfn fmod(double8 x, double8 y);\n"
35474"double16 __ovld __cnfn fmod(double16 x, double16 y);\n"
35475"#endif //cl_khr_fp64\n"
35476"#ifdef cl_khr_fp16\n"
35477"half __ovld __cnfn fmod(half x, half y);\n"
35478"half2 __ovld __cnfn fmod(half2 x, half2 y);\n"
35479"half3 __ovld __cnfn fmod(half3 x, half3 y);\n"
35480"half4 __ovld __cnfn fmod(half4 x, half4 y);\n"
35481"half8 __ovld __cnfn fmod(half8 x, half8 y);\n"
35482"half16 __ovld __cnfn fmod(half16 x, half16 y);\n"
35483"#endif //cl_khr_fp16\n"
35484"\n"
35485"/**\n"
35486" * Returns fmin(x - floor (x), 0x1.fffffep-1f ).\n"
35487" * floor(x) is returned in iptr.\n"
35488" */\n"
35489"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
35490"float __ovld fract(float x, float *iptr);\n"
35491"float2 __ovld fract(float2 x, float2 *iptr);\n"
35492"float3 __ovld fract(float3 x, float3 *iptr);\n"
35493"float4 __ovld fract(float4 x, float4 *iptr);\n"
35494"float8 __ovld fract(float8 x, float8 *iptr);\n"
35495"float16 __ovld fract(float16 x, float16 *iptr);\n"
35496"#ifdef cl_khr_fp64\n"
35497"double __ovld fract(double x, double *iptr);\n"
35498"double2 __ovld fract(double2 x, double2 *iptr);\n"
35499"double3 __ovld fract(double3 x, double3 *iptr);\n"
35500"double4 __ovld fract(double4 x, double4 *iptr);\n"
35501"double8 __ovld fract(double8 x, double8 *iptr);\n"
35502"double16 __ovld fract(double16 x, double16 *iptr);\n"
35503"#endif //cl_khr_fp64\n"
35504"#ifdef cl_khr_fp16\n"
35505"half __ovld fract(half x, half *iptr);\n"
35506"half2 __ovld fract(half2 x, half2 *iptr);\n"
35507"half3 __ovld fract(half3 x, half3 *iptr);\n"
35508"half4 __ovld fract(half4 x, half4 *iptr);\n"
35509"half8 __ovld fract(half8 x, half8 *iptr);\n"
35510"half16 __ovld fract(half16 x, half16 *iptr);\n"
35511"#endif //cl_khr_fp16\n"
35512"#else\n"
35513"float __ovld fract(float x, __global float *iptr);\n"
35514"float2 __ovld fract(float2 x, __global float2 *iptr);\n"
35515"float3 __ovld fract(float3 x, __global float3 *iptr);\n"
35516"float4 __ovld fract(float4 x, __global float4 *iptr);\n"
35517"float8 __ovld fract(float8 x, __global float8 *iptr);\n"
35518"float16 __ovld fract(float16 x, __global float16 *iptr);\n"
35519"float __ovld fract(float x, __local float *iptr);\n"
35520"float2 __ovld fract(float2 x, __local float2 *iptr);\n"
35521"float3 __ovld fract(float3 x, __local float3 *iptr);\n"
35522"float4 __ovld fract(float4 x, __local float4 *iptr);\n"
35523"float8 __ovld fract(float8 x, __local float8 *iptr);\n"
35524"float16 __ovld fract(float16 x, __local float16 *iptr);\n"
35525"float __ovld fract(float x, __private float *iptr);\n"
35526"float2 __ovld fract(float2 x, __private float2 *iptr);\n"
35527"float3 __ovld fract(float3 x, __private float3 *iptr);\n"
35528"float4 __ovld fract(float4 x, __private float4 *iptr);\n"
35529"float8 __ovld fract(float8 x, __private float8 *iptr);\n"
35530"float16 __ovld fract(float16 x, __private float16 *iptr);\n"
35531"#ifdef cl_khr_fp64\n"
35532"double __ovld fract(double x, __global double *iptr);\n"
35533"double2 __ovld fract(double2 x, __global double2 *iptr);\n"
35534"double3 __ovld fract(double3 x, __global double3 *iptr);\n"
35535"double4 __ovld fract(double4 x, __global double4 *iptr);\n"
35536"double8 __ovld fract(double8 x, __global double8 *iptr);\n"
35537"double16 __ovld fract(double16 x, __global double16 *iptr);\n"
35538"double __ovld fract(double x, __local double *iptr);\n"
35539"double2 __ovld fract(double2 x, __local double2 *iptr);\n"
35540"double3 __ovld fract(double3 x, __local double3 *iptr);\n"
35541"double4 __ovld fract(double4 x, __local double4 *iptr);\n"
35542"double8 __ovld fract(double8 x, __local double8 *iptr);\n"
35543"double16 __ovld fract(double16 x, __local double16 *iptr);\n"
35544"double __ovld fract(double x, __private double *iptr);\n"
35545"double2 __ovld fract(double2 x, __private double2 *iptr);\n"
35546"double3 __ovld fract(double3 x, __private double3 *iptr);\n"
35547"double4 __ovld fract(double4 x, __private double4 *iptr);\n"
35548"double8 __ovld fract(double8 x, __private double8 *iptr);\n"
35549"double16 __ovld fract(double16 x, __private double16 *iptr);\n"
35550"#endif //cl_khr_fp64\n"
35551"#ifdef cl_khr_fp16\n"
35552"half __ovld fract(half x, __global half *iptr);\n"
35553"half2 __ovld fract(half2 x, __global half2 *iptr);\n"
35554"half3 __ovld fract(half3 x, __global half3 *iptr);\n"
35555"half4 __ovld fract(half4 x, __global half4 *iptr);\n"
35556"half8 __ovld fract(half8 x, __global half8 *iptr);\n"
35557"half16 __ovld fract(half16 x, __global half16 *iptr);\n"
35558"half __ovld fract(half x, __local half *iptr);\n"
35559"half2 __ovld fract(half2 x, __local half2 *iptr);\n"
35560"half3 __ovld fract(half3 x, __local half3 *iptr);\n"
35561"half4 __ovld fract(half4 x, __local half4 *iptr);\n"
35562"half8 __ovld fract(half8 x, __local half8 *iptr);\n"
35563"half16 __ovld fract(half16 x, __local half16 *iptr);\n"
35564"half __ovld fract(half x, __private half *iptr);\n"
35565"half2 __ovld fract(half2 x, __private half2 *iptr);\n"
35566"half3 __ovld fract(half3 x, __private half3 *iptr);\n"
35567"half4 __ovld fract(half4 x, __private half4 *iptr);\n"
35568"half8 __ovld fract(half8 x, __private half8 *iptr);\n"
35569"half16 __ovld fract(half16 x, __private half16 *iptr);\n"
35570"#endif //cl_khr_fp16\n"
35571"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
35572"\n"
35573"/**\n"
35574" * Extract mantissa and exponent from x. For each\n"
35575" * component the mantissa returned is a float with\n"
35576" * magnitude in the interval [1/2, 1) or 0. Each\n"
35577" * component of x equals mantissa returned * 2^exp.\n"
35578" */\n"
35579"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
35580"float __ovld frexp(float x, int *exp);\n"
35581"float2 __ovld frexp(float2 x, int2 *exp);\n"
35582"float3 __ovld frexp(float3 x, int3 *exp);\n"
35583"float4 __ovld frexp(float4 x, int4 *exp);\n"
35584"float8 __ovld frexp(float8 x, int8 *exp);\n"
35585"float16 __ovld frexp(float16 x, int16 *exp);\n"
35586"#ifdef cl_khr_fp64\n"
35587"double __ovld frexp(double x, int *exp);\n"
35588"double2 __ovld frexp(double2 x, int2 *exp);\n"
35589"double3 __ovld frexp(double3 x, int3 *exp);\n"
35590"double4 __ovld frexp(double4 x, int4 *exp);\n"
35591"double8 __ovld frexp(double8 x, int8 *exp);\n"
35592"double16 __ovld frexp(double16 x, int16 *exp);\n"
35593"#endif //cl_khr_fp64\n"
35594"#ifdef cl_khr_fp16\n"
35595"half __ovld frexp(half x, int *exp);\n"
35596"half2 __ovld frexp(half2 x, int2 *exp);\n"
35597"half3 __ovld frexp(half3 x, int3 *exp);\n"
35598"half4 __ovld frexp(half4 x, int4 *exp);\n"
35599"half8 __ovld frexp(half8 x, int8 *exp);\n"
35600"half16 __ovld frexp(half16 x, int16 *exp);\n"
35601"#endif //cl_khr_fp16\n"
35602"#else\n"
35603"float __ovld frexp(float x, __global int *exp);\n"
35604"float2 __ovld frexp(float2 x, __global int2 *exp);\n"
35605"float3 __ovld frexp(float3 x, __global int3 *exp);\n"
35606"float4 __ovld frexp(float4 x, __global int4 *exp);\n"
35607"float8 __ovld frexp(float8 x, __global int8 *exp);\n"
35608"float16 __ovld frexp(float16 x, __global int16 *exp);\n"
35609"float __ovld frexp(float x, __local int *exp);\n"
35610"float2 __ovld frexp(float2 x, __local int2 *exp);\n"
35611"float3 __ovld frexp(float3 x, __local int3 *exp);\n"
35612"float4 __ovld frexp(float4 x, __local int4 *exp);\n"
35613"float8 __ovld frexp(float8 x, __local int8 *exp);\n"
35614"float16 __ovld frexp(float16 x, __local int16 *exp);\n"
35615"float __ovld frexp(float x, __private int *exp);\n"
35616"float2 __ovld frexp(float2 x, __private int2 *exp);\n"
35617"float3 __ovld frexp(float3 x, __private int3 *exp);\n"
35618"float4 __ovld frexp(float4 x, __private int4 *exp);\n"
35619"float8 __ovld frexp(float8 x, __private int8 *exp);\n"
35620"float16 __ovld frexp(float16 x, __private int16 *exp);\n"
35621"#ifdef cl_khr_fp64\n"
35622"double __ovld frexp(double x, __global int *exp);\n"
35623"double2 __ovld frexp(double2 x, __global int2 *exp);\n"
35624"double3 __ovld frexp(double3 x, __global int3 *exp);\n"
35625"double4 __ovld frexp(double4 x, __global int4 *exp);\n"
35626"double8 __ovld frexp(double8 x, __global int8 *exp);\n"
35627"double16 __ovld frexp(double16 x, __global int16 *exp);\n"
35628"double __ovld frexp(double x, __local int *exp);\n"
35629"double2 __ovld frexp(double2 x, __local int2 *exp);\n"
35630"double3 __ovld frexp(double3 x, __local int3 *exp);\n"
35631"double4 __ovld frexp(double4 x, __local int4 *exp);\n"
35632"double8 __ovld frexp(double8 x, __local int8 *exp);\n"
35633"double16 __ovld frexp(double16 x, __local int16 *exp);\n"
35634"double __ovld frexp(double x, __private int *exp);\n"
35635"double2 __ovld frexp(double2 x, __private int2 *exp);\n"
35636"double3 __ovld frexp(double3 x, __private int3 *exp);\n"
35637"double4 __ovld frexp(double4 x, __private int4 *exp);\n"
35638"double8 __ovld frexp(double8 x, __private int8 *exp);\n"
35639"double16 __ovld frexp(double16 x, __private int16 *exp);\n"
35640"#endif //cl_khr_fp64\n"
35641"#ifdef cl_khr_fp16\n"
35642"half __ovld frexp(half x, __global int *exp);\n"
35643"half2 __ovld frexp(half2 x, __global int2 *exp);\n"
35644"half3 __ovld frexp(half3 x, __global int3 *exp);\n"
35645"half4 __ovld frexp(half4 x, __global int4 *exp);\n"
35646"half8 __ovld frexp(half8 x, __global int8 *exp);\n"
35647"half16 __ovld frexp(half16 x, __global int16 *exp);\n"
35648"half __ovld frexp(half x, __local int *exp);\n"
35649"half2 __ovld frexp(half2 x, __local int2 *exp);\n"
35650"half3 __ovld frexp(half3 x, __local int3 *exp);\n"
35651"half4 __ovld frexp(half4 x, __local int4 *exp);\n"
35652"half8 __ovld frexp(half8 x, __local int8 *exp);\n"
35653"half16 __ovld frexp(half16 x, __local int16 *exp);\n"
35654"half __ovld frexp(half x, __private int *exp);\n"
35655"half2 __ovld frexp(half2 x, __private int2 *exp);\n"
35656"half3 __ovld frexp(half3 x, __private int3 *exp);\n"
35657"half4 __ovld frexp(half4 x, __private int4 *exp);\n"
35658"half8 __ovld frexp(half8 x, __private int8 *exp);\n"
35659"half16 __ovld frexp(half16 x, __private int16 *exp);\n"
35660"#endif //cl_khr_fp16\n"
35661"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
35662"\n"
35663"/**\n"
35664" * Compute the value of the square root of x^2 + y^2\n"
35665" * without undue overflow or underflow.\n"
35666" */\n"
35667"float __ovld __cnfn hypot(float x, float y);\n"
35668"float2 __ovld __cnfn hypot(float2 x, float2 y);\n"
35669"float3 __ovld __cnfn hypot(float3 x, float3 y);\n"
35670"float4 __ovld __cnfn hypot(float4 x, float4 y);\n"
35671"float8 __ovld __cnfn hypot(float8 x, float8 y);\n"
35672"float16 __ovld __cnfn hypot(float16 x, float16 y);\n"
35673"#ifdef cl_khr_fp64\n"
35674"double __ovld __cnfn hypot(double x, double y);\n"
35675"double2 __ovld __cnfn hypot(double2 x, double2 y);\n"
35676"double3 __ovld __cnfn hypot(double3 x, double3 y);\n"
35677"double4 __ovld __cnfn hypot(double4 x, double4 y);\n"
35678"double8 __ovld __cnfn hypot(double8 x, double8 y);\n"
35679"double16 __ovld __cnfn hypot(double16 x, double16 y);\n"
35680"#endif //cl_khr_fp64\n"
35681"#ifdef cl_khr_fp16\n"
35682"half __ovld __cnfn hypot(half x, half y);\n"
35683"half2 __ovld __cnfn hypot(half2 x, half2 y);\n"
35684"half3 __ovld __cnfn hypot(half3 x, half3 y);\n"
35685"half4 __ovld __cnfn hypot(half4 x, half4 y);\n"
35686"half8 __ovld __cnfn hypot(half8 x, half8 y);\n"
35687"half16 __ovld __cnfn hypot(half16 x, half16 y);\n"
35688"#endif //cl_khr_fp16\n"
35689"\n"
35690"/**\n"
35691" * Return the exponent as an integer value.\n"
35692" */\n"
35693"int __ovld __cnfn ilogb(float x);\n"
35694"int2 __ovld __cnfn ilogb(float2 x);\n"
35695"int3 __ovld __cnfn ilogb(float3 x);\n"
35696"int4 __ovld __cnfn ilogb(float4 x);\n"
35697"int8 __ovld __cnfn ilogb(float8 x);\n"
35698"int16 __ovld __cnfn ilogb(float16 x);\n"
35699"#ifdef cl_khr_fp64\n"
35700"int __ovld __cnfn ilogb(double x);\n"
35701"int2 __ovld __cnfn ilogb(double2 x);\n"
35702"int3 __ovld __cnfn ilogb(double3 x);\n"
35703"int4 __ovld __cnfn ilogb(double4 x);\n"
35704"int8 __ovld __cnfn ilogb(double8 x);\n"
35705"int16 __ovld __cnfn ilogb(double16 x);\n"
35706"#endif //cl_khr_fp64\n"
35707"#ifdef cl_khr_fp16\n"
35708"int __ovld __cnfn ilogb(half x);\n"
35709"int2 __ovld __cnfn ilogb(half2 x);\n"
35710"int3 __ovld __cnfn ilogb(half3 x);\n"
35711"int4 __ovld __cnfn ilogb(half4 x);\n"
35712"int8 __ovld __cnfn ilogb(half8 x);\n"
35713"int16 __ovld __cnfn ilogb(half16 x);\n"
35714"#endif //cl_khr_fp16\n"
35715"\n"
35716"/**\n"
35717" * Multiply x by 2 to the power n.\n"
35718" */\n"
35719"float __ovld __cnfn ldexp(float x, int n);\n"
35720"float2 __ovld __cnfn ldexp(float2 x, int2 n);\n"
35721"float3 __ovld __cnfn ldexp(float3 x, int3 n);\n"
35722"float4 __ovld __cnfn ldexp(float4 x, int4 n);\n"
35723"float8 __ovld __cnfn ldexp(float8 x, int8 n);\n"
35724"float16 __ovld __cnfn ldexp(float16 x, int16 n);\n"
35725"float2 __ovld __cnfn ldexp(float2 x, int n);\n"
35726"float3 __ovld __cnfn ldexp(float3 x, int n);\n"
35727"float4 __ovld __cnfn ldexp(float4 x, int n);\n"
35728"float8 __ovld __cnfn ldexp(float8 x, int n);\n"
35729"float16 __ovld __cnfn ldexp(float16 x, int n);\n"
35730"#ifdef cl_khr_fp64\n"
35731"double __ovld __cnfn ldexp(double x, int n);\n"
35732"double2 __ovld __cnfn ldexp(double2 x, int2 n);\n"
35733"double3 __ovld __cnfn ldexp(double3 x, int3 n);\n"
35734"double4 __ovld __cnfn ldexp(double4 x, int4 n);\n"
35735"double8 __ovld __cnfn ldexp(double8 x, int8 n);\n"
35736"double16 __ovld __cnfn ldexp(double16 x, int16 n);\n"
35737"double2 __ovld __cnfn ldexp(double2 x, int n);\n"
35738"double3 __ovld __cnfn ldexp(double3 x, int n);\n"
35739"double4 __ovld __cnfn ldexp(double4 x, int n);\n"
35740"double8 __ovld __cnfn ldexp(double8 x, int n);\n"
35741"double16 __ovld __cnfn ldexp(double16 x, int n);\n"
35742"#endif //cl_khr_fp64\n"
35743"#ifdef cl_khr_fp16\n"
35744"half __ovld __cnfn ldexp(half x, int n);\n"
35745"half2 __ovld __cnfn ldexp(half2 x, int2 n);\n"
35746"half3 __ovld __cnfn ldexp(half3 x, int3 n);\n"
35747"half4 __ovld __cnfn ldexp(half4 x, int4 n);\n"
35748"half8 __ovld __cnfn ldexp(half8 x, int8 n);\n"
35749"half16 __ovld __cnfn ldexp(half16 x, int16 n);\n"
35750"half2 __ovld __cnfn ldexp(half2 x, int n);\n"
35751"half3 __ovld __cnfn ldexp(half3 x, int n);\n"
35752"half4 __ovld __cnfn ldexp(half4 x, int n);\n"
35753"half8 __ovld __cnfn ldexp(half8 x, int n);\n"
35754"half16 __ovld __cnfn ldexp(half16 x, int n);\n"
35755"#endif //cl_khr_fp16\n"
35756"\n"
35757"/**\n"
35758" * Log gamma function. Returns the natural\n"
35759" * logarithm of the absolute value of the gamma\n"
35760" * function. The sign of the gamma function is\n"
35761" * returned in the signp argument of lgamma_r.\n"
35762" */\n"
35763"float __ovld __cnfn lgamma(float x);\n"
35764"float2 __ovld __cnfn lgamma(float2 x);\n"
35765"float3 __ovld __cnfn lgamma(float3 x);\n"
35766"float4 __ovld __cnfn lgamma(float4 x);\n"
35767"float8 __ovld __cnfn lgamma(float8 x);\n"
35768"float16 __ovld __cnfn lgamma(float16 x);\n"
35769"#ifdef cl_khr_fp64\n"
35770"double __ovld __cnfn lgamma(double x);\n"
35771"double2 __ovld __cnfn lgamma(double2 x);\n"
35772"double3 __ovld __cnfn lgamma(double3 x);\n"
35773"double4 __ovld __cnfn lgamma(double4 x);\n"
35774"double8 __ovld __cnfn lgamma(double8 x);\n"
35775"double16 __ovld __cnfn lgamma(double16 x);\n"
35776"#endif //cl_khr_fp64\n"
35777"#ifdef cl_khr_fp16\n"
35778"half __ovld __cnfn lgamma(half x);\n"
35779"half2 __ovld __cnfn lgamma(half2 x);\n"
35780"half3 __ovld __cnfn lgamma(half3 x);\n"
35781"half4 __ovld __cnfn lgamma(half4 x);\n"
35782"half8 __ovld __cnfn lgamma(half8 x);\n"
35783"half16 __ovld __cnfn lgamma(half16 x);\n"
35784"#endif //cl_khr_fp16\n"
35785"\n"
35786"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
35787"float __ovld lgamma_r(float x, int *signp);\n"
35788"float2 __ovld lgamma_r(float2 x, int2 *signp);\n"
35789"float3 __ovld lgamma_r(float3 x, int3 *signp);\n"
35790"float4 __ovld lgamma_r(float4 x, int4 *signp);\n"
35791"float8 __ovld lgamma_r(float8 x, int8 *signp);\n"
35792"float16 __ovld lgamma_r(float16 x, int16 *signp);\n"
35793"#ifdef cl_khr_fp64\n"
35794"double __ovld lgamma_r(double x, int *signp);\n"
35795"double2 __ovld lgamma_r(double2 x, int2 *signp);\n"
35796"double3 __ovld lgamma_r(double3 x, int3 *signp);\n"
35797"double4 __ovld lgamma_r(double4 x, int4 *signp);\n"
35798"double8 __ovld lgamma_r(double8 x, int8 *signp);\n"
35799"double16 __ovld lgamma_r(double16 x, int16 *signp);\n"
35800"#endif //cl_khr_fp64\n"
35801"#ifdef cl_khr_fp16\n"
35802"half __ovld lgamma_r(half x, int *signp);\n"
35803"half2 __ovld lgamma_r(half2 x, int2 *signp);\n"
35804"half3 __ovld lgamma_r(half3 x, int3 *signp);\n"
35805"half4 __ovld lgamma_r(half4 x, int4 *signp);\n"
35806"half8 __ovld lgamma_r(half8 x, int8 *signp);\n"
35807"half16 __ovld lgamma_r(half16 x, int16 *signp);\n"
35808"#endif //cl_khr_fp16\n"
35809"#else\n"
35810"float __ovld lgamma_r(float x, __global int *signp);\n"
35811"float2 __ovld lgamma_r(float2 x, __global int2 *signp);\n"
35812"float3 __ovld lgamma_r(float3 x, __global int3 *signp);\n"
35813"float4 __ovld lgamma_r(float4 x, __global int4 *signp);\n"
35814"float8 __ovld lgamma_r(float8 x, __global int8 *signp);\n"
35815"float16 __ovld lgamma_r(float16 x, __global int16 *signp);\n"
35816"float __ovld lgamma_r(float x, __local int *signp);\n"
35817"float2 __ovld lgamma_r(float2 x, __local int2 *signp);\n"
35818"float3 __ovld lgamma_r(float3 x, __local int3 *signp);\n"
35819"float4 __ovld lgamma_r(float4 x, __local int4 *signp);\n"
35820"float8 __ovld lgamma_r(float8 x, __local int8 *signp);\n"
35821"float16 __ovld lgamma_r(float16 x, __local int16 *signp);\n"
35822"float __ovld lgamma_r(float x, __private int *signp);\n"
35823"float2 __ovld lgamma_r(float2 x, __private int2 *signp);\n"
35824"float3 __ovld lgamma_r(float3 x, __private int3 *signp);\n"
35825"float4 __ovld lgamma_r(float4 x, __private int4 *signp);\n"
35826"float8 __ovld lgamma_r(float8 x, __private int8 *signp);\n"
35827"float16 __ovld lgamma_r(float16 x, __private int16 *signp);\n"
35828"#ifdef cl_khr_fp64\n"
35829"double __ovld lgamma_r(double x, __global int *signp);\n"
35830"double2 __ovld lgamma_r(double2 x, __global int2 *signp);\n"
35831"double3 __ovld lgamma_r(double3 x, __global int3 *signp);\n"
35832"double4 __ovld lgamma_r(double4 x, __global int4 *signp);\n"
35833"double8 __ovld lgamma_r(double8 x, __global int8 *signp);\n"
35834"double16 __ovld lgamma_r(double16 x, __global int16 *signp);\n"
35835"double __ovld lgamma_r(double x, __local int *signp);\n"
35836"double2 __ovld lgamma_r(double2 x, __local int2 *signp);\n"
35837"double3 __ovld lgamma_r(double3 x, __local int3 *signp);\n"
35838"double4 __ovld lgamma_r(double4 x, __local int4 *signp);\n"
35839"double8 __ovld lgamma_r(double8 x, __local int8 *signp);\n"
35840"double16 __ovld lgamma_r(double16 x, __local int16 *signp);\n"
35841"double __ovld lgamma_r(double x, __private int *signp);\n"
35842"double2 __ovld lgamma_r(double2 x, __private int2 *signp);\n"
35843"double3 __ovld lgamma_r(double3 x, __private int3 *signp);\n"
35844"double4 __ovld lgamma_r(double4 x, __private int4 *signp);\n"
35845"double8 __ovld lgamma_r(double8 x, __private int8 *signp);\n"
35846"double16 __ovld lgamma_r(double16 x, __private int16 *signp);\n"
35847"#endif //cl_khr_fp64\n"
35848"#ifdef cl_khr_fp16\n"
35849"half __ovld lgamma_r(half x, __global int *signp);\n"
35850"half2 __ovld lgamma_r(half2 x, __global int2 *signp);\n"
35851"half3 __ovld lgamma_r(half3 x, __global int3 *signp);\n"
35852"half4 __ovld lgamma_r(half4 x, __global int4 *signp);\n"
35853"half8 __ovld lgamma_r(half8 x, __global int8 *signp);\n"
35854"half16 __ovld lgamma_r(half16 x, __global int16 *signp);\n"
35855"half __ovld lgamma_r(half x, __local int *signp);\n"
35856"half2 __ovld lgamma_r(half2 x, __local int2 *signp);\n"
35857"half3 __ovld lgamma_r(half3 x, __local int3 *signp);\n"
35858"half4 __ovld lgamma_r(half4 x, __local int4 *signp);\n"
35859"half8 __ovld lgamma_r(half8 x, __local int8 *signp);\n"
35860"half16 __ovld lgamma_r(half16 x, __local int16 *signp);\n"
35861"half __ovld lgamma_r(half x, __private int *signp);\n"
35862"half2 __ovld lgamma_r(half2 x, __private int2 *signp);\n"
35863"half3 __ovld lgamma_r(half3 x, __private int3 *signp);\n"
35864"half4 __ovld lgamma_r(half4 x, __private int4 *signp);\n"
35865"half8 __ovld lgamma_r(half8 x, __private int8 *signp);\n"
35866"half16 __ovld lgamma_r(half16 x, __private int16 *signp);\n"
35867"#endif //cl_khr_fp16\n"
35868"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
35869"\n"
35870"/**\n"
35871" * Compute natural logarithm.\n"
35872" */\n"
35873"float __ovld __cnfn log(float);\n"
35874"float2 __ovld __cnfn log(float2);\n"
35875"float3 __ovld __cnfn log(float3);\n"
35876"float4 __ovld __cnfn log(float4);\n"
35877"float8 __ovld __cnfn log(float8);\n"
35878"float16 __ovld __cnfn log(float16);\n"
35879"#ifdef cl_khr_fp64\n"
35880"double __ovld __cnfn log(double);\n"
35881"double2 __ovld __cnfn log(double2);\n"
35882"double3 __ovld __cnfn log(double3);\n"
35883"double4 __ovld __cnfn log(double4);\n"
35884"double8 __ovld __cnfn log(double8);\n"
35885"double16 __ovld __cnfn log(double16);\n"
35886"#endif //cl_khr_fp64\n"
35887"#ifdef cl_khr_fp16\n"
35888"half __ovld __cnfn log(half);\n"
35889"half2 __ovld __cnfn log(half2);\n"
35890"half3 __ovld __cnfn log(half3);\n"
35891"half4 __ovld __cnfn log(half4);\n"
35892"half8 __ovld __cnfn log(half8);\n"
35893"half16 __ovld __cnfn log(half16);\n"
35894"#endif //cl_khr_fp16\n"
35895"\n"
35896"/**\n"
35897" * Compute a base 2 logarithm.\n"
35898" */\n"
35899"float __ovld __cnfn log2(float);\n"
35900"float2 __ovld __cnfn log2(float2);\n"
35901"float3 __ovld __cnfn log2(float3);\n"
35902"float4 __ovld __cnfn log2(float4);\n"
35903"float8 __ovld __cnfn log2(float8);\n"
35904"float16 __ovld __cnfn log2(float16);\n"
35905"#ifdef cl_khr_fp64\n"
35906"double __ovld __cnfn log2(double);\n"
35907"double2 __ovld __cnfn log2(double2);\n"
35908"double3 __ovld __cnfn log2(double3);\n"
35909"double4 __ovld __cnfn log2(double4);\n"
35910"double8 __ovld __cnfn log2(double8);\n"
35911"double16 __ovld __cnfn log2(double16);\n"
35912"#endif //cl_khr_fp64\n"
35913"#ifdef cl_khr_fp16\n"
35914"half __ovld __cnfn log2(half);\n"
35915"half2 __ovld __cnfn log2(half2);\n"
35916"half3 __ovld __cnfn log2(half3);\n"
35917"half4 __ovld __cnfn log2(half4);\n"
35918"half8 __ovld __cnfn log2(half8);\n"
35919"half16 __ovld __cnfn log2(half16);\n"
35920"#endif //cl_khr_fp16\n"
35921"\n"
35922"/**\n"
35923" * Compute a base 10 logarithm.\n"
35924" */\n"
35925"float __ovld __cnfn log10(float);\n"
35926"float2 __ovld __cnfn log10(float2);\n"
35927"float3 __ovld __cnfn log10(float3);\n"
35928"float4 __ovld __cnfn log10(float4);\n"
35929"float8 __ovld __cnfn log10(float8);\n"
35930"float16 __ovld __cnfn log10(float16);\n"
35931"#ifdef cl_khr_fp64\n"
35932"double __ovld __cnfn log10(double);\n"
35933"double2 __ovld __cnfn log10(double2);\n"
35934"double3 __ovld __cnfn log10(double3);\n"
35935"double4 __ovld __cnfn log10(double4);\n"
35936"double8 __ovld __cnfn log10(double8);\n"
35937"double16 __ovld __cnfn log10(double16);\n"
35938"#endif //cl_khr_fp64\n"
35939"#ifdef cl_khr_fp16\n"
35940"half __ovld __cnfn log10(half);\n"
35941"half2 __ovld __cnfn log10(half2);\n"
35942"half3 __ovld __cnfn log10(half3);\n"
35943"half4 __ovld __cnfn log10(half4);\n"
35944"half8 __ovld __cnfn log10(half8);\n"
35945"half16 __ovld __cnfn log10(half16);\n"
35946"#endif //cl_khr_fp16\n"
35947"\n"
35948"/**\n"
35949" * Compute a base e logarithm of (1.0 + x).\n"
35950" */\n"
35951"float __ovld __cnfn log1p(float x);\n"
35952"float2 __ovld __cnfn log1p(float2 x);\n"
35953"float3 __ovld __cnfn log1p(float3 x);\n"
35954"float4 __ovld __cnfn log1p(float4 x);\n"
35955"float8 __ovld __cnfn log1p(float8 x);\n"
35956"float16 __ovld __cnfn log1p(float16 x);\n"
35957"#ifdef cl_khr_fp64\n"
35958"double __ovld __cnfn log1p(double x);\n"
35959"double2 __ovld __cnfn log1p(double2 x);\n"
35960"double3 __ovld __cnfn log1p(double3 x);\n"
35961"double4 __ovld __cnfn log1p(double4 x);\n"
35962"double8 __ovld __cnfn log1p(double8 x);\n"
35963"double16 __ovld __cnfn log1p(double16 x);\n"
35964"#endif //cl_khr_fp64\n"
35965"#ifdef cl_khr_fp16\n"
35966"half __ovld __cnfn log1p(half x);\n"
35967"half2 __ovld __cnfn log1p(half2 x);\n"
35968"half3 __ovld __cnfn log1p(half3 x);\n"
35969"half4 __ovld __cnfn log1p(half4 x);\n"
35970"half8 __ovld __cnfn log1p(half8 x);\n"
35971"half16 __ovld __cnfn log1p(half16 x);\n"
35972"#endif //cl_khr_fp16\n"
35973"\n"
35974"/**\n"
35975" * Compute the exponent of x, which is the integral\n"
35976" * part of logr | x |.\n"
35977" */\n"
35978"float __ovld __cnfn logb(float x);\n"
35979"float2 __ovld __cnfn logb(float2 x);\n"
35980"float3 __ovld __cnfn logb(float3 x);\n"
35981"float4 __ovld __cnfn logb(float4 x);\n"
35982"float8 __ovld __cnfn logb(float8 x);\n"
35983"float16 __ovld __cnfn logb(float16 x);\n"
35984"#ifdef cl_khr_fp64\n"
35985"double __ovld __cnfn logb(double x);\n"
35986"double2 __ovld __cnfn logb(double2 x);\n"
35987"double3 __ovld __cnfn logb(double3 x);\n"
35988"double4 __ovld __cnfn logb(double4 x);\n"
35989"double8 __ovld __cnfn logb(double8 x);\n"
35990"double16 __ovld __cnfn logb(double16 x);\n"
35991"#endif //cl_khr_fp64\n"
35992"#ifdef cl_khr_fp16\n"
35993"half __ovld __cnfn logb(half x);\n"
35994"half2 __ovld __cnfn logb(half2 x);\n"
35995"half3 __ovld __cnfn logb(half3 x);\n"
35996"half4 __ovld __cnfn logb(half4 x);\n"
35997"half8 __ovld __cnfn logb(half8 x);\n"
35998"half16 __ovld __cnfn logb(half16 x);\n"
35999"#endif //cl_khr_fp16\n"
36000"\n"
36001"/**\n"
36002" * mad approximates a * b + c. Whether or how the\n"
36003" * product of a * b is rounded and how supernormal or\n"
36004" * subnormal intermediate products are handled is not\n"
36005" * defined. mad is intended to be used where speed is\n"
36006" * preferred over accuracy.\n"
36007" */\n"
36008"float __ovld __cnfn mad(float a, float b, float c);\n"
36009"float2 __ovld __cnfn mad(float2 a, float2 b, float2 c);\n"
36010"float3 __ovld __cnfn mad(float3 a, float3 b, float3 c);\n"
36011"float4 __ovld __cnfn mad(float4 a, float4 b, float4 c);\n"
36012"float8 __ovld __cnfn mad(float8 a, float8 b, float8 c);\n"
36013"float16 __ovld __cnfn mad(float16 a, float16 b, float16 c);\n"
36014"#ifdef cl_khr_fp64\n"
36015"double __ovld __cnfn mad(double a, double b, double c);\n"
36016"double2 __ovld __cnfn mad(double2 a, double2 b, double2 c);\n"
36017"double3 __ovld __cnfn mad(double3 a, double3 b, double3 c);\n"
36018"double4 __ovld __cnfn mad(double4 a, double4 b, double4 c);\n"
36019"double8 __ovld __cnfn mad(double8 a, double8 b, double8 c);\n"
36020"double16 __ovld __cnfn mad(double16 a, double16 b, double16 c);\n"
36021"#endif //cl_khr_fp64\n"
36022"#ifdef cl_khr_fp16\n"
36023"half __ovld __cnfn mad(half a, half b, half c);\n"
36024"half2 __ovld __cnfn mad(half2 a, half2 b, half2 c);\n"
36025"half3 __ovld __cnfn mad(half3 a, half3 b, half3 c);\n"
36026"half4 __ovld __cnfn mad(half4 a, half4 b, half4 c);\n"
36027"half8 __ovld __cnfn mad(half8 a, half8 b, half8 c);\n"
36028"half16 __ovld __cnfn mad(half16 a, half16 b, half16 c);\n"
36029"#endif //cl_khr_fp16\n"
36030"\n"
36031"/**\n"
36032" * Returns x if | x | > | y |, y if | y | > | x |, otherwise\n"
36033" * fmax(x, y).\n"
36034" */\n"
36035"float __ovld __cnfn maxmag(float x, float y);\n"
36036"float2 __ovld __cnfn maxmag(float2 x, float2 y);\n"
36037"float3 __ovld __cnfn maxmag(float3 x, float3 y);\n"
36038"float4 __ovld __cnfn maxmag(float4 x, float4 y);\n"
36039"float8 __ovld __cnfn maxmag(float8 x, float8 y);\n"
36040"float16 __ovld __cnfn maxmag(float16 x, float16 y);\n"
36041"#ifdef cl_khr_fp64\n"
36042"double __ovld __cnfn maxmag(double x, double y);\n"
36043"double2 __ovld __cnfn maxmag(double2 x, double2 y);\n"
36044"double3 __ovld __cnfn maxmag(double3 x, double3 y);\n"
36045"double4 __ovld __cnfn maxmag(double4 x, double4 y);\n"
36046"double8 __ovld __cnfn maxmag(double8 x, double8 y);\n"
36047"double16 __ovld __cnfn maxmag(double16 x, double16 y);\n"
36048"#endif //cl_khr_fp64\n"
36049"#ifdef cl_khr_fp16\n"
36050"half __ovld __cnfn maxmag(half x, half y);\n"
36051"half2 __ovld __cnfn maxmag(half2 x, half2 y);\n"
36052"half3 __ovld __cnfn maxmag(half3 x, half3 y);\n"
36053"half4 __ovld __cnfn maxmag(half4 x, half4 y);\n"
36054"half8 __ovld __cnfn maxmag(half8 x, half8 y);\n"
36055"half16 __ovld __cnfn maxmag(half16 x, half16 y);\n"
36056"#endif //cl_khr_fp16\n"
36057"\n"
36058"/**\n"
36059" * Returns x if | x | < | y |, y if | y | < | x |, otherwise\n"
36060" * fmin(x, y).\n"
36061" */\n"
36062"float __ovld __cnfn minmag(float x, float y);\n"
36063"float2 __ovld __cnfn minmag(float2 x, float2 y);\n"
36064"float3 __ovld __cnfn minmag(float3 x, float3 y);\n"
36065"float4 __ovld __cnfn minmag(float4 x, float4 y);\n"
36066"float8 __ovld __cnfn minmag(float8 x, float8 y);\n"
36067"float16 __ovld __cnfn minmag(float16 x, float16 y);\n"
36068"#ifdef cl_khr_fp64\n"
36069"double __ovld __cnfn minmag(double x, double y);\n"
36070"double2 __ovld __cnfn minmag(double2 x, double2 y);\n"
36071"double3 __ovld __cnfn minmag(double3 x, double3 y);\n"
36072"double4 __ovld __cnfn minmag(double4 x, double4 y);\n"
36073"double8 __ovld __cnfn minmag(double8 x, double8 y);\n"
36074"double16 __ovld __cnfn minmag(double16 x, double16 y);\n"
36075"#endif //cl_khr_fp64\n"
36076"#ifdef cl_khr_fp16\n"
36077"half __ovld __cnfn minmag(half x, half y);\n"
36078"half2 __ovld __cnfn minmag(half2 x, half2 y);\n"
36079"half3 __ovld __cnfn minmag(half3 x, half3 y);\n"
36080"half4 __ovld __cnfn minmag(half4 x, half4 y);\n"
36081"half8 __ovld __cnfn minmag(half8 x, half8 y);\n"
36082"half16 __ovld __cnfn minmag(half16 x, half16 y);\n"
36083"#endif //cl_khr_fp16\n"
36084"\n"
36085"/**\n"
36086" * Decompose a floating-point number. The modf\n"
36087" * function breaks the argument x into integral and\n"
36088" * fractional parts, each of which has the same sign as\n"
36089" * the argument. It stores the integral part in the object\n"
36090" * pointed to by iptr.\n"
36091" */\n"
36092"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
36093"float __ovld modf(float x, float *iptr);\n"
36094"float2 __ovld modf(float2 x, float2 *iptr);\n"
36095"float3 __ovld modf(float3 x, float3 *iptr);\n"
36096"float4 __ovld modf(float4 x, float4 *iptr);\n"
36097"float8 __ovld modf(float8 x, float8 *iptr);\n"
36098"float16 __ovld modf(float16 x, float16 *iptr);\n"
36099"#ifdef cl_khr_fp64\n"
36100"double __ovld modf(double x, double *iptr);\n"
36101"double2 __ovld modf(double2 x, double2 *iptr);\n"
36102"double3 __ovld modf(double3 x, double3 *iptr);\n"
36103"double4 __ovld modf(double4 x, double4 *iptr);\n"
36104"double8 __ovld modf(double8 x, double8 *iptr);\n"
36105"double16 __ovld modf(double16 x, double16 *iptr);\n"
36106"#endif //cl_khr_fp64\n"
36107"#ifdef cl_khr_fp16\n"
36108"half __ovld modf(half x, half *iptr);\n"
36109"half2 __ovld modf(half2 x, half2 *iptr);\n"
36110"half3 __ovld modf(half3 x, half3 *iptr);\n"
36111"half4 __ovld modf(half4 x, half4 *iptr);\n"
36112"half8 __ovld modf(half8 x, half8 *iptr);\n"
36113"half16 __ovld modf(half16 x, half16 *iptr);\n"
36114"#endif //cl_khr_fp16\n"
36115"#else\n"
36116"float __ovld modf(float x, __global float *iptr);\n"
36117"float2 __ovld modf(float2 x, __global float2 *iptr);\n"
36118"float3 __ovld modf(float3 x, __global float3 *iptr);\n"
36119"float4 __ovld modf(float4 x, __global float4 *iptr);\n"
36120"float8 __ovld modf(float8 x, __global float8 *iptr);\n"
36121"float16 __ovld modf(float16 x, __global float16 *iptr);\n"
36122"float __ovld modf(float x, __local float *iptr);\n"
36123"float2 __ovld modf(float2 x, __local float2 *iptr);\n"
36124"float3 __ovld modf(float3 x, __local float3 *iptr);\n"
36125"float4 __ovld modf(float4 x, __local float4 *iptr);\n"
36126"float8 __ovld modf(float8 x, __local float8 *iptr);\n"
36127"float16 __ovld modf(float16 x, __local float16 *iptr);\n"
36128"float __ovld modf(float x, __private float *iptr);\n"
36129"float2 __ovld modf(float2 x, __private float2 *iptr);\n"
36130"float3 __ovld modf(float3 x, __private float3 *iptr);\n"
36131"float4 __ovld modf(float4 x, __private float4 *iptr);\n"
36132"float8 __ovld modf(float8 x, __private float8 *iptr);\n"
36133"float16 __ovld modf(float16 x, __private float16 *iptr);\n"
36134"#ifdef cl_khr_fp64\n"
36135"double __ovld modf(double x, __global double *iptr);\n"
36136"double2 __ovld modf(double2 x, __global double2 *iptr);\n"
36137"double3 __ovld modf(double3 x, __global double3 *iptr);\n"
36138"double4 __ovld modf(double4 x, __global double4 *iptr);\n"
36139"double8 __ovld modf(double8 x, __global double8 *iptr);\n"
36140"double16 __ovld modf(double16 x, __global double16 *iptr);\n"
36141"double __ovld modf(double x, __local double *iptr);\n"
36142"double2 __ovld modf(double2 x, __local double2 *iptr);\n"
36143"double3 __ovld modf(double3 x, __local double3 *iptr);\n"
36144"double4 __ovld modf(double4 x, __local double4 *iptr);\n"
36145"double8 __ovld modf(double8 x, __local double8 *iptr);\n"
36146"double16 __ovld modf(double16 x, __local double16 *iptr);\n"
36147"double __ovld modf(double x, __private double *iptr);\n"
36148"double2 __ovld modf(double2 x, __private double2 *iptr);\n"
36149"double3 __ovld modf(double3 x, __private double3 *iptr);\n"
36150"double4 __ovld modf(double4 x, __private double4 *iptr);\n"
36151"double8 __ovld modf(double8 x, __private double8 *iptr);\n"
36152"double16 __ovld modf(double16 x, __private double16 *iptr);\n"
36153"#endif //cl_khr_fp64\n"
36154"#ifdef cl_khr_fp16\n"
36155"half __ovld modf(half x, __global half *iptr);\n"
36156"half2 __ovld modf(half2 x, __global half2 *iptr);\n"
36157"half3 __ovld modf(half3 x, __global half3 *iptr);\n"
36158"half4 __ovld modf(half4 x, __global half4 *iptr);\n"
36159"half8 __ovld modf(half8 x, __global half8 *iptr);\n"
36160"half16 __ovld modf(half16 x, __global half16 *iptr);\n"
36161"half __ovld modf(half x, __local half *iptr);\n"
36162"half2 __ovld modf(half2 x, __local half2 *iptr);\n"
36163"half3 __ovld modf(half3 x, __local half3 *iptr);\n"
36164"half4 __ovld modf(half4 x, __local half4 *iptr);\n"
36165"half8 __ovld modf(half8 x, __local half8 *iptr);\n"
36166"half16 __ovld modf(half16 x, __local half16 *iptr);\n"
36167"half __ovld modf(half x, __private half *iptr);\n"
36168"half2 __ovld modf(half2 x, __private half2 *iptr);\n"
36169"half3 __ovld modf(half3 x, __private half3 *iptr);\n"
36170"half4 __ovld modf(half4 x, __private half4 *iptr);\n"
36171"half8 __ovld modf(half8 x, __private half8 *iptr);\n"
36172"half16 __ovld modf(half16 x, __private half16 *iptr);\n"
36173"#endif //cl_khr_fp16\n"
36174"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
36175"\n"
36176"/**\n"
36177" * Returns a quiet NaN. The nancode may be placed\n"
36178" * in the significand of the resulting NaN.\n"
36179" */\n"
36180"float __ovld __cnfn nan(uint nancode);\n"
36181"float2 __ovld __cnfn nan(uint2 nancode);\n"
36182"float3 __ovld __cnfn nan(uint3 nancode);\n"
36183"float4 __ovld __cnfn nan(uint4 nancode);\n"
36184"float8 __ovld __cnfn nan(uint8 nancode);\n"
36185"float16 __ovld __cnfn nan(uint16 nancode);\n"
36186"#ifdef cl_khr_fp64\n"
36187"double __ovld __cnfn nan(ulong nancode);\n"
36188"double2 __ovld __cnfn nan(ulong2 nancode);\n"
36189"double3 __ovld __cnfn nan(ulong3 nancode);\n"
36190"double4 __ovld __cnfn nan(ulong4 nancode);\n"
36191"double8 __ovld __cnfn nan(ulong8 nancode);\n"
36192"double16 __ovld __cnfn nan(ulong16 nancode);\n"
36193"#endif //cl_khr_fp64\n"
36194"#ifdef cl_khr_fp16\n"
36195"half __ovld __cnfn nan(ushort nancode);\n"
36196"half2 __ovld __cnfn nan(ushort2 nancode);\n"
36197"half3 __ovld __cnfn nan(ushort3 nancode);\n"
36198"half4 __ovld __cnfn nan(ushort4 nancode);\n"
36199"half8 __ovld __cnfn nan(ushort8 nancode);\n"
36200"half16 __ovld __cnfn nan(ushort16 nancode);\n"
36201"#endif //cl_khr_fp16\n"
36202"\n"
36203"/**\n"
36204" * Computes the next representable single-precision\n"
36205" * floating-point value following x in the direction of\n"
36206" * y. Thus, if y is less than x, nextafter() returns the\n"
36207" * largest representable floating-point number less\n"
36208" * than x.\n"
36209" */\n"
36210"float __ovld __cnfn nextafter(float x, float y);\n"
36211"float2 __ovld __cnfn nextafter(float2 x, float2 y);\n"
36212"float3 __ovld __cnfn nextafter(float3 x, float3 y);\n"
36213"float4 __ovld __cnfn nextafter(float4 x, float4 y);\n"
36214"float8 __ovld __cnfn nextafter(float8 x, float8 y);\n"
36215"float16 __ovld __cnfn nextafter(float16 x, float16 y);\n"
36216"#ifdef cl_khr_fp64\n"
36217"double __ovld __cnfn nextafter(double x, double y);\n"
36218"double2 __ovld __cnfn nextafter(double2 x, double2 y);\n"
36219"double3 __ovld __cnfn nextafter(double3 x, double3 y);\n"
36220"double4 __ovld __cnfn nextafter(double4 x, double4 y);\n"
36221"double8 __ovld __cnfn nextafter(double8 x, double8 y);\n"
36222"double16 __ovld __cnfn nextafter(double16 x, double16 y);\n"
36223"#endif //cl_khr_fp64\n"
36224"#ifdef cl_khr_fp16\n"
36225"half __ovld __cnfn nextafter(half x, half y);\n"
36226"half2 __ovld __cnfn nextafter(half2 x, half2 y);\n"
36227"half3 __ovld __cnfn nextafter(half3 x, half3 y);\n"
36228"half4 __ovld __cnfn nextafter(half4 x, half4 y);\n"
36229"half8 __ovld __cnfn nextafter(half8 x, half8 y);\n"
36230"half16 __ovld __cnfn nextafter(half16 x, half16 y);\n"
36231"#endif //cl_khr_fp16\n"
36232"\n"
36233"/**\n"
36234" * Compute x to the power y.\n"
36235" */\n"
36236"float __ovld __cnfn pow(float x, float y);\n"
36237"float2 __ovld __cnfn pow(float2 x, float2 y);\n"
36238"float3 __ovld __cnfn pow(float3 x, float3 y);\n"
36239"float4 __ovld __cnfn pow(float4 x, float4 y);\n"
36240"float8 __ovld __cnfn pow(float8 x, float8 y);\n"
36241"float16 __ovld __cnfn pow(float16 x, float16 y);\n"
36242"#ifdef cl_khr_fp64\n"
36243"double __ovld __cnfn pow(double x, double y);\n"
36244"double2 __ovld __cnfn pow(double2 x, double2 y);\n"
36245"double3 __ovld __cnfn pow(double3 x, double3 y);\n"
36246"double4 __ovld __cnfn pow(double4 x, double4 y);\n"
36247"double8 __ovld __cnfn pow(double8 x, double8 y);\n"
36248"double16 __ovld __cnfn pow(double16 x, double16 y);\n"
36249"#endif //cl_khr_fp64\n"
36250"#ifdef cl_khr_fp16\n"
36251"half __ovld __cnfn pow(half x, half y);\n"
36252"half2 __ovld __cnfn pow(half2 x, half2 y);\n"
36253"half3 __ovld __cnfn pow(half3 x, half3 y);\n"
36254"half4 __ovld __cnfn pow(half4 x, half4 y);\n"
36255"half8 __ovld __cnfn pow(half8 x, half8 y);\n"
36256"half16 __ovld __cnfn pow(half16 x, half16 y);\n"
36257"#endif //cl_khr_fp16\n"
36258"\n"
36259"/**\n"
36260" * Compute x to the power y, where y is an integer.\n"
36261" */\n"
36262"float __ovld __cnfn pown(float x, int y);\n"
36263"float2 __ovld __cnfn pown(float2 x, int2 y);\n"
36264"float3 __ovld __cnfn pown(float3 x, int3 y);\n"
36265"float4 __ovld __cnfn pown(float4 x, int4 y);\n"
36266"float8 __ovld __cnfn pown(float8 x, int8 y);\n"
36267"float16 __ovld __cnfn pown(float16 x, int16 y);\n"
36268"#ifdef cl_khr_fp64\n"
36269"double __ovld __cnfn pown(double x, int y);\n"
36270"double2 __ovld __cnfn pown(double2 x, int2 y);\n"
36271"double3 __ovld __cnfn pown(double3 x, int3 y);\n"
36272"double4 __ovld __cnfn pown(double4 x, int4 y);\n"
36273"double8 __ovld __cnfn pown(double8 x, int8 y);\n"
36274"double16 __ovld __cnfn pown(double16 x, int16 y);\n"
36275"#endif //cl_khr_fp64\n"
36276"#ifdef cl_khr_fp16\n"
36277"half __ovld __cnfn pown(half x, int y);\n"
36278"half2 __ovld __cnfn pown(half2 x, int2 y);\n"
36279"half3 __ovld __cnfn pown(half3 x, int3 y);\n"
36280"half4 __ovld __cnfn pown(half4 x, int4 y);\n"
36281"half8 __ovld __cnfn pown(half8 x, int8 y);\n"
36282"half16 __ovld __cnfn pown(half16 x, int16 y);\n"
36283"#endif //cl_khr_fp16\n"
36284"\n"
36285"/**\n"
36286" * Compute x to the power y, where x is >= 0.\n"
36287" */\n"
36288"float __ovld __cnfn powr(float x, float y);\n"
36289"float2 __ovld __cnfn powr(float2 x, float2 y);\n"
36290"float3 __ovld __cnfn powr(float3 x, float3 y);\n"
36291"float4 __ovld __cnfn powr(float4 x, float4 y);\n"
36292"float8 __ovld __cnfn powr(float8 x, float8 y);\n"
36293"float16 __ovld __cnfn powr(float16 x, float16 y);\n"
36294"#ifdef cl_khr_fp64\n"
36295"double __ovld __cnfn powr(double x, double y);\n"
36296"double2 __ovld __cnfn powr(double2 x, double2 y);\n"
36297"double3 __ovld __cnfn powr(double3 x, double3 y);\n"
36298"double4 __ovld __cnfn powr(double4 x, double4 y);\n"
36299"double8 __ovld __cnfn powr(double8 x, double8 y);\n"
36300"double16 __ovld __cnfn powr(double16 x, double16 y);\n"
36301"#endif //cl_khr_fp64\n"
36302"#ifdef cl_khr_fp16\n"
36303"half __ovld __cnfn powr(half x, half y);\n"
36304"half2 __ovld __cnfn powr(half2 x, half2 y);\n"
36305"half3 __ovld __cnfn powr(half3 x, half3 y);\n"
36306"half4 __ovld __cnfn powr(half4 x, half4 y);\n"
36307"half8 __ovld __cnfn powr(half8 x, half8 y);\n"
36308"half16 __ovld __cnfn powr(half16 x, half16 y);\n"
36309"#endif //cl_khr_fp16\n"
36310"\n"
36311"/**\n"
36312" * Compute the value r such that r = x - n*y, where n\n"
36313" * is the integer nearest the exact value of x/y. If there\n"
36314" * are two integers closest to x/y, n shall be the even\n"
36315" * one. If r is zero, it is given the same sign as x.\n"
36316" */\n"
36317"float __ovld __cnfn remainder(float x, float y);\n"
36318"float2 __ovld __cnfn remainder(float2 x, float2 y);\n"
36319"float3 __ovld __cnfn remainder(float3 x, float3 y);\n"
36320"float4 __ovld __cnfn remainder(float4 x, float4 y);\n"
36321"float8 __ovld __cnfn remainder(float8 x, float8 y);\n"
36322"float16 __ovld __cnfn remainder(float16 x, float16 y);\n"
36323"#ifdef cl_khr_fp64\n"
36324"double __ovld __cnfn remainder(double x, double y);\n"
36325"double2 __ovld __cnfn remainder(double2 x, double2 y);\n"
36326"double3 __ovld __cnfn remainder(double3 x, double3 y);\n"
36327"double4 __ovld __cnfn remainder(double4 x, double4 y);\n"
36328"double8 __ovld __cnfn remainder(double8 x, double8 y);\n"
36329"double16 __ovld __cnfn remainder(double16 x, double16 y);\n"
36330"#endif //cl_khr_fp64\n"
36331"#ifdef cl_khr_fp16\n"
36332"half __ovld __cnfn remainder(half x, half y);\n"
36333"half2 __ovld __cnfn remainder(half2 x, half2 y);\n"
36334"half3 __ovld __cnfn remainder(half3 x, half3 y);\n"
36335"half4 __ovld __cnfn remainder(half4 x, half4 y);\n"
36336"half8 __ovld __cnfn remainder(half8 x, half8 y);\n"
36337"half16 __ovld __cnfn remainder(half16 x, half16 y);\n"
36338"#endif //cl_khr_fp16\n"
36339"\n"
36340"/**\n"
36341" * The remquo function computes the value r such\n"
36342" * that r = x - n*y, where n is the integer nearest the\n"
36343" * exact value of x/y. If there are two integers closest\n"
36344" * to x/y, n shall be the even one. If r is zero, it is\n"
36345" * given the same sign as x. This is the same value\n"
36346" * that is returned by the remainder function.\n"
36347" * remquo also calculates the lower seven bits of the\n"
36348" * integral quotient x/y, and gives that value the same\n"
36349" * sign as x/y. It stores this signed value in the object\n"
36350" * pointed to by quo.\n"
36351" */\n"
36352"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
36353"float __ovld remquo(float x, float y, int *quo);\n"
36354"float2 __ovld remquo(float2 x, float2 y, int2 *quo);\n"
36355"float3 __ovld remquo(float3 x, float3 y, int3 *quo);\n"
36356"float4 __ovld remquo(float4 x, float4 y, int4 *quo);\n"
36357"float8 __ovld remquo(float8 x, float8 y, int8 *quo);\n"
36358"float16 __ovld remquo(float16 x, float16 y, int16 *quo);\n"
36359"#ifdef cl_khr_fp64\n"
36360"double __ovld remquo(double x, double y, int *quo);\n"
36361"double2 __ovld remquo(double2 x, double2 y, int2 *quo);\n"
36362"double3 __ovld remquo(double3 x, double3 y, int3 *quo);\n"
36363"double4 __ovld remquo(double4 x, double4 y, int4 *quo);\n"
36364"double8 __ovld remquo(double8 x, double8 y, int8 *quo);\n"
36365"double16 __ovld remquo(double16 x, double16 y, int16 *quo);\n"
36366"#endif //cl_khr_fp64\n"
36367"#ifdef cl_khr_fp16\n"
36368"half __ovld remquo(half x, half y, int *quo);\n"
36369"half2 __ovld remquo(half2 x, half2 y, int2 *quo);\n"
36370"half3 __ovld remquo(half3 x, half3 y, int3 *quo);\n"
36371"half4 __ovld remquo(half4 x, half4 y, int4 *quo);\n"
36372"half8 __ovld remquo(half8 x, half8 y, int8 *quo);\n"
36373"half16 __ovld remquo(half16 x, half16 y, int16 *quo);\n"
36374"\n"
36375"#endif //cl_khr_fp16\n"
36376"#else\n"
36377"float __ovld remquo(float x, float y, __global int *quo);\n"
36378"float2 __ovld remquo(float2 x, float2 y, __global int2 *quo);\n"
36379"float3 __ovld remquo(float3 x, float3 y, __global int3 *quo);\n"
36380"float4 __ovld remquo(float4 x, float4 y, __global int4 *quo);\n"
36381"float8 __ovld remquo(float8 x, float8 y, __global int8 *quo);\n"
36382"float16 __ovld remquo(float16 x, float16 y, __global int16 *quo);\n"
36383"float __ovld remquo(float x, float y, __local int *quo);\n"
36384"float2 __ovld remquo(float2 x, float2 y, __local int2 *quo);\n"
36385"float3 __ovld remquo(float3 x, float3 y, __local int3 *quo);\n"
36386"float4 __ovld remquo(float4 x, float4 y, __local int4 *quo);\n"
36387"float8 __ovld remquo(float8 x, float8 y, __local int8 *quo);\n"
36388"float16 __ovld remquo(float16 x, float16 y, __local int16 *quo);\n"
36389"float __ovld remquo(float x, float y, __private int *quo);\n"
36390"float2 __ovld remquo(float2 x, float2 y, __private int2 *quo);\n"
36391"float3 __ovld remquo(float3 x, float3 y, __private int3 *quo);\n"
36392"float4 __ovld remquo(float4 x, float4 y, __private int4 *quo);\n"
36393"float8 __ovld remquo(float8 x, float8 y, __private int8 *quo);\n"
36394"float16 __ovld remquo(float16 x, float16 y, __private int16 *quo);\n"
36395"#ifdef cl_khr_fp64\n"
36396"double __ovld remquo(double x, double y, __global int *quo);\n"
36397"double2 __ovld remquo(double2 x, double2 y, __global int2 *quo);\n"
36398"double3 __ovld remquo(double3 x, double3 y, __global int3 *quo);\n"
36399"double4 __ovld remquo(double4 x, double4 y, __global int4 *quo);\n"
36400"double8 __ovld remquo(double8 x, double8 y, __global int8 *quo);\n"
36401"double16 __ovld remquo(double16 x, double16 y, __global int16 *quo);\n"
36402"double __ovld remquo(double x, double y, __local int *quo);\n"
36403"double2 __ovld remquo(double2 x, double2 y, __local int2 *quo);\n"
36404"double3 __ovld remquo(double3 x, double3 y, __local int3 *quo);\n"
36405"double4 __ovld remquo(double4 x, double4 y, __local int4 *quo);\n"
36406"double8 __ovld remquo(double8 x, double8 y, __local int8 *quo);\n"
36407"double16 __ovld remquo(double16 x, double16 y, __local int16 *quo);\n"
36408"double __ovld remquo(double x, double y, __private int *quo);\n"
36409"double2 __ovld remquo(double2 x, double2 y, __private int2 *quo);\n"
36410"double3 __ovld remquo(double3 x, double3 y, __private int3 *quo);\n"
36411"double4 __ovld remquo(double4 x, double4 y, __private int4 *quo);\n"
36412"double8 __ovld remquo(double8 x, double8 y, __private int8 *quo);\n"
36413"double16 __ovld remquo(double16 x, double16 y, __private int16 *quo);\n"
36414"#endif //cl_khr_fp64\n"
36415"#ifdef cl_khr_fp16\n"
36416"half __ovld remquo(half x, half y, __global int *quo);\n"
36417"half2 __ovld remquo(half2 x, half2 y, __global int2 *quo);\n"
36418"half3 __ovld remquo(half3 x, half3 y, __global int3 *quo);\n"
36419"half4 __ovld remquo(half4 x, half4 y, __global int4 *quo);\n"
36420"half8 __ovld remquo(half8 x, half8 y, __global int8 *quo);\n"
36421"half16 __ovld remquo(half16 x, half16 y, __global int16 *quo);\n"
36422"half __ovld remquo(half x, half y, __local int *quo);\n"
36423"half2 __ovld remquo(half2 x, half2 y, __local int2 *quo);\n"
36424"half3 __ovld remquo(half3 x, half3 y, __local int3 *quo);\n"
36425"half4 __ovld remquo(half4 x, half4 y, __local int4 *quo);\n"
36426"half8 __ovld remquo(half8 x, half8 y, __local int8 *quo);\n"
36427"half16 __ovld remquo(half16 x, half16 y, __local int16 *quo);\n"
36428"half __ovld remquo(half x, half y, __private int *quo);\n"
36429"half2 __ovld remquo(half2 x, half2 y, __private int2 *quo);\n"
36430"half3 __ovld remquo(half3 x, half3 y, __private int3 *quo);\n"
36431"half4 __ovld remquo(half4 x, half4 y, __private int4 *quo);\n"
36432"half8 __ovld remquo(half8 x, half8 y, __private int8 *quo);\n"
36433"half16 __ovld remquo(half16 x, half16 y, __private int16 *quo);\n"
36434"#endif //cl_khr_fp16\n"
36435"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
36436"/**\n"
36437" * Round to integral value (using round to nearest\n"
36438" * even rounding mode) in floating-point format.\n"
36439" * Refer to section 7.1 for description of rounding\n"
36440" * modes.\n"
36441" */\n"
36442"float __ovld __cnfn rint(float);\n"
36443"float2 __ovld __cnfn rint(float2);\n"
36444"float3 __ovld __cnfn rint(float3);\n"
36445"float4 __ovld __cnfn rint(float4);\n"
36446"float8 __ovld __cnfn rint(float8);\n"
36447"float16 __ovld __cnfn rint(float16);\n"
36448"#ifdef cl_khr_fp64\n"
36449"double __ovld __cnfn rint(double);\n"
36450"double2 __ovld __cnfn rint(double2);\n"
36451"double3 __ovld __cnfn rint(double3);\n"
36452"double4 __ovld __cnfn rint(double4);\n"
36453"double8 __ovld __cnfn rint(double8);\n"
36454"double16 __ovld __cnfn rint(double16);\n"
36455"#endif //cl_khr_fp64\n"
36456"#ifdef cl_khr_fp16\n"
36457"half __ovld __cnfn rint(half);\n"
36458"half2 __ovld __cnfn rint(half2);\n"
36459"half3 __ovld __cnfn rint(half3);\n"
36460"half4 __ovld __cnfn rint(half4);\n"
36461"half8 __ovld __cnfn rint(half8);\n"
36462"half16 __ovld __cnfn rint(half16);\n"
36463"#endif //cl_khr_fp16\n"
36464"\n"
36465"/**\n"
36466" * Compute x to the power 1/y.\n"
36467" */\n"
36468"float __ovld __cnfn rootn(float x, int y);\n"
36469"float2 __ovld __cnfn rootn(float2 x, int2 y);\n"
36470"float3 __ovld __cnfn rootn(float3 x, int3 y);\n"
36471"float4 __ovld __cnfn rootn(float4 x, int4 y);\n"
36472"float8 __ovld __cnfn rootn(float8 x, int8 y);\n"
36473"float16 __ovld __cnfn rootn(float16 x, int16 y);\n"
36474"#ifdef cl_khr_fp64\n"
36475"double __ovld __cnfn rootn(double x, int y);\n"
36476"double2 __ovld __cnfn rootn(double2 x, int2 y);\n"
36477"double3 __ovld __cnfn rootn(double3 x, int3 y);\n"
36478"double4 __ovld __cnfn rootn(double4 x, int4 y);\n"
36479"double8 __ovld __cnfn rootn(double8 x, int8 y);\n"
36480"double16 __ovld __cnfn rootn(double16 x, int16 y);\n"
36481"#endif //cl_khr_fp64\n"
36482"#ifdef cl_khr_fp16\n"
36483"half __ovld __cnfn rootn(half x, int y);\n"
36484"half2 __ovld __cnfn rootn(half2 x, int2 y);\n"
36485"half3 __ovld __cnfn rootn(half3 x, int3 y);\n"
36486"half4 __ovld __cnfn rootn(half4 x, int4 y);\n"
36487"half8 __ovld __cnfn rootn(half8 x, int8 y);\n"
36488"half16 __ovld __cnfn rootn(half16 x, int16 y);\n"
36489"#endif //cl_khr_fp16\n"
36490"\n"
36491"/**\n"
36492" * Return the integral value nearest to x rounding\n"
36493" * halfway cases away from zero, regardless of the\n"
36494" * current rounding direction.\n"
36495" */\n"
36496"float __ovld __cnfn round(float x);\n"
36497"float2 __ovld __cnfn round(float2 x);\n"
36498"float3 __ovld __cnfn round(float3 x);\n"
36499"float4 __ovld __cnfn round(float4 x);\n"
36500"float8 __ovld __cnfn round(float8 x);\n"
36501"float16 __ovld __cnfn round(float16 x);\n"
36502"#ifdef cl_khr_fp64\n"
36503"double __ovld __cnfn round(double x);\n"
36504"double2 __ovld __cnfn round(double2 x);\n"
36505"double3 __ovld __cnfn round(double3 x);\n"
36506"double4 __ovld __cnfn round(double4 x);\n"
36507"double8 __ovld __cnfn round(double8 x);\n"
36508"double16 __ovld __cnfn round(double16 x);\n"
36509"#endif //cl_khr_fp64\n"
36510"#ifdef cl_khr_fp16\n"
36511"half __ovld __cnfn round(half x);\n"
36512"half2 __ovld __cnfn round(half2 x);\n"
36513"half3 __ovld __cnfn round(half3 x);\n"
36514"half4 __ovld __cnfn round(half4 x);\n"
36515"half8 __ovld __cnfn round(half8 x);\n"
36516"half16 __ovld __cnfn round(half16 x);\n"
36517"#endif //cl_khr_fp16\n"
36518"\n"
36519"/**\n"
36520" * Compute inverse square root.\n"
36521" */\n"
36522"float __ovld __cnfn rsqrt(float);\n"
36523"float2 __ovld __cnfn rsqrt(float2);\n"
36524"float3 __ovld __cnfn rsqrt(float3);\n"
36525"float4 __ovld __cnfn rsqrt(float4);\n"
36526"float8 __ovld __cnfn rsqrt(float8);\n"
36527"float16 __ovld __cnfn rsqrt(float16);\n"
36528"#ifdef cl_khr_fp64\n"
36529"double __ovld __cnfn rsqrt(double);\n"
36530"double2 __ovld __cnfn rsqrt(double2);\n"
36531"double3 __ovld __cnfn rsqrt(double3);\n"
36532"double4 __ovld __cnfn rsqrt(double4);\n"
36533"double8 __ovld __cnfn rsqrt(double8);\n"
36534"double16 __ovld __cnfn rsqrt(double16);\n"
36535"#endif //cl_khr_fp64\n"
36536"#ifdef cl_khr_fp16\n"
36537"half __ovld __cnfn rsqrt(half);\n"
36538"half2 __ovld __cnfn rsqrt(half2);\n"
36539"half3 __ovld __cnfn rsqrt(half3);\n"
36540"half4 __ovld __cnfn rsqrt(half4);\n"
36541"half8 __ovld __cnfn rsqrt(half8);\n"
36542"half16 __ovld __cnfn rsqrt(half16);\n"
36543"#endif //cl_khr_fp16\n"
36544"\n"
36545"/**\n"
36546" * Compute sine.\n"
36547" */\n"
36548"float __ovld __cnfn sin(float);\n"
36549"float2 __ovld __cnfn sin(float2);\n"
36550"float3 __ovld __cnfn sin(float3);\n"
36551"float4 __ovld __cnfn sin(float4);\n"
36552"float8 __ovld __cnfn sin(float8);\n"
36553"float16 __ovld __cnfn sin(float16);\n"
36554"#ifdef cl_khr_fp64\n"
36555"double __ovld __cnfn sin(double);\n"
36556"double2 __ovld __cnfn sin(double2);\n"
36557"double3 __ovld __cnfn sin(double3);\n"
36558"double4 __ovld __cnfn sin(double4);\n"
36559"double8 __ovld __cnfn sin(double8);\n"
36560"double16 __ovld __cnfn sin(double16);\n"
36561"#endif //cl_khr_fp64\n"
36562"#ifdef cl_khr_fp16\n"
36563"half __ovld __cnfn sin(half);\n"
36564"half2 __ovld __cnfn sin(half2);\n"
36565"half3 __ovld __cnfn sin(half3);\n"
36566"half4 __ovld __cnfn sin(half4);\n"
36567"half8 __ovld __cnfn sin(half8);\n"
36568"half16 __ovld __cnfn sin(half16);\n"
36569"#endif //cl_khr_fp16\n"
36570"\n"
36571"/**\n"
36572" * Compute sine and cosine of x. The computed sine\n"
36573" * is the return value and computed cosine is returned\n"
36574" * in cosval.\n"
36575" */\n"
36576"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
36577"float __ovld sincos(float x, float *cosval);\n"
36578"float2 __ovld sincos(float2 x, float2 *cosval);\n"
36579"float3 __ovld sincos(float3 x, float3 *cosval);\n"
36580"float4 __ovld sincos(float4 x, float4 *cosval);\n"
36581"float8 __ovld sincos(float8 x, float8 *cosval);\n"
36582"float16 __ovld sincos(float16 x, float16 *cosval);\n"
36583"#ifdef cl_khr_fp64\n"
36584"double __ovld sincos(double x, double *cosval);\n"
36585"double2 __ovld sincos(double2 x, double2 *cosval);\n"
36586"double3 __ovld sincos(double3 x, double3 *cosval);\n"
36587"double4 __ovld sincos(double4 x, double4 *cosval);\n"
36588"double8 __ovld sincos(double8 x, double8 *cosval);\n"
36589"double16 __ovld sincos(double16 x, double16 *cosval);\n"
36590"#endif //cl_khr_fp64\n"
36591"#ifdef cl_khr_fp16\n"
36592"half __ovld sincos(half x, half *cosval);\n"
36593"half2 __ovld sincos(half2 x, half2 *cosval);\n"
36594"half3 __ovld sincos(half3 x, half3 *cosval);\n"
36595"half4 __ovld sincos(half4 x, half4 *cosval);\n"
36596"half8 __ovld sincos(half8 x, half8 *cosval);\n"
36597"half16 __ovld sincos(half16 x, half16 *cosval);\n"
36598"#endif //cl_khr_fp16\n"
36599"#else\n"
36600"float __ovld sincos(float x, __global float *cosval);\n"
36601"float2 __ovld sincos(float2 x, __global float2 *cosval);\n"
36602"float3 __ovld sincos(float3 x, __global float3 *cosval);\n"
36603"float4 __ovld sincos(float4 x, __global float4 *cosval);\n"
36604"float8 __ovld sincos(float8 x, __global float8 *cosval);\n"
36605"float16 __ovld sincos(float16 x, __global float16 *cosval);\n"
36606"float __ovld sincos(float x, __local float *cosval);\n"
36607"float2 __ovld sincos(float2 x, __local float2 *cosval);\n"
36608"float3 __ovld sincos(float3 x, __local float3 *cosval);\n"
36609"float4 __ovld sincos(float4 x, __local float4 *cosval);\n"
36610"float8 __ovld sincos(float8 x, __local float8 *cosval);\n"
36611"float16 __ovld sincos(float16 x, __local float16 *cosval);\n"
36612"float __ovld sincos(float x, __private float *cosval);\n"
36613"float2 __ovld sincos(float2 x, __private float2 *cosval);\n"
36614"float3 __ovld sincos(float3 x, __private float3 *cosval);\n"
36615"float4 __ovld sincos(float4 x, __private float4 *cosval);\n"
36616"float8 __ovld sincos(float8 x, __private float8 *cosval);\n"
36617"float16 __ovld sincos(float16 x, __private float16 *cosval);\n"
36618"#ifdef cl_khr_fp64\n"
36619"double __ovld sincos(double x, __global double *cosval);\n"
36620"double2 __ovld sincos(double2 x, __global double2 *cosval);\n"
36621"double3 __ovld sincos(double3 x, __global double3 *cosval);\n"
36622"double4 __ovld sincos(double4 x, __global double4 *cosval);\n"
36623"double8 __ovld sincos(double8 x, __global double8 *cosval);\n"
36624"double16 __ovld sincos(double16 x, __global double16 *cosval);\n"
36625"double __ovld sincos(double x, __local double *cosval);\n"
36626"double2 __ovld sincos(double2 x, __local double2 *cosval);\n"
36627"double3 __ovld sincos(double3 x, __local double3 *cosval);\n"
36628"double4 __ovld sincos(double4 x, __local double4 *cosval);\n"
36629"double8 __ovld sincos(double8 x, __local double8 *cosval);\n"
36630"double16 __ovld sincos(double16 x, __local double16 *cosval);\n"
36631"double __ovld sincos(double x, __private double *cosval);\n"
36632"double2 __ovld sincos(double2 x, __private double2 *cosval);\n"
36633"double3 __ovld sincos(double3 x, __private double3 *cosval);\n"
36634"double4 __ovld sincos(double4 x, __private double4 *cosval);\n"
36635"double8 __ovld sincos(double8 x, __private double8 *cosval);\n"
36636"double16 __ovld sincos(double16 x, __private double16 *cosval);\n"
36637"#endif //cl_khr_fp64\n"
36638"#ifdef cl_khr_fp16\n"
36639"half __ovld sincos(half x, __global half *cosval);\n"
36640"half2 __ovld sincos(half2 x, __global half2 *cosval);\n"
36641"half3 __ovld sincos(half3 x, __global half3 *cosval);\n"
36642"half4 __ovld sincos(half4 x, __global half4 *cosval);\n"
36643"half8 __ovld sincos(half8 x, __global half8 *cosval);\n"
36644"half16 __ovld sincos(half16 x, __global half16 *cosval);\n"
36645"half __ovld sincos(half x, __local half *cosval);\n"
36646"half2 __ovld sincos(half2 x, __local half2 *cosval);\n"
36647"half3 __ovld sincos(half3 x, __local half3 *cosval);\n"
36648"half4 __ovld sincos(half4 x, __local half4 *cosval);\n"
36649"half8 __ovld sincos(half8 x, __local half8 *cosval);\n"
36650"half16 __ovld sincos(half16 x, __local half16 *cosval);\n"
36651"half __ovld sincos(half x, __private half *cosval);\n"
36652"half2 __ovld sincos(half2 x, __private half2 *cosval);\n"
36653"half3 __ovld sincos(half3 x, __private half3 *cosval);\n"
36654"half4 __ovld sincos(half4 x, __private half4 *cosval);\n"
36655"half8 __ovld sincos(half8 x, __private half8 *cosval);\n"
36656"half16 __ovld sincos(half16 x, __private half16 *cosval);\n"
36657"#endif //cl_khr_fp16\n"
36658"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
36659"\n"
36660"/**\n"
36661" * Compute hyperbolic sine.\n"
36662" */\n"
36663"float __ovld __cnfn sinh(float);\n"
36664"float2 __ovld __cnfn sinh(float2);\n"
36665"float3 __ovld __cnfn sinh(float3);\n"
36666"float4 __ovld __cnfn sinh(float4);\n"
36667"float8 __ovld __cnfn sinh(float8);\n"
36668"float16 __ovld __cnfn sinh(float16);\n"
36669"#ifdef cl_khr_fp64\n"
36670"double __ovld __cnfn sinh(double);\n"
36671"double2 __ovld __cnfn sinh(double2);\n"
36672"double3 __ovld __cnfn sinh(double3);\n"
36673"double4 __ovld __cnfn sinh(double4);\n"
36674"double8 __ovld __cnfn sinh(double8);\n"
36675"double16 __ovld __cnfn sinh(double16);\n"
36676"#endif //cl_khr_fp64\n"
36677"#ifdef cl_khr_fp16\n"
36678"half __ovld __cnfn sinh(half);\n"
36679"half2 __ovld __cnfn sinh(half2);\n"
36680"half3 __ovld __cnfn sinh(half3);\n"
36681"half4 __ovld __cnfn sinh(half4);\n"
36682"half8 __ovld __cnfn sinh(half8);\n"
36683"half16 __ovld __cnfn sinh(half16);\n"
36684"#endif //cl_khr_fp16\n"
36685"\n"
36686"/**\n"
36687" * Compute sin (PI * x).\n"
36688" */\n"
36689"float __ovld __cnfn sinpi(float x);\n"
36690"float2 __ovld __cnfn sinpi(float2 x);\n"
36691"float3 __ovld __cnfn sinpi(float3 x);\n"
36692"float4 __ovld __cnfn sinpi(float4 x);\n"
36693"float8 __ovld __cnfn sinpi(float8 x);\n"
36694"float16 __ovld __cnfn sinpi(float16 x);\n"
36695"#ifdef cl_khr_fp64\n"
36696"double __ovld __cnfn sinpi(double x);\n"
36697"double2 __ovld __cnfn sinpi(double2 x);\n"
36698"double3 __ovld __cnfn sinpi(double3 x);\n"
36699"double4 __ovld __cnfn sinpi(double4 x);\n"
36700"double8 __ovld __cnfn sinpi(double8 x);\n"
36701"double16 __ovld __cnfn sinpi(double16 x);\n"
36702"#endif //cl_khr_fp64\n"
36703"#ifdef cl_khr_fp16\n"
36704"half __ovld __cnfn sinpi(half x);\n"
36705"half2 __ovld __cnfn sinpi(half2 x);\n"
36706"half3 __ovld __cnfn sinpi(half3 x);\n"
36707"half4 __ovld __cnfn sinpi(half4 x);\n"
36708"half8 __ovld __cnfn sinpi(half8 x);\n"
36709"half16 __ovld __cnfn sinpi(half16 x);\n"
36710"#endif //cl_khr_fp16\n"
36711"\n"
36712"/**\n"
36713" * Compute square root.\n"
36714" */\n"
36715"float __ovld __cnfn sqrt(float);\n"
36716"float2 __ovld __cnfn sqrt(float2);\n"
36717"float3 __ovld __cnfn sqrt(float3);\n"
36718"float4 __ovld __cnfn sqrt(float4);\n"
36719"float8 __ovld __cnfn sqrt(float8);\n"
36720"float16 __ovld __cnfn sqrt(float16);\n"
36721"#ifdef cl_khr_fp64\n"
36722"double __ovld __cnfn sqrt(double);\n"
36723"double2 __ovld __cnfn sqrt(double2);\n"
36724"double3 __ovld __cnfn sqrt(double3);\n"
36725"double4 __ovld __cnfn sqrt(double4);\n"
36726"double8 __ovld __cnfn sqrt(double8);\n"
36727"double16 __ovld __cnfn sqrt(double16);\n"
36728"#endif //cl_khr_fp64\n"
36729"#ifdef cl_khr_fp16\n"
36730"half __ovld __cnfn sqrt(half);\n"
36731"half2 __ovld __cnfn sqrt(half2);\n"
36732"half3 __ovld __cnfn sqrt(half3);\n"
36733"half4 __ovld __cnfn sqrt(half4);\n"
36734"half8 __ovld __cnfn sqrt(half8);\n"
36735"half16 __ovld __cnfn sqrt(half16);\n"
36736"#endif //cl_khr_fp16\n"
36737"\n"
36738"/**\n"
36739" * Compute tangent.\n"
36740" */\n"
36741"float __ovld __cnfn tan(float);\n"
36742"float2 __ovld __cnfn tan(float2);\n"
36743"float3 __ovld __cnfn tan(float3);\n"
36744"float4 __ovld __cnfn tan(float4);\n"
36745"float8 __ovld __cnfn tan(float8);\n"
36746"float16 __ovld __cnfn tan(float16);\n"
36747"#ifdef cl_khr_fp64\n"
36748"double __ovld __cnfn tan(double);\n"
36749"double2 __ovld __cnfn tan(double2);\n"
36750"double3 __ovld __cnfn tan(double3);\n"
36751"double4 __ovld __cnfn tan(double4);\n"
36752"double8 __ovld __cnfn tan(double8);\n"
36753"double16 __ovld __cnfn tan(double16);\n"
36754"#endif //cl_khr_fp64\n"
36755"#ifdef cl_khr_fp16\n"
36756"half __ovld __cnfn tan(half);\n"
36757"half2 __ovld __cnfn tan(half2);\n"
36758"half3 __ovld __cnfn tan(half3);\n"
36759"half4 __ovld __cnfn tan(half4);\n"
36760"half8 __ovld __cnfn tan(half8);\n"
36761"half16 __ovld __cnfn tan(half16);\n"
36762"#endif //cl_khr_fp16\n"
36763"\n"
36764"/**\n"
36765" * Compute hyperbolic tangent.\n"
36766" */\n"
36767"float __ovld __cnfn tanh(float);\n"
36768"float2 __ovld __cnfn tanh(float2);\n"
36769"float3 __ovld __cnfn tanh(float3);\n"
36770"float4 __ovld __cnfn tanh(float4);\n"
36771"float8 __ovld __cnfn tanh(float8);\n"
36772"float16 __ovld __cnfn tanh(float16);\n"
36773"#ifdef cl_khr_fp64\n"
36774"double __ovld __cnfn tanh(double);\n"
36775"double2 __ovld __cnfn tanh(double2);\n"
36776"double3 __ovld __cnfn tanh(double3);\n"
36777"double4 __ovld __cnfn tanh(double4);\n"
36778"double8 __ovld __cnfn tanh(double8);\n"
36779"double16 __ovld __cnfn tanh(double16);\n"
36780"#endif //cl_khr_fp64\n"
36781"#ifdef cl_khr_fp16\n"
36782"half __ovld __cnfn tanh(half);\n"
36783"half2 __ovld __cnfn tanh(half2);\n"
36784"half3 __ovld __cnfn tanh(half3);\n"
36785"half4 __ovld __cnfn tanh(half4);\n"
36786"half8 __ovld __cnfn tanh(half8);\n"
36787"half16 __ovld __cnfn tanh(half16);\n"
36788"#endif //cl_khr_fp16\n"
36789"\n"
36790"/**\n"
36791" * Compute tan (PI * x).\n"
36792" */\n"
36793"float __ovld __cnfn tanpi(float x);\n"
36794"float2 __ovld __cnfn tanpi(float2 x);\n"
36795"float3 __ovld __cnfn tanpi(float3 x);\n"
36796"float4 __ovld __cnfn tanpi(float4 x);\n"
36797"float8 __ovld __cnfn tanpi(float8 x);\n"
36798"float16 __ovld __cnfn tanpi(float16 x);\n"
36799"#ifdef cl_khr_fp64\n"
36800"double __ovld __cnfn tanpi(double x);\n"
36801"double2 __ovld __cnfn tanpi(double2 x);\n"
36802"double3 __ovld __cnfn tanpi(double3 x);\n"
36803"double4 __ovld __cnfn tanpi(double4 x);\n"
36804"double8 __ovld __cnfn tanpi(double8 x);\n"
36805"double16 __ovld __cnfn tanpi(double16 x);\n"
36806"#endif //cl_khr_fp64\n"
36807"#ifdef cl_khr_fp16\n"
36808"half __ovld __cnfn tanpi(half x);\n"
36809"half2 __ovld __cnfn tanpi(half2 x);\n"
36810"half3 __ovld __cnfn tanpi(half3 x);\n"
36811"half4 __ovld __cnfn tanpi(half4 x);\n"
36812"half8 __ovld __cnfn tanpi(half8 x);\n"
36813"half16 __ovld __cnfn tanpi(half16 x);\n"
36814"#endif //cl_khr_fp16\n"
36815"\n"
36816"/**\n"
36817" * Compute the gamma function.\n"
36818" */\n"
36819"float __ovld __cnfn tgamma(float);\n"
36820"float2 __ovld __cnfn tgamma(float2);\n"
36821"float3 __ovld __cnfn tgamma(float3);\n"
36822"float4 __ovld __cnfn tgamma(float4);\n"
36823"float8 __ovld __cnfn tgamma(float8);\n"
36824"float16 __ovld __cnfn tgamma(float16);\n"
36825"#ifdef cl_khr_fp64\n"
36826"double __ovld __cnfn tgamma(double);\n"
36827"double2 __ovld __cnfn tgamma(double2);\n"
36828"double3 __ovld __cnfn tgamma(double3);\n"
36829"double4 __ovld __cnfn tgamma(double4);\n"
36830"double8 __ovld __cnfn tgamma(double8);\n"
36831"double16 __ovld __cnfn tgamma(double16);\n"
36832"#endif //cl_khr_fp64\n"
36833"#ifdef cl_khr_fp16\n"
36834"half __ovld __cnfn tgamma(half);\n"
36835"half2 __ovld __cnfn tgamma(half2);\n"
36836"half3 __ovld __cnfn tgamma(half3);\n"
36837"half4 __ovld __cnfn tgamma(half4);\n"
36838"half8 __ovld __cnfn tgamma(half8);\n"
36839"half16 __ovld __cnfn tgamma(half16);\n"
36840"#endif //cl_khr_fp16\n"
36841"\n"
36842"/**\n"
36843" * Round to integral value using the round to zero\n"
36844" * rounding mode.\n"
36845" */\n"
36846"float __ovld __cnfn trunc(float);\n"
36847"float2 __ovld __cnfn trunc(float2);\n"
36848"float3 __ovld __cnfn trunc(float3);\n"
36849"float4 __ovld __cnfn trunc(float4);\n"
36850"float8 __ovld __cnfn trunc(float8);\n"
36851"float16 __ovld __cnfn trunc(float16);\n"
36852"#ifdef cl_khr_fp64\n"
36853"double __ovld __cnfn trunc(double);\n"
36854"double2 __ovld __cnfn trunc(double2);\n"
36855"double3 __ovld __cnfn trunc(double3);\n"
36856"double4 __ovld __cnfn trunc(double4);\n"
36857"double8 __ovld __cnfn trunc(double8);\n"
36858"double16 __ovld __cnfn trunc(double16);\n"
36859"#endif //cl_khr_fp64\n"
36860"#ifdef cl_khr_fp16\n"
36861"half __ovld __cnfn trunc(half);\n"
36862"half2 __ovld __cnfn trunc(half2);\n"
36863"half3 __ovld __cnfn trunc(half3);\n"
36864"half4 __ovld __cnfn trunc(half4);\n"
36865"half8 __ovld __cnfn trunc(half8);\n"
36866"half16 __ovld __cnfn trunc(half16);\n"
36867"#endif //cl_khr_fp16\n"
36868"\n"
36869"/**\n"
36870" * Compute cosine. x must be in the range -2^16 ... +2^16.\n"
36871" */\n"
36872"float __ovld __cnfn half_cos(float x);\n"
36873"float2 __ovld __cnfn half_cos(float2 x);\n"
36874"float3 __ovld __cnfn half_cos(float3 x);\n"
36875"float4 __ovld __cnfn half_cos(float4 x);\n"
36876"float8 __ovld __cnfn half_cos(float8 x);\n"
36877"float16 __ovld __cnfn half_cos(float16 x);\n"
36878"\n"
36879"/**\n"
36880" * Compute x / y.\n"
36881" */\n"
36882"float __ovld __cnfn half_divide(float x, float y);\n"
36883"float2 __ovld __cnfn half_divide(float2 x, float2 y);\n"
36884"float3 __ovld __cnfn half_divide(float3 x, float3 y);\n"
36885"float4 __ovld __cnfn half_divide(float4 x, float4 y);\n"
36886"float8 __ovld __cnfn half_divide(float8 x, float8 y);\n"
36887"float16 __ovld __cnfn half_divide(float16 x, float16 y);\n"
36888"\n"
36889"/**\n"
36890" * Compute the base-e exponential of x.\n"
36891" */\n"
36892"float __ovld __cnfn half_exp(float x);\n"
36893"float2 __ovld __cnfn half_exp(float2 x);\n"
36894"float3 __ovld __cnfn half_exp(float3 x);\n"
36895"float4 __ovld __cnfn half_exp(float4 x);\n"
36896"float8 __ovld __cnfn half_exp(float8 x);\n"
36897"float16 __ovld __cnfn half_exp(float16 x);\n"
36898"\n"
36899"/**\n"
36900" * Compute the base-2 exponential of x.\n"
36901" */\n"
36902"float __ovld __cnfn half_exp2(float x);\n"
36903"float2 __ovld __cnfn half_exp2(float2 x);\n"
36904"float3 __ovld __cnfn half_exp2(float3 x);\n"
36905"float4 __ovld __cnfn half_exp2(float4 x);\n"
36906"float8 __ovld __cnfn half_exp2(float8 x);\n"
36907"float16 __ovld __cnfn half_exp2(float16 x);\n"
36908"\n"
36909"/**\n"
36910" * Compute the base-10 exponential of x.\n"
36911" */\n"
36912"float __ovld __cnfn half_exp10(float x);\n"
36913"float2 __ovld __cnfn half_exp10(float2 x);\n"
36914"float3 __ovld __cnfn half_exp10(float3 x);\n"
36915"float4 __ovld __cnfn half_exp10(float4 x);\n"
36916"float8 __ovld __cnfn half_exp10(float8 x);\n"
36917"float16 __ovld __cnfn half_exp10(float16 x);\n"
36918"\n"
36919"/**\n"
36920" * Compute natural logarithm.\n"
36921" */\n"
36922"float __ovld __cnfn half_log(float x);\n"
36923"float2 __ovld __cnfn half_log(float2 x);\n"
36924"float3 __ovld __cnfn half_log(float3 x);\n"
36925"float4 __ovld __cnfn half_log(float4 x);\n"
36926"float8 __ovld __cnfn half_log(float8 x);\n"
36927"float16 __ovld __cnfn half_log(float16 x);\n"
36928"\n"
36929"/**\n"
36930" * Compute a base 2 logarithm.\n"
36931" */\n"
36932"float __ovld __cnfn half_log2(float x);\n"
36933"float2 __ovld __cnfn half_log2(float2 x);\n"
36934"float3 __ovld __cnfn half_log2(float3 x);\n"
36935"float4 __ovld __cnfn half_log2(float4 x);\n"
36936"float8 __ovld __cnfn half_log2(float8 x);\n"
36937"float16 __ovld __cnfn half_log2(float16 x);\n"
36938"\n"
36939"/**\n"
36940" * Compute a base 10 logarithm.\n"
36941" */\n"
36942"float __ovld __cnfn half_log10(float x);\n"
36943"float2 __ovld __cnfn half_log10(float2 x);\n"
36944"float3 __ovld __cnfn half_log10(float3 x);\n"
36945"float4 __ovld __cnfn half_log10(float4 x);\n"
36946"float8 __ovld __cnfn half_log10(float8 x);\n"
36947"float16 __ovld __cnfn half_log10(float16 x);\n"
36948"\n"
36949"/**\n"
36950" * Compute x to the power y, where x is >= 0.\n"
36951" */\n"
36952"float __ovld __cnfn half_powr(float x, float y);\n"
36953"float2 __ovld __cnfn half_powr(float2 x, float2 y);\n"
36954"float3 __ovld __cnfn half_powr(float3 x, float3 y);\n"
36955"float4 __ovld __cnfn half_powr(float4 x, float4 y);\n"
36956"float8 __ovld __cnfn half_powr(float8 x, float8 y);\n"
36957"float16 __ovld __cnfn half_powr(float16 x, float16 y);\n"
36958"\n"
36959"/**\n"
36960" * Compute reciprocal.\n"
36961" */\n"
36962"float __ovld __cnfn half_recip(float x);\n"
36963"float2 __ovld __cnfn half_recip(float2 x);\n"
36964"float3 __ovld __cnfn half_recip(float3 x);\n"
36965"float4 __ovld __cnfn half_recip(float4 x);\n"
36966"float8 __ovld __cnfn half_recip(float8 x);\n"
36967"float16 __ovld __cnfn half_recip(float16 x);\n"
36968"\n"
36969"/**\n"
36970" * Compute inverse square root.\n"
36971" */\n"
36972"float __ovld __cnfn half_rsqrt(float x);\n"
36973"float2 __ovld __cnfn half_rsqrt(float2 x);\n"
36974"float3 __ovld __cnfn half_rsqrt(float3 x);\n"
36975"float4 __ovld __cnfn half_rsqrt(float4 x);\n"
36976"float8 __ovld __cnfn half_rsqrt(float8 x);\n"
36977"float16 __ovld __cnfn half_rsqrt(float16 x);\n"
36978"\n"
36979"/**\n"
36980" * Compute sine. x must be in the range -2^16 ... +2^16.\n"
36981" */\n"
36982"float __ovld __cnfn half_sin(float x);\n"
36983"float2 __ovld __cnfn half_sin(float2 x);\n"
36984"float3 __ovld __cnfn half_sin(float3 x);\n"
36985"float4 __ovld __cnfn half_sin(float4 x);\n"
36986"float8 __ovld __cnfn half_sin(float8 x);\n"
36987"float16 __ovld __cnfn half_sin(float16 x);\n"
36988"\n"
36989"/**\n"
36990" * Compute square root.\n"
36991" */\n"
36992"float __ovld __cnfn half_sqrt(float x);\n"
36993"float2 __ovld __cnfn half_sqrt(float2 x);\n"
36994"float3 __ovld __cnfn half_sqrt(float3 x);\n"
36995"float4 __ovld __cnfn half_sqrt(float4 x);\n"
36996"float8 __ovld __cnfn half_sqrt(float8 x);\n"
36997"float16 __ovld __cnfn half_sqrt(float16 x);\n"
36998"\n"
36999"/**\n"
37000" * Compute tangent. x must be in the range -2^16 ... +2^16.\n"
37001" */\n"
37002"float __ovld __cnfn half_tan(float x);\n"
37003"float2 __ovld __cnfn half_tan(float2 x);\n"
37004"float3 __ovld __cnfn half_tan(float3 x);\n"
37005"float4 __ovld __cnfn half_tan(float4 x);\n"
37006"float8 __ovld __cnfn half_tan(float8 x);\n"
37007"float16 __ovld __cnfn half_tan(float16 x);\n"
37008"\n"
37009"/**\n"
37010" * Compute cosine over an implementation-defined range.\n"
37011" * The maximum error is implementation-defined.\n"
37012" */\n"
37013"float __ovld __cnfn native_cos(float x);\n"
37014"float2 __ovld __cnfn native_cos(float2 x);\n"
37015"float3 __ovld __cnfn native_cos(float3 x);\n"
37016"float4 __ovld __cnfn native_cos(float4 x);\n"
37017"float8 __ovld __cnfn native_cos(float8 x);\n"
37018"float16 __ovld __cnfn native_cos(float16 x);\n"
37019"\n"
37020"/**\n"
37021" * Compute x / y over an implementation-defined range.\n"
37022" * The maximum error is implementation-defined.\n"
37023" */\n"
37024"float __ovld __cnfn native_divide(float x, float y);\n"
37025"float2 __ovld __cnfn native_divide(float2 x, float2 y);\n"
37026"float3 __ovld __cnfn native_divide(float3 x, float3 y);\n"
37027"float4 __ovld __cnfn native_divide(float4 x, float4 y);\n"
37028"float8 __ovld __cnfn native_divide(float8 x, float8 y);\n"
37029"float16 __ovld __cnfn native_divide(float16 x, float16 y);\n"
37030"\n"
37031"/**\n"
37032" * Compute the base-e exponential of x over an\n"
37033" * implementation-defined range. The maximum error is\n"
37034" * implementation-defined.\n"
37035" */\n"
37036"float __ovld __cnfn native_exp(float x);\n"
37037"float2 __ovld __cnfn native_exp(float2 x);\n"
37038"float3 __ovld __cnfn native_exp(float3 x);\n"
37039"float4 __ovld __cnfn native_exp(float4 x);\n"
37040"float8 __ovld __cnfn native_exp(float8 x);\n"
37041"float16 __ovld __cnfn native_exp(float16 x);\n"
37042"\n"
37043"/**\n"
37044" * Compute the base-2 exponential of x over an\n"
37045" * implementation-defined range. The maximum error is\n"
37046" * implementation-defined.\n"
37047" */\n"
37048"float __ovld __cnfn native_exp2(float x);\n"
37049"float2 __ovld __cnfn native_exp2(float2 x);\n"
37050"float3 __ovld __cnfn native_exp2(float3 x);\n"
37051"float4 __ovld __cnfn native_exp2(float4 x);\n"
37052"float8 __ovld __cnfn native_exp2(float8 x);\n"
37053"float16 __ovld __cnfn native_exp2(float16 x);\n"
37054"\n"
37055"/**\n"
37056" * Compute the base-10 exponential of x over an\n"
37057" * implementation-defined range. The maximum error is\n"
37058" * implementation-defined.\n"
37059" */\n"
37060"float __ovld __cnfn native_exp10(float x);\n"
37061"float2 __ovld __cnfn native_exp10(float2 x);\n"
37062"float3 __ovld __cnfn native_exp10(float3 x);\n"
37063"float4 __ovld __cnfn native_exp10(float4 x);\n"
37064"float8 __ovld __cnfn native_exp10(float8 x);\n"
37065"float16 __ovld __cnfn native_exp10(float16 x);\n"
37066"\n"
37067"/**\n"
37068" * Compute natural logarithm over an implementation-defined\n"
37069" * range. The maximum error is\n"
37070" * implementation-defined.\n"
37071" */\n"
37072"float __ovld __cnfn native_log(float x);\n"
37073"float2 __ovld __cnfn native_log(float2 x);\n"
37074"float3 __ovld __cnfn native_log(float3 x);\n"
37075"float4 __ovld __cnfn native_log(float4 x);\n"
37076"float8 __ovld __cnfn native_log(float8 x);\n"
37077"float16 __ovld __cnfn native_log(float16 x);\n"
37078"\n"
37079"/**\n"
37080" * Compute a base 2 logarithm over an implementation-defined\n"
37081" * range. The maximum error is implementation-defined.\n"
37082" */\n"
37083"float __ovld __cnfn native_log2(float x);\n"
37084"float2 __ovld __cnfn native_log2(float2 x);\n"
37085"float3 __ovld __cnfn native_log2(float3 x);\n"
37086"float4 __ovld __cnfn native_log2(float4 x);\n"
37087"float8 __ovld __cnfn native_log2(float8 x);\n"
37088"float16 __ovld __cnfn native_log2(float16 x);\n"
37089"\n"
37090"/**\n"
37091" * Compute a base 10 logarithm over an implementation-defined\n"
37092" * range. The maximum error is implementation-defined.\n"
37093" */\n"
37094"float __ovld __cnfn native_log10(float x);\n"
37095"float2 __ovld __cnfn native_log10(float2 x);\n"
37096"float3 __ovld __cnfn native_log10(float3 x);\n"
37097"float4 __ovld __cnfn native_log10(float4 x);\n"
37098"float8 __ovld __cnfn native_log10(float8 x);\n"
37099"float16 __ovld __cnfn native_log10(float16 x);\n"
37100"\n"
37101"/**\n"
37102" * Compute x to the power y, where x is >= 0. The range of\n"
37103" * x and y are implementation-defined. The maximum error\n"
37104" * is implementation-defined.\n"
37105" */\n"
37106"float __ovld __cnfn native_powr(float x, float y);\n"
37107"float2 __ovld __cnfn native_powr(float2 x, float2 y);\n"
37108"float3 __ovld __cnfn native_powr(float3 x, float3 y);\n"
37109"float4 __ovld __cnfn native_powr(float4 x, float4 y);\n"
37110"float8 __ovld __cnfn native_powr(float8 x, float8 y);\n"
37111"float16 __ovld __cnfn native_powr(float16 x, float16 y);\n"
37112"\n"
37113"/**\n"
37114" * Compute reciprocal over an implementation-defined\n"
37115" * range. The maximum error is implementation-defined.\n"
37116" */\n"
37117"float __ovld __cnfn native_recip(float x);\n"
37118"float2 __ovld __cnfn native_recip(float2 x);\n"
37119"float3 __ovld __cnfn native_recip(float3 x);\n"
37120"float4 __ovld __cnfn native_recip(float4 x);\n"
37121"float8 __ovld __cnfn native_recip(float8 x);\n"
37122"float16 __ovld __cnfn native_recip(float16 x);\n"
37123"\n"
37124"/**\n"
37125" * Compute inverse square root over an implementation-defined\n"
37126" * range. The maximum error is implementation-defined.\n"
37127" */\n"
37128"float __ovld __cnfn native_rsqrt(float x);\n"
37129"float2 __ovld __cnfn native_rsqrt(float2 x);\n"
37130"float3 __ovld __cnfn native_rsqrt(float3 x);\n"
37131"float4 __ovld __cnfn native_rsqrt(float4 x);\n"
37132"float8 __ovld __cnfn native_rsqrt(float8 x);\n"
37133"float16 __ovld __cnfn native_rsqrt(float16 x);\n"
37134"\n"
37135"/**\n"
37136" * Compute sine over an implementation-defined range.\n"
37137" * The maximum error is implementation-defined.\n"
37138" */\n"
37139"float __ovld __cnfn native_sin(float x);\n"
37140"float2 __ovld __cnfn native_sin(float2 x);\n"
37141"float3 __ovld __cnfn native_sin(float3 x);\n"
37142"float4 __ovld __cnfn native_sin(float4 x);\n"
37143"float8 __ovld __cnfn native_sin(float8 x);\n"
37144"float16 __ovld __cnfn native_sin(float16 x);\n"
37145"\n"
37146"/**\n"
37147" * Compute square root over an implementation-defined\n"
37148" * range. The maximum error is implementation-defined.\n"
37149" */\n"
37150"float __ovld __cnfn native_sqrt(float x);\n"
37151"float2 __ovld __cnfn native_sqrt(float2 x);\n"
37152"float3 __ovld __cnfn native_sqrt(float3 x);\n"
37153"float4 __ovld __cnfn native_sqrt(float4 x);\n"
37154"float8 __ovld __cnfn native_sqrt(float8 x);\n"
37155"float16 __ovld __cnfn native_sqrt(float16 x);\n"
37156"\n"
37157"/**\n"
37158" * Compute tangent over an implementation-defined range.\n"
37159" * The maximum error is implementation-defined.\n"
37160" */\n"
37161"float __ovld __cnfn native_tan(float x);\n"
37162"float2 __ovld __cnfn native_tan(float2 x);\n"
37163"float3 __ovld __cnfn native_tan(float3 x);\n"
37164"float4 __ovld __cnfn native_tan(float4 x);\n"
37165"float8 __ovld __cnfn native_tan(float8 x);\n"
37166"float16 __ovld __cnfn native_tan(float16 x);\n"
37167"\n"
37168"// OpenCL v1.1 s6.11.3, v1.2 s6.12.3, v2.0 s6.13.3 - Integer Functions\n"
37169"\n"
37170"/**\n"
37171" * Returns | x |.\n"
37172" */\n"
37173"uchar __ovld __cnfn abs(char x);\n"
37174"uchar __ovld __cnfn abs(uchar x);\n"
37175"uchar2 __ovld __cnfn abs(char2 x);\n"
37176"uchar2 __ovld __cnfn abs(uchar2 x);\n"
37177"uchar3 __ovld __cnfn abs(char3 x);\n"
37178"uchar3 __ovld __cnfn abs(uchar3 x);\n"
37179"uchar4 __ovld __cnfn abs(char4 x);\n"
37180"uchar4 __ovld __cnfn abs(uchar4 x);\n"
37181"uchar8 __ovld __cnfn abs(char8 x);\n"
37182"uchar8 __ovld __cnfn abs(uchar8 x);\n"
37183"uchar16 __ovld __cnfn abs(char16 x);\n"
37184"uchar16 __ovld __cnfn abs(uchar16 x);\n"
37185"ushort __ovld __cnfn abs(short x);\n"
37186"ushort __ovld __cnfn abs(ushort x);\n"
37187"ushort2 __ovld __cnfn abs(short2 x);\n"
37188"ushort2 __ovld __cnfn abs(ushort2 x);\n"
37189"ushort3 __ovld __cnfn abs(short3 x);\n"
37190"ushort3 __ovld __cnfn abs(ushort3 x);\n"
37191"ushort4 __ovld __cnfn abs(short4 x);\n"
37192"ushort4 __ovld __cnfn abs(ushort4 x);\n"
37193"ushort8 __ovld __cnfn abs(short8 x);\n"
37194"ushort8 __ovld __cnfn abs(ushort8 x);\n"
37195"ushort16 __ovld __cnfn abs(short16 x);\n"
37196"ushort16 __ovld __cnfn abs(ushort16 x);\n"
37197"uint __ovld __cnfn abs(int x);\n"
37198"uint __ovld __cnfn abs(uint x);\n"
37199"uint2 __ovld __cnfn abs(int2 x);\n"
37200"uint2 __ovld __cnfn abs(uint2 x);\n"
37201"uint3 __ovld __cnfn abs(int3 x);\n"
37202"uint3 __ovld __cnfn abs(uint3 x);\n"
37203"uint4 __ovld __cnfn abs(int4 x);\n"
37204"uint4 __ovld __cnfn abs(uint4 x);\n"
37205"uint8 __ovld __cnfn abs(int8 x);\n"
37206"uint8 __ovld __cnfn abs(uint8 x);\n"
37207"uint16 __ovld __cnfn abs(int16 x);\n"
37208"uint16 __ovld __cnfn abs(uint16 x);\n"
37209"ulong __ovld __cnfn abs(long x);\n"
37210"ulong __ovld __cnfn abs(ulong x);\n"
37211"ulong2 __ovld __cnfn abs(long2 x);\n"
37212"ulong2 __ovld __cnfn abs(ulong2 x);\n"
37213"ulong3 __ovld __cnfn abs(long3 x);\n"
37214"ulong3 __ovld __cnfn abs(ulong3 x);\n"
37215"ulong4 __ovld __cnfn abs(long4 x);\n"
37216"ulong4 __ovld __cnfn abs(ulong4 x);\n"
37217"ulong8 __ovld __cnfn abs(long8 x);\n"
37218"ulong8 __ovld __cnfn abs(ulong8 x);\n"
37219"ulong16 __ovld __cnfn abs(long16 x);\n"
37220"ulong16 __ovld __cnfn abs(ulong16 x);\n"
37221"\n"
37222"/**\n"
37223" * Returns | x - y | without modulo overflow.\n"
37224" */\n"
37225"uchar __ovld __cnfn abs_diff(char x, char y);\n"
37226"uchar __ovld __cnfn abs_diff(uchar x, uchar y);\n"
37227"uchar2 __ovld __cnfn abs_diff(char2 x, char2 y);\n"
37228"uchar2 __ovld __cnfn abs_diff(uchar2 x, uchar2 y);\n"
37229"uchar3 __ovld __cnfn abs_diff(char3 x, char3 y);\n"
37230"uchar3 __ovld __cnfn abs_diff(uchar3 x, uchar3 y);\n"
37231"uchar4 __ovld __cnfn abs_diff(char4 x, char4 y);\n"
37232"uchar4 __ovld __cnfn abs_diff(uchar4 x, uchar4 y);\n"
37233"uchar8 __ovld __cnfn abs_diff(char8 x, char8 y);\n"
37234"uchar8 __ovld __cnfn abs_diff(uchar8 x, uchar8 y);\n"
37235"uchar16 __ovld __cnfn abs_diff(char16 x, char16 y);\n"
37236"uchar16 __ovld __cnfn abs_diff(uchar16 x, uchar16 y);\n"
37237"ushort __ovld __cnfn abs_diff(short x, short y);\n"
37238"ushort __ovld __cnfn abs_diff(ushort x, ushort y);\n"
37239"ushort2 __ovld __cnfn abs_diff(short2 x, short2 y);\n"
37240"ushort2 __ovld __cnfn abs_diff(ushort2 x, ushort2 y);\n"
37241"ushort3 __ovld __cnfn abs_diff(short3 x, short3 y);\n"
37242"ushort3 __ovld __cnfn abs_diff(ushort3 x, ushort3 y);\n"
37243"ushort4 __ovld __cnfn abs_diff(short4 x, short4 y);\n"
37244"ushort4 __ovld __cnfn abs_diff(ushort4 x, ushort4 y);\n"
37245"ushort8 __ovld __cnfn abs_diff(short8 x, short8 y);\n"
37246"ushort8 __ovld __cnfn abs_diff(ushort8 x, ushort8 y);\n"
37247"ushort16 __ovld __cnfn abs_diff(short16 x, short16 y);\n"
37248"ushort16 __ovld __cnfn abs_diff(ushort16 x, ushort16 y);\n"
37249"uint __ovld __cnfn abs_diff(int x, int y);\n"
37250"uint __ovld __cnfn abs_diff(uint x, uint y);\n"
37251"uint2 __ovld __cnfn abs_diff(int2 x, int2 y);\n"
37252"uint2 __ovld __cnfn abs_diff(uint2 x, uint2 y);\n"
37253"uint3 __ovld __cnfn abs_diff(int3 x, int3 y);\n"
37254"uint3 __ovld __cnfn abs_diff(uint3 x, uint3 y);\n"
37255"uint4 __ovld __cnfn abs_diff(int4 x, int4 y);\n"
37256"uint4 __ovld __cnfn abs_diff(uint4 x, uint4 y);\n"
37257"uint8 __ovld __cnfn abs_diff(int8 x, int8 y);\n"
37258"uint8 __ovld __cnfn abs_diff(uint8 x, uint8 y);\n"
37259"uint16 __ovld __cnfn abs_diff(int16 x, int16 y);\n"
37260"uint16 __ovld __cnfn abs_diff(uint16 x, uint16 y);\n"
37261"ulong __ovld __cnfn abs_diff(long x, long y);\n"
37262"ulong __ovld __cnfn abs_diff(ulong x, ulong y);\n"
37263"ulong2 __ovld __cnfn abs_diff(long2 x, long2 y);\n"
37264"ulong2 __ovld __cnfn abs_diff(ulong2 x, ulong2 y);\n"
37265"ulong3 __ovld __cnfn abs_diff(long3 x, long3 y);\n"
37266"ulong3 __ovld __cnfn abs_diff(ulong3 x, ulong3 y);\n"
37267"ulong4 __ovld __cnfn abs_diff(long4 x, long4 y);\n"
37268"ulong4 __ovld __cnfn abs_diff(ulong4 x, ulong4 y);\n"
37269"ulong8 __ovld __cnfn abs_diff(long8 x, long8 y);\n"
37270"ulong8 __ovld __cnfn abs_diff(ulong8 x, ulong8 y);\n"
37271"ulong16 __ovld __cnfn abs_diff(long16 x, long16 y);\n"
37272"ulong16 __ovld __cnfn abs_diff(ulong16 x, ulong16 y);\n"
37273"\n"
37274"/**\n"
37275" * Returns x + y and saturates the result.\n"
37276" */\n"
37277"char __ovld __cnfn add_sat(char x, char y);\n"
37278"uchar __ovld __cnfn add_sat(uchar x, uchar y);\n"
37279"char2 __ovld __cnfn add_sat(char2 x, char2 y);\n"
37280"uchar2 __ovld __cnfn add_sat(uchar2 x, uchar2 y);\n"
37281"char3 __ovld __cnfn add_sat(char3 x, char3 y);\n"
37282"uchar3 __ovld __cnfn add_sat(uchar3 x, uchar3 y);\n"
37283"char4 __ovld __cnfn add_sat(char4 x, char4 y);\n"
37284"uchar4 __ovld __cnfn add_sat(uchar4 x, uchar4 y);\n"
37285"char8 __ovld __cnfn add_sat(char8 x, char8 y);\n"
37286"uchar8 __ovld __cnfn add_sat(uchar8 x, uchar8 y);\n"
37287"char16 __ovld __cnfn add_sat(char16 x, char16 y);\n"
37288"uchar16 __ovld __cnfn add_sat(uchar16 x, uchar16 y);\n"
37289"short __ovld __cnfn add_sat(short x, short y);\n"
37290"ushort __ovld __cnfn add_sat(ushort x, ushort y);\n"
37291"short2 __ovld __cnfn add_sat(short2 x, short2 y);\n"
37292"ushort2 __ovld __cnfn add_sat(ushort2 x, ushort2 y);\n"
37293"short3 __ovld __cnfn add_sat(short3 x, short3 y);\n"
37294"ushort3 __ovld __cnfn add_sat(ushort3 x, ushort3 y);\n"
37295"short4 __ovld __cnfn add_sat(short4 x, short4 y);\n"
37296"ushort4 __ovld __cnfn add_sat(ushort4 x, ushort4 y);\n"
37297"short8 __ovld __cnfn add_sat(short8 x, short8 y);\n"
37298"ushort8 __ovld __cnfn add_sat(ushort8 x, ushort8 y);\n"
37299"short16 __ovld __cnfn add_sat(short16 x, short16 y);\n"
37300"ushort16 __ovld __cnfn add_sat(ushort16 x, ushort16 y);\n"
37301"int __ovld __cnfn add_sat(int x, int y);\n"
37302"uint __ovld __cnfn add_sat(uint x, uint y);\n"
37303"int2 __ovld __cnfn add_sat(int2 x, int2 y);\n"
37304"uint2 __ovld __cnfn add_sat(uint2 x, uint2 y);\n"
37305"int3 __ovld __cnfn add_sat(int3 x, int3 y);\n"
37306"uint3 __ovld __cnfn add_sat(uint3 x, uint3 y);\n"
37307"int4 __ovld __cnfn add_sat(int4 x, int4 y);\n"
37308"uint4 __ovld __cnfn add_sat(uint4 x, uint4 y);\n"
37309"int8 __ovld __cnfn add_sat(int8 x, int8 y);\n"
37310"uint8 __ovld __cnfn add_sat(uint8 x, uint8 y);\n"
37311"int16 __ovld __cnfn add_sat(int16 x, int16 y);\n"
37312"uint16 __ovld __cnfn add_sat(uint16 x, uint16 y);\n"
37313"long __ovld __cnfn add_sat(long x, long y);\n"
37314"ulong __ovld __cnfn add_sat(ulong x, ulong y);\n"
37315"long2 __ovld __cnfn add_sat(long2 x, long2 y);\n"
37316"ulong2 __ovld __cnfn add_sat(ulong2 x, ulong2 y);\n"
37317"long3 __ovld __cnfn add_sat(long3 x, long3 y);\n"
37318"ulong3 __ovld __cnfn add_sat(ulong3 x, ulong3 y);\n"
37319"long4 __ovld __cnfn add_sat(long4 x, long4 y);\n"
37320"ulong4 __ovld __cnfn add_sat(ulong4 x, ulong4 y);\n"
37321"long8 __ovld __cnfn add_sat(long8 x, long8 y);\n"
37322"ulong8 __ovld __cnfn add_sat(ulong8 x, ulong8 y);\n"
37323"long16 __ovld __cnfn add_sat(long16 x, long16 y);\n"
37324"ulong16 __ovld __cnfn add_sat(ulong16 x, ulong16 y);\n"
37325"\n"
37326"/**\n"
37327" * Returns (x + y) >> 1. The intermediate sum does\n"
37328" * not modulo overflow.\n"
37329" */\n"
37330"char __ovld __cnfn hadd(char x, char y);\n"
37331"uchar __ovld __cnfn hadd(uchar x, uchar y);\n"
37332"char2 __ovld __cnfn hadd(char2 x, char2 y);\n"
37333"uchar2 __ovld __cnfn hadd(uchar2 x, uchar2 y);\n"
37334"char3 __ovld __cnfn hadd(char3 x, char3 y);\n"
37335"uchar3 __ovld __cnfn hadd(uchar3 x, uchar3 y);\n"
37336"char4 __ovld __cnfn hadd(char4 x, char4 y);\n"
37337"uchar4 __ovld __cnfn hadd(uchar4 x, uchar4 y);\n"
37338"char8 __ovld __cnfn hadd(char8 x, char8 y);\n"
37339"uchar8 __ovld __cnfn hadd(uchar8 x, uchar8 y);\n"
37340"char16 __ovld __cnfn hadd(char16 x, char16 y);\n"
37341"uchar16 __ovld __cnfn hadd(uchar16 x, uchar16 y);\n"
37342"short __ovld __cnfn hadd(short x, short y);\n"
37343"ushort __ovld __cnfn hadd(ushort x, ushort y);\n"
37344"short2 __ovld __cnfn hadd(short2 x, short2 y);\n"
37345"ushort2 __ovld __cnfn hadd(ushort2 x, ushort2 y);\n"
37346"short3 __ovld __cnfn hadd(short3 x, short3 y);\n"
37347"ushort3 __ovld __cnfn hadd(ushort3 x, ushort3 y);\n"
37348"short4 __ovld __cnfn hadd(short4 x, short4 y);\n"
37349"ushort4 __ovld __cnfn hadd(ushort4 x, ushort4 y);\n"
37350"short8 __ovld __cnfn hadd(short8 x, short8 y);\n"
37351"ushort8 __ovld __cnfn hadd(ushort8 x, ushort8 y);\n"
37352"short16 __ovld __cnfn hadd(short16 x, short16 y);\n"
37353"ushort16 __ovld __cnfn hadd(ushort16 x, ushort16 y);\n"
37354"int __ovld __cnfn hadd(int x, int y);\n"
37355"uint __ovld __cnfn hadd(uint x, uint y);\n"
37356"int2 __ovld __cnfn hadd(int2 x, int2 y);\n"
37357"uint2 __ovld __cnfn hadd(uint2 x, uint2 y);\n"
37358"int3 __ovld __cnfn hadd(int3 x, int3 y);\n"
37359"uint3 __ovld __cnfn hadd(uint3 x, uint3 y);\n"
37360"int4 __ovld __cnfn hadd(int4 x, int4 y);\n"
37361"uint4 __ovld __cnfn hadd(uint4 x, uint4 y);\n"
37362"int8 __ovld __cnfn hadd(int8 x, int8 y);\n"
37363"uint8 __ovld __cnfn hadd(uint8 x, uint8 y);\n"
37364"int16 __ovld __cnfn hadd(int16 x, int16 y);\n"
37365"uint16 __ovld __cnfn hadd(uint16 x, uint16 y);\n"
37366"long __ovld __cnfn hadd(long x, long y);\n"
37367"ulong __ovld __cnfn hadd(ulong x, ulong y);\n"
37368"long2 __ovld __cnfn hadd(long2 x, long2 y);\n"
37369"ulong2 __ovld __cnfn hadd(ulong2 x, ulong2 y);\n"
37370"long3 __ovld __cnfn hadd(long3 x, long3 y);\n"
37371"ulong3 __ovld __cnfn hadd(ulong3 x, ulong3 y);\n"
37372"long4 __ovld __cnfn hadd(long4 x, long4 y);\n"
37373"ulong4 __ovld __cnfn hadd(ulong4 x, ulong4 y);\n"
37374"long8 __ovld __cnfn hadd(long8 x, long8 y);\n"
37375"ulong8 __ovld __cnfn hadd(ulong8 x, ulong8 y);\n"
37376"long16 __ovld __cnfn hadd(long16 x, long16 y);\n"
37377"ulong16 __ovld __cnfn hadd(ulong16 x, ulong16 y);\n"
37378"\n"
37379"/**\n"
37380" * Returns (x + y + 1) >> 1. The intermediate sum\n"
37381" * does not modulo overflow.\n"
37382" */\n"
37383"char __ovld __cnfn rhadd(char x, char y);\n"
37384"uchar __ovld __cnfn rhadd(uchar x, uchar y);\n"
37385"char2 __ovld __cnfn rhadd(char2 x, char2 y);\n"
37386"uchar2 __ovld __cnfn rhadd(uchar2 x, uchar2 y);\n"
37387"char3 __ovld __cnfn rhadd(char3 x, char3 y);\n"
37388"uchar3 __ovld __cnfn rhadd(uchar3 x, uchar3 y);\n"
37389"char4 __ovld __cnfn rhadd(char4 x, char4 y);\n"
37390"uchar4 __ovld __cnfn rhadd(uchar4 x, uchar4 y);\n"
37391"char8 __ovld __cnfn rhadd(char8 x, char8 y);\n"
37392"uchar8 __ovld __cnfn rhadd(uchar8 x, uchar8 y);\n"
37393"char16 __ovld __cnfn rhadd(char16 x, char16 y);\n"
37394"uchar16 __ovld __cnfn rhadd(uchar16 x, uchar16 y);\n"
37395"short __ovld __cnfn rhadd(short x, short y);\n"
37396"ushort __ovld __cnfn rhadd(ushort x, ushort y);\n"
37397"short2 __ovld __cnfn rhadd(short2 x, short2 y);\n"
37398"ushort2 __ovld __cnfn rhadd(ushort2 x, ushort2 y);\n"
37399"short3 __ovld __cnfn rhadd(short3 x, short3 y);\n"
37400"ushort3 __ovld __cnfn rhadd(ushort3 x, ushort3 y);\n"
37401"short4 __ovld __cnfn rhadd(short4 x, short4 y);\n"
37402"ushort4 __ovld __cnfn rhadd(ushort4 x, ushort4 y);\n"
37403"short8 __ovld __cnfn rhadd(short8 x, short8 y);\n"
37404"ushort8 __ovld __cnfn rhadd(ushort8 x, ushort8 y);\n"
37405"short16 __ovld __cnfn rhadd(short16 x, short16 y);\n"
37406"ushort16 __ovld __cnfn rhadd(ushort16 x, ushort16 y);\n"
37407"int __ovld __cnfn rhadd(int x, int y);\n"
37408"uint __ovld __cnfn rhadd(uint x, uint y);\n"
37409"int2 __ovld __cnfn rhadd(int2 x, int2 y);\n"
37410"uint2 __ovld __cnfn rhadd(uint2 x, uint2 y);\n"
37411"int3 __ovld __cnfn rhadd(int3 x, int3 y);\n"
37412"uint3 __ovld __cnfn rhadd(uint3 x, uint3 y);\n"
37413"int4 __ovld __cnfn rhadd(int4 x, int4 y);\n"
37414"uint4 __ovld __cnfn rhadd(uint4 x, uint4 y);\n"
37415"int8 __ovld __cnfn rhadd(int8 x, int8 y);\n"
37416"uint8 __ovld __cnfn rhadd(uint8 x, uint8 y);\n"
37417"int16 __ovld __cnfn rhadd(int16 x, int16 y);\n"
37418"uint16 __ovld __cnfn rhadd(uint16 x, uint16 y);\n"
37419"long __ovld __cnfn rhadd(long x, long y);\n"
37420"ulong __ovld __cnfn rhadd(ulong x, ulong y);\n"
37421"long2 __ovld __cnfn rhadd(long2 x, long2 y);\n"
37422"ulong2 __ovld __cnfn rhadd(ulong2 x, ulong2 y);\n"
37423"long3 __ovld __cnfn rhadd(long3 x, long3 y);\n"
37424"ulong3 __ovld __cnfn rhadd(ulong3 x, ulong3 y);\n"
37425"long4 __ovld __cnfn rhadd(long4 x, long4 y);\n"
37426"ulong4 __ovld __cnfn rhadd(ulong4 x, ulong4 y);\n"
37427"long8 __ovld __cnfn rhadd(long8 x, long8 y);\n"
37428"ulong8 __ovld __cnfn rhadd(ulong8 x, ulong8 y);\n"
37429"long16 __ovld __cnfn rhadd(long16 x, long16 y);\n"
37430"ulong16 __ovld __cnfn rhadd(ulong16 x, ulong16 y);\n"
37431"\n"
37432"/**\n"
37433" * Returns min(max(x, minval), maxval).\n"
37434" * Results are undefined if minval > maxval.\n"
37435" */\n"
37436"char __ovld __cnfn clamp(char x, char minval, char maxval);\n"
37437"uchar __ovld __cnfn clamp(uchar x, uchar minval, uchar maxval);\n"
37438"char2 __ovld __cnfn clamp(char2 x, char2 minval, char2 maxval);\n"
37439"uchar2 __ovld __cnfn clamp(uchar2 x, uchar2 minval, uchar2 maxval);\n"
37440"char3 __ovld __cnfn clamp(char3 x, char3 minval, char3 maxval);\n"
37441"uchar3 __ovld __cnfn clamp(uchar3 x, uchar3 minval, uchar3 maxval);\n"
37442"char4 __ovld __cnfn clamp(char4 x, char4 minval, char4 maxval);\n"
37443"uchar4 __ovld __cnfn clamp(uchar4 x, uchar4 minval, uchar4 maxval);\n"
37444"char8 __ovld __cnfn clamp(char8 x, char8 minval, char8 maxval);\n"
37445"uchar8 __ovld __cnfn clamp(uchar8 x, uchar8 minval, uchar8 maxval);\n"
37446"char16 __ovld __cnfn clamp(char16 x, char16 minval, char16 maxval);\n"
37447"uchar16 __ovld __cnfn clamp(uchar16 x, uchar16 minval, uchar16 maxval);\n"
37448"short __ovld __cnfn clamp(short x, short minval, short maxval);\n"
37449"ushort __ovld __cnfn clamp(ushort x, ushort minval, ushort maxval);\n"
37450"short2 __ovld __cnfn clamp(short2 x, short2 minval, short2 maxval);\n"
37451"ushort2 __ovld __cnfn clamp(ushort2 x, ushort2 minval, ushort2 maxval);\n"
37452"short3 __ovld __cnfn clamp(short3 x, short3 minval, short3 maxval);\n"
37453"ushort3 __ovld __cnfn clamp(ushort3 x, ushort3 minval, ushort3 maxval);\n"
37454"short4 __ovld __cnfn clamp(short4 x, short4 minval, short4 maxval);\n"
37455"ushort4 __ovld __cnfn clamp(ushort4 x, ushort4 minval, ushort4 maxval);\n"
37456"short8 __ovld __cnfn clamp(short8 x, short8 minval, short8 maxval);\n"
37457"ushort8 __ovld __cnfn clamp(ushort8 x, ushort8 minval, ushort8 maxval);\n"
37458"short16 __ovld __cnfn clamp(short16 x, short16 minval, short16 maxval);\n"
37459"ushort16 __ovld __cnfn clamp(ushort16 x, ushort16 minval, ushort16 maxval);\n"
37460"int __ovld __cnfn clamp(int x, int minval, int maxval);\n"
37461"uint __ovld __cnfn clamp(uint x, uint minval, uint maxval);\n"
37462"int2 __ovld __cnfn clamp(int2 x, int2 minval, int2 maxval);\n"
37463"uint2 __ovld __cnfn clamp(uint2 x, uint2 minval, uint2 maxval);\n"
37464"int3 __ovld __cnfn clamp(int3 x, int3 minval, int3 maxval);\n"
37465"uint3 __ovld __cnfn clamp(uint3 x, uint3 minval, uint3 maxval);\n"
37466"int4 __ovld __cnfn clamp(int4 x, int4 minval, int4 maxval);\n"
37467"uint4 __ovld __cnfn clamp(uint4 x, uint4 minval, uint4 maxval);\n"
37468"int8 __ovld __cnfn clamp(int8 x, int8 minval, int8 maxval);\n"
37469"uint8 __ovld __cnfn clamp(uint8 x, uint8 minval, uint8 maxval);\n"
37470"int16 __ovld __cnfn clamp(int16 x, int16 minval, int16 maxval);\n"
37471"uint16 __ovld __cnfn clamp(uint16 x, uint16 minval, uint16 maxval);\n"
37472"long __ovld __cnfn clamp(long x, long minval, long maxval);\n"
37473"ulong __ovld __cnfn clamp(ulong x, ulong minval, ulong maxval);\n"
37474"long2 __ovld __cnfn clamp(long2 x, long2 minval, long2 maxval);\n"
37475"ulong2 __ovld __cnfn clamp(ulong2 x, ulong2 minval, ulong2 maxval);\n"
37476"long3 __ovld __cnfn clamp(long3 x, long3 minval, long3 maxval);\n"
37477"ulong3 __ovld __cnfn clamp(ulong3 x, ulong3 minval, ulong3 maxval);\n"
37478"long4 __ovld __cnfn clamp(long4 x, long4 minval, long4 maxval);\n"
37479"ulong4 __ovld __cnfn clamp(ulong4 x, ulong4 minval, ulong4 maxval);\n"
37480"long8 __ovld __cnfn clamp(long8 x, long8 minval, long8 maxval);\n"
37481"ulong8 __ovld __cnfn clamp(ulong8 x, ulong8 minval, ulong8 maxval);\n"
37482"long16 __ovld __cnfn clamp(long16 x, long16 minval, long16 maxval);\n"
37483"ulong16 __ovld __cnfn clamp(ulong16 x, ulong16 minval, ulong16 maxval);\n"
37484"char __ovld __cnfn clamp(char x, char minval, char maxval);\n"
37485"uchar __ovld __cnfn clamp(uchar x, uchar minval, uchar maxval);\n"
37486"char2 __ovld __cnfn clamp(char2 x, char minval, char maxval);\n"
37487"uchar2 __ovld __cnfn clamp(uchar2 x, uchar minval, uchar maxval);\n"
37488"char3 __ovld __cnfn clamp(char3 x, char minval, char maxval);\n"
37489"uchar3 __ovld __cnfn clamp(uchar3 x, uchar minval, uchar maxval);\n"
37490"char4 __ovld __cnfn clamp(char4 x, char minval, char maxval);\n"
37491"uchar4 __ovld __cnfn clamp(uchar4 x, uchar minval, uchar maxval);\n"
37492"char8 __ovld __cnfn clamp(char8 x, char minval, char maxval);\n"
37493"uchar8 __ovld __cnfn clamp(uchar8 x, uchar minval, uchar maxval);\n"
37494"char16 __ovld __cnfn clamp(char16 x, char minval, char maxval);\n"
37495"uchar16 __ovld __cnfn clamp(uchar16 x, uchar minval, uchar maxval);\n"
37496"short __ovld __cnfn clamp(short x, short minval, short maxval);\n"
37497"ushort __ovld __cnfn clamp(ushort x, ushort minval, ushort maxval);\n"
37498"short2 __ovld __cnfn clamp(short2 x, short minval, short maxval);\n"
37499"ushort2 __ovld __cnfn clamp(ushort2 x, ushort minval, ushort maxval);\n"
37500"short3 __ovld __cnfn clamp(short3 x, short minval, short maxval);\n"
37501"ushort3 __ovld __cnfn clamp(ushort3 x, ushort minval, ushort maxval);\n"
37502"short4 __ovld __cnfn clamp(short4 x, short minval, short maxval);\n"
37503"ushort4 __ovld __cnfn clamp(ushort4 x, ushort minval, ushort maxval);\n"
37504"short8 __ovld __cnfn clamp(short8 x, short minval, short maxval);\n"
37505"ushort8 __ovld __cnfn clamp(ushort8 x, ushort minval, ushort maxval);\n"
37506"short16 __ovld __cnfn clamp(short16 x, short minval, short maxval);\n"
37507"ushort16 __ovld __cnfn clamp(ushort16 x, ushort minval, ushort maxval);\n"
37508"int __ovld __cnfn clamp(int x, int minval, int maxval);\n"
37509"uint __ovld __cnfn clamp(uint x, uint minval, uint maxval);\n"
37510"int2 __ovld __cnfn clamp(int2 x, int minval, int maxval);\n"
37511"uint2 __ovld __cnfn clamp(uint2 x, uint minval, uint maxval);\n"
37512"int3 __ovld __cnfn clamp(int3 x, int minval, int maxval);\n"
37513"uint3 __ovld __cnfn clamp(uint3 x, uint minval, uint maxval);\n"
37514"int4 __ovld __cnfn clamp(int4 x, int minval, int maxval);\n"
37515"uint4 __ovld __cnfn clamp(uint4 x, uint minval, uint maxval);\n"
37516"int8 __ovld __cnfn clamp(int8 x, int minval, int maxval);\n"
37517"uint8 __ovld __cnfn clamp(uint8 x, uint minval, uint maxval);\n"
37518"int16 __ovld __cnfn clamp(int16 x, int minval, int maxval);\n"
37519"uint16 __ovld __cnfn clamp(uint16 x, uint minval, uint maxval);\n"
37520"long __ovld __cnfn clamp(long x, long minval, long maxval);\n"
37521"ulong __ovld __cnfn clamp(ulong x, ulong minval, ulong maxval);\n"
37522"long2 __ovld __cnfn clamp(long2 x, long minval, long maxval);\n"
37523"ulong2 __ovld __cnfn clamp(ulong2 x, ulong minval, ulong maxval);\n"
37524"long3 __ovld __cnfn clamp(long3 x, long minval, long maxval);\n"
37525"ulong3 __ovld __cnfn clamp(ulong3 x, ulong minval, ulong maxval);\n"
37526"long4 __ovld __cnfn clamp(long4 x, long minval, long maxval);\n"
37527"ulong4 __ovld __cnfn clamp(ulong4 x, ulong minval, ulong maxval);\n"
37528"long8 __ovld __cnfn clamp(long8 x, long minval, long maxval);\n"
37529"ulong8 __ovld __cnfn clamp(ulong8 x, ulong minval, ulong maxval);\n"
37530"long16 __ovld __cnfn clamp(long16 x, long minval, long maxval);\n"
37531"ulong16 __ovld __cnfn clamp(ulong16 x, ulong minval, ulong maxval);\n"
37532"\n"
37533"/**\n"
37534" * Returns the number of leading 0-bits in x, starting\n"
37535" * at the most significant bit position.\n"
37536" */\n"
37537"char __ovld __cnfn clz(char x);\n"
37538"uchar __ovld __cnfn clz(uchar x);\n"
37539"char2 __ovld __cnfn clz(char2 x);\n"
37540"uchar2 __ovld __cnfn clz(uchar2 x);\n"
37541"char3 __ovld __cnfn clz(char3 x);\n"
37542"uchar3 __ovld __cnfn clz(uchar3 x);\n"
37543"char4 __ovld __cnfn clz(char4 x);\n"
37544"uchar4 __ovld __cnfn clz(uchar4 x);\n"
37545"char8 __ovld __cnfn clz(char8 x);\n"
37546"uchar8 __ovld __cnfn clz(uchar8 x);\n"
37547"char16 __ovld __cnfn clz(char16 x);\n"
37548"uchar16 __ovld __cnfn clz(uchar16 x);\n"
37549"short __ovld __cnfn clz(short x);\n"
37550"ushort __ovld __cnfn clz(ushort x);\n"
37551"short2 __ovld __cnfn clz(short2 x);\n"
37552"ushort2 __ovld __cnfn clz(ushort2 x);\n"
37553"short3 __ovld __cnfn clz(short3 x);\n"
37554"ushort3 __ovld __cnfn clz(ushort3 x);\n"
37555"short4 __ovld __cnfn clz(short4 x);\n"
37556"ushort4 __ovld __cnfn clz(ushort4 x);\n"
37557"short8 __ovld __cnfn clz(short8 x);\n"
37558"ushort8 __ovld __cnfn clz(ushort8 x);\n"
37559"short16 __ovld __cnfn clz(short16 x);\n"
37560"ushort16 __ovld __cnfn clz(ushort16 x);\n"
37561"int __ovld __cnfn clz(int x);\n"
37562"uint __ovld __cnfn clz(uint x);\n"
37563"int2 __ovld __cnfn clz(int2 x);\n"
37564"uint2 __ovld __cnfn clz(uint2 x);\n"
37565"int3 __ovld __cnfn clz(int3 x);\n"
37566"uint3 __ovld __cnfn clz(uint3 x);\n"
37567"int4 __ovld __cnfn clz(int4 x);\n"
37568"uint4 __ovld __cnfn clz(uint4 x);\n"
37569"int8 __ovld __cnfn clz(int8 x);\n"
37570"uint8 __ovld __cnfn clz(uint8 x);\n"
37571"int16 __ovld __cnfn clz(int16 x);\n"
37572"uint16 __ovld __cnfn clz(uint16 x);\n"
37573"long __ovld __cnfn clz(long x);\n"
37574"ulong __ovld __cnfn clz(ulong x);\n"
37575"long2 __ovld __cnfn clz(long2 x);\n"
37576"ulong2 __ovld __cnfn clz(ulong2 x);\n"
37577"long3 __ovld __cnfn clz(long3 x);\n"
37578"ulong3 __ovld __cnfn clz(ulong3 x);\n"
37579"long4 __ovld __cnfn clz(long4 x);\n"
37580"ulong4 __ovld __cnfn clz(ulong4 x);\n"
37581"long8 __ovld __cnfn clz(long8 x);\n"
37582"ulong8 __ovld __cnfn clz(ulong8 x);\n"
37583"long16 __ovld __cnfn clz(long16 x);\n"
37584"ulong16 __ovld __cnfn clz(ulong16 x);\n"
37585"\n"
37586"/**\n"
37587" * Returns the count of trailing 0-bits in x. If x is 0,\n"
37588" * returns the size in bits of the type of x or\n"
37589" * component type of x, if x is a vector.\n"
37590" */\n"
37591"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
37592"char __ovld ctz(char x);\n"
37593"uchar __ovld ctz(uchar x);\n"
37594"char2 __ovld ctz(char2 x);\n"
37595"uchar2 __ovld ctz(uchar2 x);\n"
37596"char3 __ovld ctz(char3 x);\n"
37597"uchar3 __ovld ctz(uchar3 x);\n"
37598"char4 __ovld ctz(char4 x);\n"
37599"uchar4 __ovld ctz(uchar4 x);\n"
37600"char8 __ovld ctz(char8 x);\n"
37601"uchar8 __ovld ctz(uchar8 x);\n"
37602"char16 __ovld ctz(char16 x);\n"
37603"uchar16 __ovld ctz(uchar16 x);\n"
37604"short __ovld ctz(short x);\n"
37605"ushort __ovld ctz(ushort x);\n"
37606"short2 __ovld ctz(short2 x);\n"
37607"ushort2 __ovld ctz(ushort2 x);\n"
37608"short3 __ovld ctz(short3 x);\n"
37609"ushort3 __ovld ctz(ushort3 x);\n"
37610"short4 __ovld ctz(short4 x);\n"
37611"ushort4 __ovld ctz(ushort4 x);\n"
37612"short8 __ovld ctz(short8 x);\n"
37613"ushort8 __ovld ctz(ushort8 x);\n"
37614"short16 __ovld ctz(short16 x);\n"
37615"ushort16 __ovld ctz(ushort16 x);\n"
37616"int __ovld ctz(int x);\n"
37617"uint __ovld ctz(uint x);\n"
37618"int2 __ovld ctz(int2 x);\n"
37619"uint2 __ovld ctz(uint2 x);\n"
37620"int3 __ovld ctz(int3 x);\n"
37621"uint3 __ovld ctz(uint3 x);\n"
37622"int4 __ovld ctz(int4 x);\n"
37623"uint4 __ovld ctz(uint4 x);\n"
37624"int8 __ovld ctz(int8 x);\n"
37625"uint8 __ovld ctz(uint8 x);\n"
37626"int16 __ovld ctz(int16 x);\n"
37627"uint16 __ovld ctz(uint16 x);\n"
37628"long __ovld ctz(long x);\n"
37629"ulong __ovld ctz(ulong x);\n"
37630"long2 __ovld ctz(long2 x);\n"
37631"ulong2 __ovld ctz(ulong2 x);\n"
37632"long3 __ovld ctz(long3 x);\n"
37633"ulong3 __ovld ctz(ulong3 x);\n"
37634"long4 __ovld ctz(long4 x);\n"
37635"ulong4 __ovld ctz(ulong4 x);\n"
37636"long8 __ovld ctz(long8 x);\n"
37637"ulong8 __ovld ctz(ulong8 x);\n"
37638"long16 __ovld ctz(long16 x);\n"
37639"ulong16 __ovld ctz(ulong16 x);\n"
37640"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
37641"\n"
37642"/**\n"
37643" * Returns mul_hi(a, b) + c.\n"
37644" */\n"
37645"char __ovld __cnfn mad_hi(char a, char b, char c);\n"
37646"uchar __ovld __cnfn mad_hi(uchar a, uchar b, uchar c);\n"
37647"char2 __ovld __cnfn mad_hi(char2 a, char2 b, char2 c);\n"
37648"uchar2 __ovld __cnfn mad_hi(uchar2 a, uchar2 b, uchar2 c);\n"
37649"char3 __ovld __cnfn mad_hi(char3 a, char3 b, char3 c);\n"
37650"uchar3 __ovld __cnfn mad_hi(uchar3 a, uchar3 b, uchar3 c);\n"
37651"char4 __ovld __cnfn mad_hi(char4 a, char4 b, char4 c);\n"
37652"uchar4 __ovld __cnfn mad_hi(uchar4 a, uchar4 b, uchar4 c);\n"
37653"char8 __ovld __cnfn mad_hi(char8 a, char8 b, char8 c);\n"
37654"uchar8 __ovld __cnfn mad_hi(uchar8 a, uchar8 b, uchar8 c);\n"
37655"char16 __ovld __cnfn mad_hi(char16 a, char16 b, char16 c);\n"
37656"uchar16 __ovld __cnfn mad_hi(uchar16 a, uchar16 b, uchar16 c);\n"
37657"short __ovld __cnfn mad_hi(short a, short b, short c);\n"
37658"ushort __ovld __cnfn mad_hi(ushort a, ushort b, ushort c);\n"
37659"short2 __ovld __cnfn mad_hi(short2 a, short2 b, short2 c);\n"
37660"ushort2 __ovld __cnfn mad_hi(ushort2 a, ushort2 b, ushort2 c);\n"
37661"short3 __ovld __cnfn mad_hi(short3 a, short3 b, short3 c);\n"
37662"ushort3 __ovld __cnfn mad_hi(ushort3 a, ushort3 b, ushort3 c);\n"
37663"short4 __ovld __cnfn mad_hi(short4 a, short4 b, short4 c);\n"
37664"ushort4 __ovld __cnfn mad_hi(ushort4 a, ushort4 b, ushort4 c);\n"
37665"short8 __ovld __cnfn mad_hi(short8 a, short8 b, short8 c);\n"
37666"ushort8 __ovld __cnfn mad_hi(ushort8 a, ushort8 b, ushort8 c);\n"
37667"short16 __ovld __cnfn mad_hi(short16 a, short16 b, short16 c);\n"
37668"ushort16 __ovld __cnfn mad_hi(ushort16 a, ushort16 b, ushort16 c);\n"
37669"int __ovld __cnfn mad_hi(int a, int b, int c);\n"
37670"uint __ovld __cnfn mad_hi(uint a, uint b, uint c);\n"
37671"int2 __ovld __cnfn mad_hi(int2 a, int2 b, int2 c);\n"
37672"uint2 __ovld __cnfn mad_hi(uint2 a, uint2 b, uint2 c);\n"
37673"int3 __ovld __cnfn mad_hi(int3 a, int3 b, int3 c);\n"
37674"uint3 __ovld __cnfn mad_hi(uint3 a, uint3 b, uint3 c);\n"
37675"int4 __ovld __cnfn mad_hi(int4 a, int4 b, int4 c);\n"
37676"uint4 __ovld __cnfn mad_hi(uint4 a, uint4 b, uint4 c);\n"
37677"int8 __ovld __cnfn mad_hi(int8 a, int8 b, int8 c);\n"
37678"uint8 __ovld __cnfn mad_hi(uint8 a, uint8 b, uint8 c);\n"
37679"int16 __ovld __cnfn mad_hi(int16 a, int16 b, int16 c);\n"
37680"uint16 __ovld __cnfn mad_hi(uint16 a, uint16 b, uint16 c);\n"
37681"long __ovld __cnfn mad_hi(long a, long b, long c);\n"
37682"ulong __ovld __cnfn mad_hi(ulong a, ulong b, ulong c);\n"
37683"long2 __ovld __cnfn mad_hi(long2 a, long2 b, long2 c);\n"
37684"ulong2 __ovld __cnfn mad_hi(ulong2 a, ulong2 b, ulong2 c);\n"
37685"long3 __ovld __cnfn mad_hi(long3 a, long3 b, long3 c);\n"
37686"ulong3 __ovld __cnfn mad_hi(ulong3 a, ulong3 b, ulong3 c);\n"
37687"long4 __ovld __cnfn mad_hi(long4 a, long4 b, long4 c);\n"
37688"ulong4 __ovld __cnfn mad_hi(ulong4 a, ulong4 b, ulong4 c);\n"
37689"long8 __ovld __cnfn mad_hi(long8 a, long8 b, long8 c);\n"
37690"ulong8 __ovld __cnfn mad_hi(ulong8 a, ulong8 b, ulong8 c);\n"
37691"long16 __ovld __cnfn mad_hi(long16 a, long16 b, long16 c);\n"
37692"ulong16 __ovld __cnfn mad_hi(ulong16 a, ulong16 b, ulong16 c);\n"
37693"\n"
37694"/**\n"
37695" * Returns a * b + c and saturates the result.\n"
37696" */\n"
37697"char __ovld __cnfn mad_sat(char a, char b, char c);\n"
37698"uchar __ovld __cnfn mad_sat(uchar a, uchar b, uchar c);\n"
37699"char2 __ovld __cnfn mad_sat(char2 a, char2 b, char2 c);\n"
37700"uchar2 __ovld __cnfn mad_sat(uchar2 a, uchar2 b, uchar2 c);\n"
37701"char3 __ovld __cnfn mad_sat(char3 a, char3 b, char3 c);\n"
37702"uchar3 __ovld __cnfn mad_sat(uchar3 a, uchar3 b, uchar3 c);\n"
37703"char4 __ovld __cnfn mad_sat(char4 a, char4 b, char4 c);\n"
37704"uchar4 __ovld __cnfn mad_sat(uchar4 a, uchar4 b, uchar4 c);\n"
37705"char8 __ovld __cnfn mad_sat(char8 a, char8 b, char8 c);\n"
37706"uchar8 __ovld __cnfn mad_sat(uchar8 a, uchar8 b, uchar8 c);\n"
37707"char16 __ovld __cnfn mad_sat(char16 a, char16 b, char16 c);\n"
37708"uchar16 __ovld __cnfn mad_sat(uchar16 a, uchar16 b, uchar16 c);\n"
37709"short __ovld __cnfn mad_sat(short a, short b, short c);\n"
37710"ushort __ovld __cnfn mad_sat(ushort a, ushort b, ushort c);\n"
37711"short2 __ovld __cnfn mad_sat(short2 a, short2 b, short2 c);\n"
37712"ushort2 __ovld __cnfn mad_sat(ushort2 a, ushort2 b, ushort2 c);\n"
37713"short3 __ovld __cnfn mad_sat(short3 a, short3 b, short3 c);\n"
37714"ushort3 __ovld __cnfn mad_sat(ushort3 a, ushort3 b, ushort3 c);\n"
37715"short4 __ovld __cnfn mad_sat(short4 a, short4 b, short4 c);\n"
37716"ushort4 __ovld __cnfn mad_sat(ushort4 a, ushort4 b, ushort4 c);\n"
37717"short8 __ovld __cnfn mad_sat(short8 a, short8 b, short8 c);\n"
37718"ushort8 __ovld __cnfn mad_sat(ushort8 a, ushort8 b, ushort8 c);\n"
37719"short16 __ovld __cnfn mad_sat(short16 a, short16 b, short16 c);\n"
37720"ushort16 __ovld __cnfn mad_sat(ushort16 a, ushort16 b, ushort16 c);\n"
37721"int __ovld __cnfn mad_sat(int a, int b, int c);\n"
37722"uint __ovld __cnfn mad_sat(uint a, uint b, uint c);\n"
37723"int2 __ovld __cnfn mad_sat(int2 a, int2 b, int2 c);\n"
37724"uint2 __ovld __cnfn mad_sat(uint2 a, uint2 b, uint2 c);\n"
37725"int3 __ovld __cnfn mad_sat(int3 a, int3 b, int3 c);\n"
37726"uint3 __ovld __cnfn mad_sat(uint3 a, uint3 b, uint3 c);\n"
37727"int4 __ovld __cnfn mad_sat(int4 a, int4 b, int4 c);\n"
37728"uint4 __ovld __cnfn mad_sat(uint4 a, uint4 b, uint4 c);\n"
37729"int8 __ovld __cnfn mad_sat(int8 a, int8 b, int8 c);\n"
37730"uint8 __ovld __cnfn mad_sat(uint8 a, uint8 b, uint8 c);\n"
37731"int16 __ovld __cnfn mad_sat(int16 a, int16 b, int16 c);\n"
37732"uint16 __ovld __cnfn mad_sat(uint16 a, uint16 b, uint16 c);\n"
37733"long __ovld __cnfn mad_sat(long a, long b, long c);\n"
37734"ulong __ovld __cnfn mad_sat(ulong a, ulong b, ulong c);\n"
37735"long2 __ovld __cnfn mad_sat(long2 a, long2 b, long2 c);\n"
37736"ulong2 __ovld __cnfn mad_sat(ulong2 a, ulong2 b, ulong2 c);\n"
37737"long3 __ovld __cnfn mad_sat(long3 a, long3 b, long3 c);\n"
37738"ulong3 __ovld __cnfn mad_sat(ulong3 a, ulong3 b, ulong3 c);\n"
37739"long4 __ovld __cnfn mad_sat(long4 a, long4 b, long4 c);\n"
37740"ulong4 __ovld __cnfn mad_sat(ulong4 a, ulong4 b, ulong4 c);\n"
37741"long8 __ovld __cnfn mad_sat(long8 a, long8 b, long8 c);\n"
37742"ulong8 __ovld __cnfn mad_sat(ulong8 a, ulong8 b, ulong8 c);\n"
37743"long16 __ovld __cnfn mad_sat(long16 a, long16 b, long16 c);\n"
37744"ulong16 __ovld __cnfn mad_sat(ulong16 a, ulong16 b, ulong16 c);\n"
37745"\n"
37746"/**\n"
37747" * Returns y if x < y, otherwise it returns x.\n"
37748" */\n"
37749"char __ovld __cnfn max(char x, char y);\n"
37750"uchar __ovld __cnfn max(uchar x, uchar y);\n"
37751"char2 __ovld __cnfn max(char2 x, char2 y);\n"
37752"uchar2 __ovld __cnfn max(uchar2 x, uchar2 y);\n"
37753"char3 __ovld __cnfn max(char3 x, char3 y);\n"
37754"uchar3 __ovld __cnfn max(uchar3 x, uchar3 y);\n"
37755"char4 __ovld __cnfn max(char4 x, char4 y);\n"
37756"uchar4 __ovld __cnfn max(uchar4 x, uchar4 y);\n"
37757"char8 __ovld __cnfn max(char8 x, char8 y);\n"
37758"uchar8 __ovld __cnfn max(uchar8 x, uchar8 y);\n"
37759"char16 __ovld __cnfn max(char16 x, char16 y);\n"
37760"uchar16 __ovld __cnfn max(uchar16 x, uchar16 y);\n"
37761"short __ovld __cnfn max(short x, short y);\n"
37762"ushort __ovld __cnfn max(ushort x, ushort y);\n"
37763"short2 __ovld __cnfn max(short2 x, short2 y);\n"
37764"ushort2 __ovld __cnfn max(ushort2 x, ushort2 y);\n"
37765"short3 __ovld __cnfn max(short3 x, short3 y);\n"
37766"ushort3 __ovld __cnfn max(ushort3 x, ushort3 y);\n"
37767"short4 __ovld __cnfn max(short4 x, short4 y);\n"
37768"ushort4 __ovld __cnfn max(ushort4 x, ushort4 y);\n"
37769"short8 __ovld __cnfn max(short8 x, short8 y);\n"
37770"ushort8 __ovld __cnfn max(ushort8 x, ushort8 y);\n"
37771"short16 __ovld __cnfn max(short16 x, short16 y);\n"
37772"ushort16 __ovld __cnfn max(ushort16 x, ushort16 y);\n"
37773"int __ovld __cnfn max(int x, int y);\n"
37774"uint __ovld __cnfn max(uint x, uint y);\n"
37775"int2 __ovld __cnfn max(int2 x, int2 y);\n"
37776"uint2 __ovld __cnfn max(uint2 x, uint2 y);\n"
37777"int3 __ovld __cnfn max(int3 x, int3 y);\n"
37778"uint3 __ovld __cnfn max(uint3 x, uint3 y);\n"
37779"int4 __ovld __cnfn max(int4 x, int4 y);\n"
37780"uint4 __ovld __cnfn max(uint4 x, uint4 y);\n"
37781"int8 __ovld __cnfn max(int8 x, int8 y);\n"
37782"uint8 __ovld __cnfn max(uint8 x, uint8 y);\n"
37783"int16 __ovld __cnfn max(int16 x, int16 y);\n"
37784"uint16 __ovld __cnfn max(uint16 x, uint16 y);\n"
37785"long __ovld __cnfn max(long x, long y);\n"
37786"ulong __ovld __cnfn max(ulong x, ulong y);\n"
37787"long2 __ovld __cnfn max(long2 x, long2 y);\n"
37788"ulong2 __ovld __cnfn max(ulong2 x, ulong2 y);\n"
37789"long3 __ovld __cnfn max(long3 x, long3 y);\n"
37790"ulong3 __ovld __cnfn max(ulong3 x, ulong3 y);\n"
37791"long4 __ovld __cnfn max(long4 x, long4 y);\n"
37792"ulong4 __ovld __cnfn max(ulong4 x, ulong4 y);\n"
37793"long8 __ovld __cnfn max(long8 x, long8 y);\n"
37794"ulong8 __ovld __cnfn max(ulong8 x, ulong8 y);\n"
37795"long16 __ovld __cnfn max(long16 x, long16 y);\n"
37796"ulong16 __ovld __cnfn max(ulong16 x, ulong16 y);\n"
37797"char __ovld __cnfn max(char x, char y);\n"
37798"uchar __ovld __cnfn max(uchar x, uchar y);\n"
37799"char2 __ovld __cnfn max(char2 x, char y);\n"
37800"uchar2 __ovld __cnfn max(uchar2 x, uchar y);\n"
37801"char3 __ovld __cnfn max(char3 x, char y);\n"
37802"uchar3 __ovld __cnfn max(uchar3 x, uchar y);\n"
37803"char4 __ovld __cnfn max(char4 x, char y);\n"
37804"uchar4 __ovld __cnfn max(uchar4 x, uchar y);\n"
37805"char8 __ovld __cnfn max(char8 x, char y);\n"
37806"uchar8 __ovld __cnfn max(uchar8 x, uchar y);\n"
37807"char16 __ovld __cnfn max(char16 x, char y);\n"
37808"uchar16 __ovld __cnfn max(uchar16 x, uchar y);\n"
37809"short __ovld __cnfn max(short x, short y);\n"
37810"ushort __ovld __cnfn max(ushort x, ushort y);\n"
37811"short2 __ovld __cnfn max(short2 x, short y);\n"
37812"ushort2 __ovld __cnfn max(ushort2 x, ushort y);\n"
37813"short3 __ovld __cnfn max(short3 x, short y);\n"
37814"ushort3 __ovld __cnfn max(ushort3 x, ushort y);\n"
37815"short4 __ovld __cnfn max(short4 x, short y);\n"
37816"ushort4 __ovld __cnfn max(ushort4 x, ushort y);\n"
37817"short8 __ovld __cnfn max(short8 x, short y);\n"
37818"ushort8 __ovld __cnfn max(ushort8 x, ushort y);\n"
37819"short16 __ovld __cnfn max(short16 x, short y);\n"
37820"ushort16 __ovld __cnfn max(ushort16 x, ushort y);\n"
37821"int __ovld __cnfn max(int x, int y);\n"
37822"uint __ovld __cnfn max(uint x, uint y);\n"
37823"int2 __ovld __cnfn max(int2 x, int y);\n"
37824"uint2 __ovld __cnfn max(uint2 x, uint y);\n"
37825"int3 __ovld __cnfn max(int3 x, int y);\n"
37826"uint3 __ovld __cnfn max(uint3 x, uint y);\n"
37827"int4 __ovld __cnfn max(int4 x, int y);\n"
37828"uint4 __ovld __cnfn max(uint4 x, uint y);\n"
37829"int8 __ovld __cnfn max(int8 x, int y);\n"
37830"uint8 __ovld __cnfn max(uint8 x, uint y);\n"
37831"int16 __ovld __cnfn max(int16 x, int y);\n"
37832"uint16 __ovld __cnfn max(uint16 x, uint y);\n"
37833"long __ovld __cnfn max(long x, long y);\n"
37834"ulong __ovld __cnfn max(ulong x, ulong y);\n"
37835"long2 __ovld __cnfn max(long2 x, long y);\n"
37836"ulong2 __ovld __cnfn max(ulong2 x, ulong y);\n"
37837"long3 __ovld __cnfn max(long3 x, long y);\n"
37838"ulong3 __ovld __cnfn max(ulong3 x, ulong y);\n"
37839"long4 __ovld __cnfn max(long4 x, long y);\n"
37840"ulong4 __ovld __cnfn max(ulong4 x, ulong y);\n"
37841"long8 __ovld __cnfn max(long8 x, long y);\n"
37842"ulong8 __ovld __cnfn max(ulong8 x, ulong y);\n"
37843"long16 __ovld __cnfn max(long16 x, long y);\n"
37844"ulong16 __ovld __cnfn max(ulong16 x, ulong y);\n"
37845"\n"
37846"/**\n"
37847" * Returns y if y < x, otherwise it returns x.\n"
37848" */\n"
37849"char __ovld __cnfn min(char x, char y);\n"
37850"uchar __ovld __cnfn min(uchar x, uchar y);\n"
37851"char2 __ovld __cnfn min(char2 x, char2 y);\n"
37852"uchar2 __ovld __cnfn min(uchar2 x, uchar2 y);\n"
37853"char3 __ovld __cnfn min(char3 x, char3 y);\n"
37854"uchar3 __ovld __cnfn min(uchar3 x, uchar3 y);\n"
37855"char4 __ovld __cnfn min(char4 x, char4 y);\n"
37856"uchar4 __ovld __cnfn min(uchar4 x, uchar4 y);\n"
37857"char8 __ovld __cnfn min(char8 x, char8 y);\n"
37858"uchar8 __ovld __cnfn min(uchar8 x, uchar8 y);\n"
37859"char16 __ovld __cnfn min(char16 x, char16 y);\n"
37860"uchar16 __ovld __cnfn min(uchar16 x, uchar16 y);\n"
37861"short __ovld __cnfn min(short x, short y);\n"
37862"ushort __ovld __cnfn min(ushort x, ushort y);\n"
37863"short2 __ovld __cnfn min(short2 x, short2 y);\n"
37864"ushort2 __ovld __cnfn min(ushort2 x, ushort2 y);\n"
37865"short3 __ovld __cnfn min(short3 x, short3 y);\n"
37866"ushort3 __ovld __cnfn min(ushort3 x, ushort3 y);\n"
37867"short4 __ovld __cnfn min(short4 x, short4 y);\n"
37868"ushort4 __ovld __cnfn min(ushort4 x, ushort4 y);\n"
37869"short8 __ovld __cnfn min(short8 x, short8 y);\n"
37870"ushort8 __ovld __cnfn min(ushort8 x, ushort8 y);\n"
37871"short16 __ovld __cnfn min(short16 x, short16 y);\n"
37872"ushort16 __ovld __cnfn min(ushort16 x, ushort16 y);\n"
37873"int __ovld __cnfn min(int x, int y);\n"
37874"uint __ovld __cnfn min(uint x, uint y);\n"
37875"int2 __ovld __cnfn min(int2 x, int2 y);\n"
37876"uint2 __ovld __cnfn min(uint2 x, uint2 y);\n"
37877"int3 __ovld __cnfn min(int3 x, int3 y);\n"
37878"uint3 __ovld __cnfn min(uint3 x, uint3 y);\n"
37879"int4 __ovld __cnfn min(int4 x, int4 y);\n"
37880"uint4 __ovld __cnfn min(uint4 x, uint4 y);\n"
37881"int8 __ovld __cnfn min(int8 x, int8 y);\n"
37882"uint8 __ovld __cnfn min(uint8 x, uint8 y);\n"
37883"int16 __ovld __cnfn min(int16 x, int16 y);\n"
37884"uint16 __ovld __cnfn min(uint16 x, uint16 y);\n"
37885"long __ovld __cnfn min(long x, long y);\n"
37886"ulong __ovld __cnfn min(ulong x, ulong y);\n"
37887"long2 __ovld __cnfn min(long2 x, long2 y);\n"
37888"ulong2 __ovld __cnfn min(ulong2 x, ulong2 y);\n"
37889"long3 __ovld __cnfn min(long3 x, long3 y);\n"
37890"ulong3 __ovld __cnfn min(ulong3 x, ulong3 y);\n"
37891"long4 __ovld __cnfn min(long4 x, long4 y);\n"
37892"ulong4 __ovld __cnfn min(ulong4 x, ulong4 y);\n"
37893"long8 __ovld __cnfn min(long8 x, long8 y);\n"
37894"ulong8 __ovld __cnfn min(ulong8 x, ulong8 y);\n"
37895"long16 __ovld __cnfn min(long16 x, long16 y);\n"
37896"ulong16 __ovld __cnfn min(ulong16 x, ulong16 y);\n"
37897"char __ovld __cnfn min(char x, char y);\n"
37898"uchar __ovld __cnfn min(uchar x, uchar y);\n"
37899"char2 __ovld __cnfn min(char2 x, char y);\n"
37900"uchar2 __ovld __cnfn min(uchar2 x, uchar y);\n"
37901"char3 __ovld __cnfn min(char3 x, char y);\n"
37902"uchar3 __ovld __cnfn min(uchar3 x, uchar y);\n"
37903"char4 __ovld __cnfn min(char4 x, char y);\n"
37904"uchar4 __ovld __cnfn min(uchar4 x, uchar y);\n"
37905"char8 __ovld __cnfn min(char8 x, char y);\n"
37906"uchar8 __ovld __cnfn min(uchar8 x, uchar y);\n"
37907"char16 __ovld __cnfn min(char16 x, char y);\n"
37908"uchar16 __ovld __cnfn min(uchar16 x, uchar y);\n"
37909"short __ovld __cnfn min(short x, short y);\n"
37910"ushort __ovld __cnfn min(ushort x, ushort y);\n"
37911"short2 __ovld __cnfn min(short2 x, short y);\n"
37912"ushort2 __ovld __cnfn min(ushort2 x, ushort y);\n"
37913"short3 __ovld __cnfn min(short3 x, short y);\n"
37914"ushort3 __ovld __cnfn min(ushort3 x, ushort y);\n"
37915"short4 __ovld __cnfn min(short4 x, short y);\n"
37916"ushort4 __ovld __cnfn min(ushort4 x, ushort y);\n"
37917"short8 __ovld __cnfn min(short8 x, short y);\n"
37918"ushort8 __ovld __cnfn min(ushort8 x, ushort y);\n"
37919"short16 __ovld __cnfn min(short16 x, short y);\n"
37920"ushort16 __ovld __cnfn min(ushort16 x, ushort y);\n"
37921"int __ovld __cnfn min(int x, int y);\n"
37922"uint __ovld __cnfn min(uint x, uint y);\n"
37923"int2 __ovld __cnfn min(int2 x, int y);\n"
37924"uint2 __ovld __cnfn min(uint2 x, uint y);\n"
37925"int3 __ovld __cnfn min(int3 x, int y);\n"
37926"uint3 __ovld __cnfn min(uint3 x, uint y);\n"
37927"int4 __ovld __cnfn min(int4 x, int y);\n"
37928"uint4 __ovld __cnfn min(uint4 x, uint y);\n"
37929"int8 __ovld __cnfn min(int8 x, int y);\n"
37930"uint8 __ovld __cnfn min(uint8 x, uint y);\n"
37931"int16 __ovld __cnfn min(int16 x, int y);\n"
37932"uint16 __ovld __cnfn min(uint16 x, uint y);\n"
37933"long __ovld __cnfn min(long x, long y);\n"
37934"ulong __ovld __cnfn min(ulong x, ulong y);\n"
37935"long2 __ovld __cnfn min(long2 x, long y);\n"
37936"ulong2 __ovld __cnfn min(ulong2 x, ulong y);\n"
37937"long3 __ovld __cnfn min(long3 x, long y);\n"
37938"ulong3 __ovld __cnfn min(ulong3 x, ulong y);\n"
37939"long4 __ovld __cnfn min(long4 x, long y);\n"
37940"ulong4 __ovld __cnfn min(ulong4 x, ulong y);\n"
37941"long8 __ovld __cnfn min(long8 x, long y);\n"
37942"ulong8 __ovld __cnfn min(ulong8 x, ulong y);\n"
37943"long16 __ovld __cnfn min(long16 x, long y);\n"
37944"ulong16 __ovld __cnfn min(ulong16 x, ulong y);\n"
37945"\n"
37946"/**\n"
37947" * Computes x * y and returns the high half of the\n"
37948" * product of x and y.\n"
37949" */\n"
37950"char __ovld __cnfn mul_hi(char x, char y);\n"
37951"uchar __ovld __cnfn mul_hi(uchar x, uchar y);\n"
37952"char2 __ovld __cnfn mul_hi(char2 x, char2 y);\n"
37953"uchar2 __ovld __cnfn mul_hi(uchar2 x, uchar2 y);\n"
37954"char3 __ovld __cnfn mul_hi(char3 x, char3 y);\n"
37955"uchar3 __ovld __cnfn mul_hi(uchar3 x, uchar3 y);\n"
37956"char4 __ovld __cnfn mul_hi(char4 x, char4 y);\n"
37957"uchar4 __ovld __cnfn mul_hi(uchar4 x, uchar4 y);\n"
37958"char8 __ovld __cnfn mul_hi(char8 x, char8 y);\n"
37959"uchar8 __ovld __cnfn mul_hi(uchar8 x, uchar8 y);\n"
37960"char16 __ovld __cnfn mul_hi(char16 x, char16 y);\n"
37961"uchar16 __ovld __cnfn mul_hi(uchar16 x, uchar16 y);\n"
37962"short __ovld __cnfn mul_hi(short x, short y);\n"
37963"ushort __ovld __cnfn mul_hi(ushort x, ushort y);\n"
37964"short2 __ovld __cnfn mul_hi(short2 x, short2 y);\n"
37965"ushort2 __ovld __cnfn mul_hi(ushort2 x, ushort2 y);\n"
37966"short3 __ovld __cnfn mul_hi(short3 x, short3 y);\n"
37967"ushort3 __ovld __cnfn mul_hi(ushort3 x, ushort3 y);\n"
37968"short4 __ovld __cnfn mul_hi(short4 x, short4 y);\n"
37969"ushort4 __ovld __cnfn mul_hi(ushort4 x, ushort4 y);\n"
37970"short8 __ovld __cnfn mul_hi(short8 x, short8 y);\n"
37971"ushort8 __ovld __cnfn mul_hi(ushort8 x, ushort8 y);\n"
37972"short16 __ovld __cnfn mul_hi(short16 x, short16 y);\n"
37973"ushort16 __ovld __cnfn mul_hi(ushort16 x, ushort16 y);\n"
37974"int __ovld __cnfn mul_hi(int x, int y);\n"
37975"uint __ovld __cnfn mul_hi(uint x, uint y);\n"
37976"int2 __ovld __cnfn mul_hi(int2 x, int2 y);\n"
37977"uint2 __ovld __cnfn mul_hi(uint2 x, uint2 y);\n"
37978"int3 __ovld __cnfn mul_hi(int3 x, int3 y);\n"
37979"uint3 __ovld __cnfn mul_hi(uint3 x, uint3 y);\n"
37980"int4 __ovld __cnfn mul_hi(int4 x, int4 y);\n"
37981"uint4 __ovld __cnfn mul_hi(uint4 x, uint4 y);\n"
37982"int8 __ovld __cnfn mul_hi(int8 x, int8 y);\n"
37983"uint8 __ovld __cnfn mul_hi(uint8 x, uint8 y);\n"
37984"int16 __ovld __cnfn mul_hi(int16 x, int16 y);\n"
37985"uint16 __ovld __cnfn mul_hi(uint16 x, uint16 y);\n"
37986"long __ovld __cnfn mul_hi(long x, long y);\n"
37987"ulong __ovld __cnfn mul_hi(ulong x, ulong y);\n"
37988"long2 __ovld __cnfn mul_hi(long2 x, long2 y);\n"
37989"ulong2 __ovld __cnfn mul_hi(ulong2 x, ulong2 y);\n"
37990"long3 __ovld __cnfn mul_hi(long3 x, long3 y);\n"
37991"ulong3 __ovld __cnfn mul_hi(ulong3 x, ulong3 y);\n"
37992"long4 __ovld __cnfn mul_hi(long4 x, long4 y);\n"
37993"ulong4 __ovld __cnfn mul_hi(ulong4 x, ulong4 y);\n"
37994"long8 __ovld __cnfn mul_hi(long8 x, long8 y);\n"
37995"ulong8 __ovld __cnfn mul_hi(ulong8 x, ulong8 y);\n"
37996"long16 __ovld __cnfn mul_hi(long16 x, long16 y);\n"
37997"ulong16 __ovld __cnfn mul_hi(ulong16 x, ulong16 y);\n"
37998"\n"
37999"/**\n"
38000" * For each element in v, the bits are shifted left by\n"
38001" * the number of bits given by the corresponding\n"
38002" * element in i (subject to usual shift modulo rules\n"
38003" * described in section 6.3). Bits shifted off the left\n"
38004" * side of the element are shifted back in from the\n"
38005" * right.\n"
38006" */\n"
38007"char __ovld __cnfn rotate(char v, char i);\n"
38008"uchar __ovld __cnfn rotate(uchar v, uchar i);\n"
38009"char2 __ovld __cnfn rotate(char2 v, char2 i);\n"
38010"uchar2 __ovld __cnfn rotate(uchar2 v, uchar2 i);\n"
38011"char3 __ovld __cnfn rotate(char3 v, char3 i);\n"
38012"uchar3 __ovld __cnfn rotate(uchar3 v, uchar3 i);\n"
38013"char4 __ovld __cnfn rotate(char4 v, char4 i);\n"
38014"uchar4 __ovld __cnfn rotate(uchar4 v, uchar4 i);\n"
38015"char8 __ovld __cnfn rotate(char8 v, char8 i);\n"
38016"uchar8 __ovld __cnfn rotate(uchar8 v, uchar8 i);\n"
38017"char16 __ovld __cnfn rotate(char16 v, char16 i);\n"
38018"uchar16 __ovld __cnfn rotate(uchar16 v, uchar16 i);\n"
38019"short __ovld __cnfn rotate(short v, short i);\n"
38020"ushort __ovld __cnfn rotate(ushort v, ushort i);\n"
38021"short2 __ovld __cnfn rotate(short2 v, short2 i);\n"
38022"ushort2 __ovld __cnfn rotate(ushort2 v, ushort2 i);\n"
38023"short3 __ovld __cnfn rotate(short3 v, short3 i);\n"
38024"ushort3 __ovld __cnfn rotate(ushort3 v, ushort3 i);\n"
38025"short4 __ovld __cnfn rotate(short4 v, short4 i);\n"
38026"ushort4 __ovld __cnfn rotate(ushort4 v, ushort4 i);\n"
38027"short8 __ovld __cnfn rotate(short8 v, short8 i);\n"
38028"ushort8 __ovld __cnfn rotate(ushort8 v, ushort8 i);\n"
38029"short16 __ovld __cnfn rotate(short16 v, short16 i);\n"
38030"ushort16 __ovld __cnfn rotate(ushort16 v, ushort16 i);\n"
38031"int __ovld __cnfn rotate(int v, int i);\n"
38032"uint __ovld __cnfn rotate(uint v, uint i);\n"
38033"int2 __ovld __cnfn rotate(int2 v, int2 i);\n"
38034"uint2 __ovld __cnfn rotate(uint2 v, uint2 i);\n"
38035"int3 __ovld __cnfn rotate(int3 v, int3 i);\n"
38036"uint3 __ovld __cnfn rotate(uint3 v, uint3 i);\n"
38037"int4 __ovld __cnfn rotate(int4 v, int4 i);\n"
38038"uint4 __ovld __cnfn rotate(uint4 v, uint4 i);\n"
38039"int8 __ovld __cnfn rotate(int8 v, int8 i);\n"
38040"uint8 __ovld __cnfn rotate(uint8 v, uint8 i);\n"
38041"int16 __ovld __cnfn rotate(int16 v, int16 i);\n"
38042"uint16 __ovld __cnfn rotate(uint16 v, uint16 i);\n"
38043"long __ovld __cnfn rotate(long v, long i);\n"
38044"ulong __ovld __cnfn rotate(ulong v, ulong i);\n"
38045"long2 __ovld __cnfn rotate(long2 v, long2 i);\n"
38046"ulong2 __ovld __cnfn rotate(ulong2 v, ulong2 i);\n"
38047"long3 __ovld __cnfn rotate(long3 v, long3 i);\n"
38048"ulong3 __ovld __cnfn rotate(ulong3 v, ulong3 i);\n"
38049"long4 __ovld __cnfn rotate(long4 v, long4 i);\n"
38050"ulong4 __ovld __cnfn rotate(ulong4 v, ulong4 i);\n"
38051"long8 __ovld __cnfn rotate(long8 v, long8 i);\n"
38052"ulong8 __ovld __cnfn rotate(ulong8 v, ulong8 i);\n"
38053"long16 __ovld __cnfn rotate(long16 v, long16 i);\n"
38054"ulong16 __ovld __cnfn rotate(ulong16 v, ulong16 i);\n"
38055"\n"
38056"/**\n"
38057" * Returns x - y and saturates the result.\n"
38058" */\n"
38059"char __ovld __cnfn sub_sat(char x, char y);\n"
38060"uchar __ovld __cnfn sub_sat(uchar x, uchar y);\n"
38061"char2 __ovld __cnfn sub_sat(char2 x, char2 y);\n"
38062"uchar2 __ovld __cnfn sub_sat(uchar2 x, uchar2 y);\n"
38063"char3 __ovld __cnfn sub_sat(char3 x, char3 y);\n"
38064"uchar3 __ovld __cnfn sub_sat(uchar3 x, uchar3 y);\n"
38065"char4 __ovld __cnfn sub_sat(char4 x, char4 y);\n"
38066"uchar4 __ovld __cnfn sub_sat(uchar4 x, uchar4 y);\n"
38067"char8 __ovld __cnfn sub_sat(char8 x, char8 y);\n"
38068"uchar8 __ovld __cnfn sub_sat(uchar8 x, uchar8 y);\n"
38069"char16 __ovld __cnfn sub_sat(char16 x, char16 y);\n"
38070"uchar16 __ovld __cnfn sub_sat(uchar16 x, uchar16 y);\n"
38071"short __ovld __cnfn sub_sat(short x, short y);\n"
38072"ushort __ovld __cnfn sub_sat(ushort x, ushort y);\n"
38073"short2 __ovld __cnfn sub_sat(short2 x, short2 y);\n"
38074"ushort2 __ovld __cnfn sub_sat(ushort2 x, ushort2 y);\n"
38075"short3 __ovld __cnfn sub_sat(short3 x, short3 y);\n"
38076"ushort3 __ovld __cnfn sub_sat(ushort3 x, ushort3 y);\n"
38077"short4 __ovld __cnfn sub_sat(short4 x, short4 y);\n"
38078"ushort4 __ovld __cnfn sub_sat(ushort4 x, ushort4 y);\n"
38079"short8 __ovld __cnfn sub_sat(short8 x, short8 y);\n"
38080"ushort8 __ovld __cnfn sub_sat(ushort8 x, ushort8 y);\n"
38081"short16 __ovld __cnfn sub_sat(short16 x, short16 y);\n"
38082"ushort16 __ovld __cnfn sub_sat(ushort16 x, ushort16 y);\n"
38083"int __ovld __cnfn sub_sat(int x, int y);\n"
38084"uint __ovld __cnfn sub_sat(uint x, uint y);\n"
38085"int2 __ovld __cnfn sub_sat(int2 x, int2 y);\n"
38086"uint2 __ovld __cnfn sub_sat(uint2 x, uint2 y);\n"
38087"int3 __ovld __cnfn sub_sat(int3 x, int3 y);\n"
38088"uint3 __ovld __cnfn sub_sat(uint3 x, uint3 y);\n"
38089"int4 __ovld __cnfn sub_sat(int4 x, int4 y);\n"
38090"uint4 __ovld __cnfn sub_sat(uint4 x, uint4 y);\n"
38091"int8 __ovld __cnfn sub_sat(int8 x, int8 y);\n"
38092"uint8 __ovld __cnfn sub_sat(uint8 x, uint8 y);\n"
38093"int16 __ovld __cnfn sub_sat(int16 x, int16 y);\n"
38094"uint16 __ovld __cnfn sub_sat(uint16 x, uint16 y);\n"
38095"long __ovld __cnfn sub_sat(long x, long y);\n"
38096"ulong __ovld __cnfn sub_sat(ulong x, ulong y);\n"
38097"long2 __ovld __cnfn sub_sat(long2 x, long2 y);\n"
38098"ulong2 __ovld __cnfn sub_sat(ulong2 x, ulong2 y);\n"
38099"long3 __ovld __cnfn sub_sat(long3 x, long3 y);\n"
38100"ulong3 __ovld __cnfn sub_sat(ulong3 x, ulong3 y);\n"
38101"long4 __ovld __cnfn sub_sat(long4 x, long4 y);\n"
38102"ulong4 __ovld __cnfn sub_sat(ulong4 x, ulong4 y);\n"
38103"long8 __ovld __cnfn sub_sat(long8 x, long8 y);\n"
38104"ulong8 __ovld __cnfn sub_sat(ulong8 x, ulong8 y);\n"
38105"long16 __ovld __cnfn sub_sat(long16 x, long16 y);\n"
38106"ulong16 __ovld __cnfn sub_sat(ulong16 x, ulong16 y);\n"
38107"\n"
38108"/**\n"
38109" * result[i] = ((short)hi[i] << 8) | lo[i]\n"
38110" * result[i] = ((ushort)hi[i] << 8) | lo[i]\n"
38111" */\n"
38112"short __ovld __cnfn upsample(char hi, uchar lo);\n"
38113"ushort __ovld __cnfn upsample(uchar hi, uchar lo);\n"
38114"short2 __ovld __cnfn upsample(char2 hi, uchar2 lo);\n"
38115"short3 __ovld __cnfn upsample(char3 hi, uchar3 lo);\n"
38116"short4 __ovld __cnfn upsample(char4 hi, uchar4 lo);\n"
38117"short8 __ovld __cnfn upsample(char8 hi, uchar8 lo);\n"
38118"short16 __ovld __cnfn upsample(char16 hi, uchar16 lo);\n"
38119"ushort2 __ovld __cnfn upsample(uchar2 hi, uchar2 lo);\n"
38120"ushort3 __ovld __cnfn upsample(uchar3 hi, uchar3 lo);\n"
38121"ushort4 __ovld __cnfn upsample(uchar4 hi, uchar4 lo);\n"
38122"ushort8 __ovld __cnfn upsample(uchar8 hi, uchar8 lo);\n"
38123"ushort16 __ovld __cnfn upsample(uchar16 hi, uchar16 lo);\n"
38124"\n"
38125"/**\n"
38126" * result[i] = ((int)hi[i] << 16) | lo[i]\n"
38127" * result[i] = ((uint)hi[i] << 16) | lo[i]\n"
38128" */\n"
38129"int __ovld __cnfn upsample(short hi, ushort lo);\n"
38130"uint __ovld __cnfn upsample(ushort hi, ushort lo);\n"
38131"int2 __ovld __cnfn upsample(short2 hi, ushort2 lo);\n"
38132"int3 __ovld __cnfn upsample(short3 hi, ushort3 lo);\n"
38133"int4 __ovld __cnfn upsample(short4 hi, ushort4 lo);\n"
38134"int8 __ovld __cnfn upsample(short8 hi, ushort8 lo);\n"
38135"int16 __ovld __cnfn upsample(short16 hi, ushort16 lo);\n"
38136"uint2 __ovld __cnfn upsample(ushort2 hi, ushort2 lo);\n"
38137"uint3 __ovld __cnfn upsample(ushort3 hi, ushort3 lo);\n"
38138"uint4 __ovld __cnfn upsample(ushort4 hi, ushort4 lo);\n"
38139"uint8 __ovld __cnfn upsample(ushort8 hi, ushort8 lo);\n"
38140"uint16 __ovld __cnfn upsample(ushort16 hi, ushort16 lo);\n"
38141"/**\n"
38142" * result[i] = ((long)hi[i] << 32) | lo[i]\n"
38143" * result[i] = ((ulong)hi[i] << 32) | lo[i]\n"
38144" */\n"
38145"long __ovld __cnfn upsample(int hi, uint lo);\n"
38146"ulong __ovld __cnfn upsample(uint hi, uint lo);\n"
38147"long2 __ovld __cnfn upsample(int2 hi, uint2 lo);\n"
38148"long3 __ovld __cnfn upsample(int3 hi, uint3 lo);\n"
38149"long4 __ovld __cnfn upsample(int4 hi, uint4 lo);\n"
38150"long8 __ovld __cnfn upsample(int8 hi, uint8 lo);\n"
38151"long16 __ovld __cnfn upsample(int16 hi, uint16 lo);\n"
38152"ulong2 __ovld __cnfn upsample(uint2 hi, uint2 lo);\n"
38153"ulong3 __ovld __cnfn upsample(uint3 hi, uint3 lo);\n"
38154"ulong4 __ovld __cnfn upsample(uint4 hi, uint4 lo);\n"
38155"ulong8 __ovld __cnfn upsample(uint8 hi, uint8 lo);\n"
38156"ulong16 __ovld __cnfn upsample(uint16 hi, uint16 lo);\n"
38157"\n"
38158"/*\n"
38159" * popcount(x): returns the number of set bit in x\n"
38160" */\n"
38161"char __ovld __cnfn popcount(char x);\n"
38162"uchar __ovld __cnfn popcount(uchar x);\n"
38163"char2 __ovld __cnfn popcount(char2 x);\n"
38164"uchar2 __ovld __cnfn popcount(uchar2 x);\n"
38165"char3 __ovld __cnfn popcount(char3 x);\n"
38166"uchar3 __ovld __cnfn popcount(uchar3 x);\n"
38167"char4 __ovld __cnfn popcount(char4 x);\n"
38168"uchar4 __ovld __cnfn popcount(uchar4 x);\n"
38169"char8 __ovld __cnfn popcount(char8 x);\n"
38170"uchar8 __ovld __cnfn popcount(uchar8 x);\n"
38171"char16 __ovld __cnfn popcount(char16 x);\n"
38172"uchar16 __ovld __cnfn popcount(uchar16 x);\n"
38173"short __ovld __cnfn popcount(short x);\n"
38174"ushort __ovld __cnfn popcount(ushort x);\n"
38175"short2 __ovld __cnfn popcount(short2 x);\n"
38176"ushort2 __ovld __cnfn popcount(ushort2 x);\n"
38177"short3 __ovld __cnfn popcount(short3 x);\n"
38178"ushort3 __ovld __cnfn popcount(ushort3 x);\n"
38179"short4 __ovld __cnfn popcount(short4 x);\n"
38180"ushort4 __ovld __cnfn popcount(ushort4 x);\n"
38181"short8 __ovld __cnfn popcount(short8 x);\n"
38182"ushort8 __ovld __cnfn popcount(ushort8 x);\n"
38183"short16 __ovld __cnfn popcount(short16 x);\n"
38184"ushort16 __ovld __cnfn popcount(ushort16 x);\n"
38185"int __ovld __cnfn popcount(int x);\n"
38186"uint __ovld __cnfn popcount(uint x);\n"
38187"int2 __ovld __cnfn popcount(int2 x);\n"
38188"uint2 __ovld __cnfn popcount(uint2 x);\n"
38189"int3 __ovld __cnfn popcount(int3 x);\n"
38190"uint3 __ovld __cnfn popcount(uint3 x);\n"
38191"int4 __ovld __cnfn popcount(int4 x);\n"
38192"uint4 __ovld __cnfn popcount(uint4 x);\n"
38193"int8 __ovld __cnfn popcount(int8 x);\n"
38194"uint8 __ovld __cnfn popcount(uint8 x);\n"
38195"int16 __ovld __cnfn popcount(int16 x);\n"
38196"uint16 __ovld __cnfn popcount(uint16 x);\n"
38197"long __ovld __cnfn popcount(long x);\n"
38198"ulong __ovld __cnfn popcount(ulong x);\n"
38199"long2 __ovld __cnfn popcount(long2 x);\n"
38200"ulong2 __ovld __cnfn popcount(ulong2 x);\n"
38201"long3 __ovld __cnfn popcount(long3 x);\n"
38202"ulong3 __ovld __cnfn popcount(ulong3 x);\n"
38203"long4 __ovld __cnfn popcount(long4 x);\n"
38204"ulong4 __ovld __cnfn popcount(ulong4 x);\n"
38205"long8 __ovld __cnfn popcount(long8 x);\n"
38206"ulong8 __ovld __cnfn popcount(ulong8 x);\n"
38207"long16 __ovld __cnfn popcount(long16 x);\n"
38208"ulong16 __ovld __cnfn popcount(ulong16 x);\n"
38209"\n"
38210"/**\n"
38211" * Multiply two 24-bit integer values x and y and add\n"
38212" * the 32-bit integer result to the 32-bit integer z.\n"
38213" * Refer to definition of mul24 to see how the 24-bit\n"
38214" * integer multiplication is performed.\n"
38215" */\n"
38216"int __ovld __cnfn mad24(int x, int y, int z);\n"
38217"uint __ovld __cnfn mad24(uint x, uint y, uint z);\n"
38218"int2 __ovld __cnfn mad24(int2 x, int2 y, int2 z);\n"
38219"uint2 __ovld __cnfn mad24(uint2 x, uint2 y, uint2 z);\n"
38220"int3 __ovld __cnfn mad24(int3 x, int3 y, int3 z);\n"
38221"uint3 __ovld __cnfn mad24(uint3 x, uint3 y, uint3 z);\n"
38222"int4 __ovld __cnfn mad24(int4 x, int4 y, int4 z);\n"
38223"uint4 __ovld __cnfn mad24(uint4 x, uint4 y, uint4 z);\n"
38224"int8 __ovld __cnfn mad24(int8 x, int8 y, int8 z);\n"
38225"uint8 __ovld __cnfn mad24(uint8 x, uint8 y, uint8 z);\n"
38226"int16 __ovld __cnfn mad24(int16 x, int16 y, int16 z);\n"
38227"uint16 __ovld __cnfn mad24(uint16 x, uint16 y, uint16 z);\n"
38228"\n"
38229"/**\n"
38230" * Multiply two 24-bit integer values x and y. x and y\n"
38231" * are 32-bit integers but only the low 24-bits are used\n"
38232" * to perform the multiplication. mul24 should only\n"
38233" * be used when values in x and y are in the range [-\n"
38234" * 2^23, 2^23-1] if x and y are signed integers and in the\n"
38235" * range [0, 2^24-1] if x and y are unsigned integers. If\n"
38236" * x and y are not in this range, the multiplication\n"
38237" * result is implementation-defined.\n"
38238" */\n"
38239"int __ovld __cnfn mul24(int x, int y);\n"
38240"uint __ovld __cnfn mul24(uint x, uint y);\n"
38241"int2 __ovld __cnfn mul24(int2 x, int2 y);\n"
38242"uint2 __ovld __cnfn mul24(uint2 x, uint2 y);\n"
38243"int3 __ovld __cnfn mul24(int3 x, int3 y);\n"
38244"uint3 __ovld __cnfn mul24(uint3 x, uint3 y);\n"
38245"int4 __ovld __cnfn mul24(int4 x, int4 y);\n"
38246"uint4 __ovld __cnfn mul24(uint4 x, uint4 y);\n"
38247"int8 __ovld __cnfn mul24(int8 x, int8 y);\n"
38248"uint8 __ovld __cnfn mul24(uint8 x, uint8 y);\n"
38249"int16 __ovld __cnfn mul24(int16 x, int16 y);\n"
38250"uint16 __ovld __cnfn mul24(uint16 x, uint16 y);\n"
38251"\n"
38252"// OpenCL v1.1 s6.11.4, v1.2 s6.12.4, v2.0 s6.13.4 - Common Functions\n"
38253"\n"
38254"/**\n"
38255" * Returns fmin(fmax(x, minval), maxval).\n"
38256" * Results are undefined if minval > maxval.\n"
38257" */\n"
38258"float __ovld __cnfn clamp(float x, float minval, float maxval);\n"
38259"float2 __ovld __cnfn clamp(float2 x, float2 minval, float2 maxval);\n"
38260"float3 __ovld __cnfn clamp(float3 x, float3 minval, float3 maxval);\n"
38261"float4 __ovld __cnfn clamp(float4 x, float4 minval, float4 maxval);\n"
38262"float8 __ovld __cnfn clamp(float8 x, float8 minval, float8 maxval);\n"
38263"float16 __ovld __cnfn clamp(float16 x, float16 minval, float16 maxval);\n"
38264"float2 __ovld __cnfn clamp(float2 x, float minval, float maxval);\n"
38265"float3 __ovld __cnfn clamp(float3 x, float minval, float maxval);\n"
38266"float4 __ovld __cnfn clamp(float4 x, float minval, float maxval);\n"
38267"float8 __ovld __cnfn clamp(float8 x, float minval, float maxval);\n"
38268"float16 __ovld __cnfn clamp(float16 x, float minval, float maxval);\n"
38269"#ifdef cl_khr_fp64\n"
38270"double __ovld __cnfn clamp(double x, double minval, double maxval);\n"
38271"double2 __ovld __cnfn clamp(double2 x, double2 minval, double2 maxval);\n"
38272"double3 __ovld __cnfn clamp(double3 x, double3 minval, double3 maxval);\n"
38273"double4 __ovld __cnfn clamp(double4 x, double4 minval, double4 maxval);\n"
38274"double8 __ovld __cnfn clamp(double8 x, double8 minval, double8 maxval);\n"
38275"double16 __ovld __cnfn clamp(double16 x, double16 minval, double16 maxval);\n"
38276"double2 __ovld __cnfn clamp(double2 x, double minval, double maxval);\n"
38277"double3 __ovld __cnfn clamp(double3 x, double minval, double maxval);\n"
38278"double4 __ovld __cnfn clamp(double4 x, double minval, double maxval);\n"
38279"double8 __ovld __cnfn clamp(double8 x, double minval, double maxval);\n"
38280"double16 __ovld __cnfn clamp(double16 x, double minval, double maxval);\n"
38281"#endif //cl_khr_fp64\n"
38282"#ifdef cl_khr_fp16\n"
38283"half __ovld __cnfn clamp(half x, half minval, half maxval);\n"
38284"half2 __ovld __cnfn clamp(half2 x, half2 minval, half2 maxval);\n"
38285"half3 __ovld __cnfn clamp(half3 x, half3 minval, half3 maxval);\n"
38286"half4 __ovld __cnfn clamp(half4 x, half4 minval, half4 maxval);\n"
38287"half8 __ovld __cnfn clamp(half8 x, half8 minval, half8 maxval);\n"
38288"half16 __ovld __cnfn clamp(half16 x, half16 minval, half16 maxval);\n"
38289"half2 __ovld __cnfn clamp(half2 x, half minval, half maxval);\n"
38290"half3 __ovld __cnfn clamp(half3 x, half minval, half maxval);\n"
38291"half4 __ovld __cnfn clamp(half4 x, half minval, half maxval);\n"
38292"half8 __ovld __cnfn clamp(half8 x, half minval, half maxval);\n"
38293"half16 __ovld __cnfn clamp(half16 x, half minval, half maxval);\n"
38294"#endif //cl_khr_fp16\n"
38295"\n"
38296"/**\n"
38297" * Converts radians to degrees, i.e. (180 / PI) *\n"
38298" * radians.\n"
38299" */\n"
38300"float __ovld __cnfn degrees(float radians);\n"
38301"float2 __ovld __cnfn degrees(float2 radians);\n"
38302"float3 __ovld __cnfn degrees(float3 radians);\n"
38303"float4 __ovld __cnfn degrees(float4 radians);\n"
38304"float8 __ovld __cnfn degrees(float8 radians);\n"
38305"float16 __ovld __cnfn degrees(float16 radians);\n"
38306"#ifdef cl_khr_fp64\n"
38307"double __ovld __cnfn degrees(double radians);\n"
38308"double2 __ovld __cnfn degrees(double2 radians);\n"
38309"double3 __ovld __cnfn degrees(double3 radians);\n"
38310"double4 __ovld __cnfn degrees(double4 radians);\n"
38311"double8 __ovld __cnfn degrees(double8 radians);\n"
38312"double16 __ovld __cnfn degrees(double16 radians);\n"
38313"#endif //cl_khr_fp64\n"
38314"#ifdef cl_khr_fp16\n"
38315"half __ovld __cnfn degrees(half radians);\n"
38316"half2 __ovld __cnfn degrees(half2 radians);\n"
38317"half3 __ovld __cnfn degrees(half3 radians);\n"
38318"half4 __ovld __cnfn degrees(half4 radians);\n"
38319"half8 __ovld __cnfn degrees(half8 radians);\n"
38320"half16 __ovld __cnfn degrees(half16 radians);\n"
38321"#endif //cl_khr_fp16\n"
38322"\n"
38323"/**\n"
38324" * Returns y if x < y, otherwise it returns x. If x and y\n"
38325" * are infinite or NaN, the return values are undefined.\n"
38326" */\n"
38327"float __ovld __cnfn max(float x, float y);\n"
38328"float2 __ovld __cnfn max(float2 x, float2 y);\n"
38329"float3 __ovld __cnfn max(float3 x, float3 y);\n"
38330"float4 __ovld __cnfn max(float4 x, float4 y);\n"
38331"float8 __ovld __cnfn max(float8 x, float8 y);\n"
38332"float16 __ovld __cnfn max(float16 x, float16 y);\n"
38333"float2 __ovld __cnfn max(float2 x, float y);\n"
38334"float3 __ovld __cnfn max(float3 x, float y);\n"
38335"float4 __ovld __cnfn max(float4 x, float y);\n"
38336"float8 __ovld __cnfn max(float8 x, float y);\n"
38337"float16 __ovld __cnfn max(float16 x, float y);\n"
38338"#ifdef cl_khr_fp64\n"
38339"double __ovld __cnfn max(double x, double y);\n"
38340"double2 __ovld __cnfn max(double2 x, double2 y);\n"
38341"double3 __ovld __cnfn max(double3 x, double3 y);\n"
38342"double4 __ovld __cnfn max(double4 x, double4 y);\n"
38343"double8 __ovld __cnfn max(double8 x, double8 y);\n"
38344"double16 __ovld __cnfn max(double16 x, double16 y);\n"
38345"double2 __ovld __cnfn max(double2 x, double y);\n"
38346"double3 __ovld __cnfn max(double3 x, double y);\n"
38347"double4 __ovld __cnfn max(double4 x, double y);\n"
38348"double8 __ovld __cnfn max(double8 x, double y);\n"
38349"double16 __ovld __cnfn max(double16 x, double y);\n"
38350"#endif //cl_khr_fp64\n"
38351"#ifdef cl_khr_fp16\n"
38352"half __ovld __cnfn max(half x, half y);\n"
38353"half2 __ovld __cnfn max(half2 x, half2 y);\n"
38354"half3 __ovld __cnfn max(half3 x, half3 y);\n"
38355"half4 __ovld __cnfn max(half4 x, half4 y);\n"
38356"half8 __ovld __cnfn max(half8 x, half8 y);\n"
38357"half16 __ovld __cnfn max(half16 x, half16 y);\n"
38358"half2 __ovld __cnfn max(half2 x, half y);\n"
38359"half3 __ovld __cnfn max(half3 x, half y);\n"
38360"half4 __ovld __cnfn max(half4 x, half y);\n"
38361"half8 __ovld __cnfn max(half8 x, half y);\n"
38362"half16 __ovld __cnfn max(half16 x, half y);\n"
38363"#endif //cl_khr_fp16\n"
38364"\n"
38365"/**\n"
38366" * Returns y if y < x, otherwise it returns x. If x and y\n"
38367" * are infinite or NaN, the return values are undefined.\n"
38368" */\n"
38369"float __ovld __cnfn min(float x, float y);\n"
38370"float2 __ovld __cnfn min(float2 x, float2 y);\n"
38371"float3 __ovld __cnfn min(float3 x, float3 y);\n"
38372"float4 __ovld __cnfn min(float4 x, float4 y);\n"
38373"float8 __ovld __cnfn min(float8 x, float8 y);\n"
38374"float16 __ovld __cnfn min(float16 x, float16 y);\n"
38375"float2 __ovld __cnfn min(float2 x, float y);\n"
38376"float3 __ovld __cnfn min(float3 x, float y);\n"
38377"float4 __ovld __cnfn min(float4 x, float y);\n"
38378"float8 __ovld __cnfn min(float8 x, float y);\n"
38379"float16 __ovld __cnfn min(float16 x, float y);\n"
38380"#ifdef cl_khr_fp64\n"
38381"double __ovld __cnfn min(double x, double y);\n"
38382"double2 __ovld __cnfn min(double2 x, double2 y);\n"
38383"double3 __ovld __cnfn min(double3 x, double3 y);\n"
38384"double4 __ovld __cnfn min(double4 x, double4 y);\n"
38385"double8 __ovld __cnfn min(double8 x, double8 y);\n"
38386"double16 __ovld __cnfn min(double16 x, double16 y);\n"
38387"double2 __ovld __cnfn min(double2 x, double y);\n"
38388"double3 __ovld __cnfn min(double3 x, double y);\n"
38389"double4 __ovld __cnfn min(double4 x, double y);\n"
38390"double8 __ovld __cnfn min(double8 x, double y);\n"
38391"double16 __ovld __cnfn min(double16 x, double y);\n"
38392"#endif //cl_khr_fp64\n"
38393"#ifdef cl_khr_fp16\n"
38394"half __ovld __cnfn min(half x, half y);\n"
38395"half2 __ovld __cnfn min(half2 x, half2 y);\n"
38396"half3 __ovld __cnfn min(half3 x, half3 y);\n"
38397"half4 __ovld __cnfn min(half4 x, half4 y);\n"
38398"half8 __ovld __cnfn min(half8 x, half8 y);\n"
38399"half16 __ovld __cnfn min(half16 x, half16 y);\n"
38400"half2 __ovld __cnfn min(half2 x, half y);\n"
38401"half3 __ovld __cnfn min(half3 x, half y);\n"
38402"half4 __ovld __cnfn min(half4 x, half y);\n"
38403"half8 __ovld __cnfn min(half8 x, half y);\n"
38404"half16 __ovld __cnfn min(half16 x, half y);\n"
38405"#endif //cl_khr_fp16\n"
38406"\n"
38407"/**\n"
38408" * Returns the linear blend of x & y implemented as:\n"
38409" * x + (y - x) * a\n"
38410" * a must be a value in the range 0.0 ... 1.0. If a is not\n"
38411" * in the range 0.0 ... 1.0, the return values are\n"
38412" * undefined.\n"
38413" */\n"
38414"float __ovld __cnfn mix(float x, float y, float a);\n"
38415"float2 __ovld __cnfn mix(float2 x, float2 y, float2 a);\n"
38416"float3 __ovld __cnfn mix(float3 x, float3 y, float3 a);\n"
38417"float4 __ovld __cnfn mix(float4 x, float4 y, float4 a);\n"
38418"float8 __ovld __cnfn mix(float8 x, float8 y, float8 a);\n"
38419"float16 __ovld __cnfn mix(float16 x, float16 y, float16 a);\n"
38420"float2 __ovld __cnfn mix(float2 x, float2 y, float a);\n"
38421"float3 __ovld __cnfn mix(float3 x, float3 y, float a);\n"
38422"float4 __ovld __cnfn mix(float4 x, float4 y, float a);\n"
38423"float8 __ovld __cnfn mix(float8 x, float8 y, float a);\n"
38424"float16 __ovld __cnfn mix(float16 x, float16 y, float a);\n"
38425"#ifdef cl_khr_fp64\n"
38426"double __ovld __cnfn mix(double x, double y, double a);\n"
38427"double2 __ovld __cnfn mix(double2 x, double2 y, double2 a);\n"
38428"double3 __ovld __cnfn mix(double3 x, double3 y, double3 a);\n"
38429"double4 __ovld __cnfn mix(double4 x, double4 y, double4 a);\n"
38430"double8 __ovld __cnfn mix(double8 x, double8 y, double8 a);\n"
38431"double16 __ovld __cnfn mix(double16 x, double16 y, double16 a);\n"
38432"double2 __ovld __cnfn mix(double2 x, double2 y, double a);\n"
38433"double3 __ovld __cnfn mix(double3 x, double3 y, double a);\n"
38434"double4 __ovld __cnfn mix(double4 x, double4 y, double a);\n"
38435"double8 __ovld __cnfn mix(double8 x, double8 y, double a);\n"
38436"double16 __ovld __cnfn mix(double16 x, double16 y, double a);\n"
38437"#endif //cl_khr_fp64\n"
38438"#ifdef cl_khr_fp16\n"
38439"half __ovld __cnfn mix(half x, half y, half a);\n"
38440"half2 __ovld __cnfn mix(half2 x, half2 y, half2 a);\n"
38441"half3 __ovld __cnfn mix(half3 x, half3 y, half3 a);\n"
38442"half4 __ovld __cnfn mix(half4 x, half4 y, half4 a);\n"
38443"half8 __ovld __cnfn mix(half8 x, half8 y, half8 a);\n"
38444"half16 __ovld __cnfn mix(half16 x, half16 y, half16 a);\n"
38445"half2 __ovld __cnfn mix(half2 x, half2 y, half a);\n"
38446"half3 __ovld __cnfn mix(half3 x, half3 y, half a);\n"
38447"half4 __ovld __cnfn mix(half4 x, half4 y, half a);\n"
38448"half8 __ovld __cnfn mix(half8 x, half8 y, half a);\n"
38449"half16 __ovld __cnfn mix(half16 x, half16 y, half a);\n"
38450"#endif //cl_khr_fp16\n"
38451"\n"
38452"/**\n"
38453" * Converts degrees to radians, i.e. (PI / 180) *\n"
38454" * degrees.\n"
38455" */\n"
38456"float __ovld __cnfn radians(float degrees);\n"
38457"float2 __ovld __cnfn radians(float2 degrees);\n"
38458"float3 __ovld __cnfn radians(float3 degrees);\n"
38459"float4 __ovld __cnfn radians(float4 degrees);\n"
38460"float8 __ovld __cnfn radians(float8 degrees);\n"
38461"float16 __ovld __cnfn radians(float16 degrees);\n"
38462"#ifdef cl_khr_fp64\n"
38463"double __ovld __cnfn radians(double degrees);\n"
38464"double2 __ovld __cnfn radians(double2 degrees);\n"
38465"double3 __ovld __cnfn radians(double3 degrees);\n"
38466"double4 __ovld __cnfn radians(double4 degrees);\n"
38467"double8 __ovld __cnfn radians(double8 degrees);\n"
38468"double16 __ovld __cnfn radians(double16 degrees);\n"
38469"#endif //cl_khr_fp64\n"
38470"#ifdef cl_khr_fp16\n"
38471"half __ovld __cnfn radians(half degrees);\n"
38472"half2 __ovld __cnfn radians(half2 degrees);\n"
38473"half3 __ovld __cnfn radians(half3 degrees);\n"
38474"half4 __ovld __cnfn radians(half4 degrees);\n"
38475"half8 __ovld __cnfn radians(half8 degrees);\n"
38476"half16 __ovld __cnfn radians(half16 degrees);\n"
38477"#endif //cl_khr_fp16\n"
38478"\n"
38479"/**\n"
38480" * Returns 0.0 if x < edge, otherwise it returns 1.0.\n"
38481" */\n"
38482"float __ovld __cnfn step(float edge, float x);\n"
38483"float2 __ovld __cnfn step(float2 edge, float2 x);\n"
38484"float3 __ovld __cnfn step(float3 edge, float3 x);\n"
38485"float4 __ovld __cnfn step(float4 edge, float4 x);\n"
38486"float8 __ovld __cnfn step(float8 edge, float8 x);\n"
38487"float16 __ovld __cnfn step(float16 edge, float16 x);\n"
38488"float2 __ovld __cnfn step(float edge, float2 x);\n"
38489"float3 __ovld __cnfn step(float edge, float3 x);\n"
38490"float4 __ovld __cnfn step(float edge, float4 x);\n"
38491"float8 __ovld __cnfn step(float edge, float8 x);\n"
38492"float16 __ovld __cnfn step(float edge, float16 x);\n"
38493"#ifdef cl_khr_fp64\n"
38494"double __ovld __cnfn step(double edge, double x);\n"
38495"double2 __ovld __cnfn step(double2 edge, double2 x);\n"
38496"double3 __ovld __cnfn step(double3 edge, double3 x);\n"
38497"double4 __ovld __cnfn step(double4 edge, double4 x);\n"
38498"double8 __ovld __cnfn step(double8 edge, double8 x);\n"
38499"double16 __ovld __cnfn step(double16 edge, double16 x);\n"
38500"double2 __ovld __cnfn step(double edge, double2 x);\n"
38501"double3 __ovld __cnfn step(double edge, double3 x);\n"
38502"double4 __ovld __cnfn step(double edge, double4 x);\n"
38503"double8 __ovld __cnfn step(double edge, double8 x);\n"
38504"double16 __ovld __cnfn step(double edge, double16 x);\n"
38505"#endif //cl_khr_fp64\n"
38506"#ifdef cl_khr_fp16\n"
38507"half __ovld __cnfn step(half edge, half x);\n"
38508"half2 __ovld __cnfn step(half2 edge, half2 x);\n"
38509"half3 __ovld __cnfn step(half3 edge, half3 x);\n"
38510"half4 __ovld __cnfn step(half4 edge, half4 x);\n"
38511"half8 __ovld __cnfn step(half8 edge, half8 x);\n"
38512"half16 __ovld __cnfn step(half16 edge, half16 x);\n"
38513"half __ovld __cnfn step(half edge, half x);\n"
38514"half2 __ovld __cnfn step(half edge, half2 x);\n"
38515"half3 __ovld __cnfn step(half edge, half3 x);\n"
38516"half4 __ovld __cnfn step(half edge, half4 x);\n"
38517"half8 __ovld __cnfn step(half edge, half8 x);\n"
38518"half16 __ovld __cnfn step(half edge, half16 x);\n"
38519"#endif //cl_khr_fp16\n"
38520"\n"
38521"/**\n"
38522" * Returns 0.0 if x <= edge0 and 1.0 if x >= edge1 and\n"
38523" * performs smooth Hermite interpolation between 0\n"
38524" * and 1when edge0 < x < edge1. This is useful in\n"
38525" * cases where you would want a threshold function\n"
38526" * with a smooth transition.\n"
38527" * This is equivalent to:\n"
38528" * gentype t;\n"
38529" * t = clamp ((x - edge0) / (edge1 - edge0), 0, 1);\n"
38530" * return t * t * (3 - 2 * t);\n"
38531" * Results are undefined if edge0 >= edge1 or if x,\n"
38532" * edge0 or edge1 is a NaN.\n"
38533" */\n"
38534"float __ovld __cnfn smoothstep(float edge0, float edge1, float x);\n"
38535"float2 __ovld __cnfn smoothstep(float2 edge0, float2 edge1, float2 x);\n"
38536"float3 __ovld __cnfn smoothstep(float3 edge0, float3 edge1, float3 x);\n"
38537"float4 __ovld __cnfn smoothstep(float4 edge0, float4 edge1, float4 x);\n"
38538"float8 __ovld __cnfn smoothstep(float8 edge0, float8 edge1, float8 x);\n"
38539"float16 __ovld __cnfn smoothstep(float16 edge0, float16 edge1, float16 x);\n"
38540"float2 __ovld __cnfn smoothstep(float edge0, float edge1, float2 x);\n"
38541"float3 __ovld __cnfn smoothstep(float edge0, float edge1, float3 x);\n"
38542"float4 __ovld __cnfn smoothstep(float edge0, float edge1, float4 x);\n"
38543"float8 __ovld __cnfn smoothstep(float edge0, float edge1, float8 x);\n"
38544"float16 __ovld __cnfn smoothstep(float edge0, float edge1, float16 x);\n"
38545"#ifdef cl_khr_fp64\n"
38546"double __ovld __cnfn smoothstep(double edge0, double edge1, double x);\n"
38547"double2 __ovld __cnfn smoothstep(double2 edge0, double2 edge1, double2 x);\n"
38548"double3 __ovld __cnfn smoothstep(double3 edge0, double3 edge1, double3 x);\n"
38549"double4 __ovld __cnfn smoothstep(double4 edge0, double4 edge1, double4 x);\n"
38550"double8 __ovld __cnfn smoothstep(double8 edge0, double8 edge1, double8 x);\n"
38551"double16 __ovld __cnfn smoothstep(double16 edge0, double16 edge1, double16 x);\n"
38552"double2 __ovld __cnfn smoothstep(double edge0, double edge1, double2 x);\n"
38553"double3 __ovld __cnfn smoothstep(double edge0, double edge1, double3 x);\n"
38554"double4 __ovld __cnfn smoothstep(double edge0, double edge1, double4 x);\n"
38555"double8 __ovld __cnfn smoothstep(double edge0, double edge1, double8 x);\n"
38556"double16 __ovld __cnfn smoothstep(double edge0, double edge1, double16 x);\n"
38557"#endif //cl_khr_fp64\n"
38558"#ifdef cl_khr_fp16\n"
38559"half __ovld __cnfn smoothstep(half edge0, half edge1, half x);\n"
38560"half2 __ovld __cnfn smoothstep(half2 edge0, half2 edge1, half2 x);\n"
38561"half3 __ovld __cnfn smoothstep(half3 edge0, half3 edge1, half3 x);\n"
38562"half4 __ovld __cnfn smoothstep(half4 edge0, half4 edge1, half4 x);\n"
38563"half8 __ovld __cnfn smoothstep(half8 edge0, half8 edge1, half8 x);\n"
38564"half16 __ovld __cnfn smoothstep(half16 edge0, half16 edge1, half16 x);\n"
38565"half __ovld __cnfn smoothstep(half edge0, half edge1, half x);\n"
38566"half2 __ovld __cnfn smoothstep(half edge0, half edge1, half2 x);\n"
38567"half3 __ovld __cnfn smoothstep(half edge0, half edge1, half3 x);\n"
38568"half4 __ovld __cnfn smoothstep(half edge0, half edge1, half4 x);\n"
38569"half8 __ovld __cnfn smoothstep(half edge0, half edge1, half8 x);\n"
38570"half16 __ovld __cnfn smoothstep(half edge0, half edge1, half16 x);\n"
38571"#endif //cl_khr_fp16\n"
38572"\n"
38573"/**\n"
38574" * Returns 1.0 if x > 0, -0.0 if x = -0.0, +0.0 if x =\n"
38575" * +0.0, or -1.0 if x < 0. Returns 0.0 if x is a NaN.\n"
38576" */\n"
38577"float __ovld __cnfn sign(float x);\n"
38578"float2 __ovld __cnfn sign(float2 x);\n"
38579"float3 __ovld __cnfn sign(float3 x);\n"
38580"float4 __ovld __cnfn sign(float4 x);\n"
38581"float8 __ovld __cnfn sign(float8 x);\n"
38582"float16 __ovld __cnfn sign(float16 x);\n"
38583"#ifdef cl_khr_fp64\n"
38584"double __ovld __cnfn sign(double x);\n"
38585"double2 __ovld __cnfn sign(double2 x);\n"
38586"double3 __ovld __cnfn sign(double3 x);\n"
38587"double4 __ovld __cnfn sign(double4 x);\n"
38588"double8 __ovld __cnfn sign(double8 x);\n"
38589"double16 __ovld __cnfn sign(double16 x);\n"
38590"#endif //cl_khr_fp64\n"
38591"#ifdef cl_khr_fp16\n"
38592"half __ovld __cnfn sign(half x);\n"
38593"half2 __ovld __cnfn sign(half2 x);\n"
38594"half3 __ovld __cnfn sign(half3 x);\n"
38595"half4 __ovld __cnfn sign(half4 x);\n"
38596"half8 __ovld __cnfn sign(half8 x);\n"
38597"half16 __ovld __cnfn sign(half16 x);\n"
38598"#endif //cl_khr_fp16\n"
38599"\n"
38600"// OpenCL v1.1 s6.11.5, v1.2 s6.12.5, v2.0 s6.13.5 - Geometric Functions\n"
38601"\n"
38602"/**\n"
38603" * Returns the cross product of p0.xyz and p1.xyz. The\n"
38604" * w component of float4 result returned will be 0.0.\n"
38605" */\n"
38606"float4 __ovld __cnfn cross(float4 p0, float4 p1);\n"
38607"float3 __ovld __cnfn cross(float3 p0, float3 p1);\n"
38608"#ifdef cl_khr_fp64\n"
38609"double4 __ovld __cnfn cross(double4 p0, double4 p1);\n"
38610"double3 __ovld __cnfn cross(double3 p0, double3 p1);\n"
38611"#endif //cl_khr_fp64\n"
38612"#ifdef cl_khr_fp16\n"
38613"half4 __ovld __cnfn cross(half4 p0, half4 p1);\n"
38614"half3 __ovld __cnfn cross(half3 p0, half3 p1);\n"
38615"#endif //cl_khr_fp16\n"
38616"\n"
38617"/**\n"
38618" * Compute dot product.\n"
38619" */\n"
38620"float __ovld __cnfn dot(float p0, float p1);\n"
38621"float __ovld __cnfn dot(float2 p0, float2 p1);\n"
38622"float __ovld __cnfn dot(float3 p0, float3 p1);\n"
38623"float __ovld __cnfn dot(float4 p0, float4 p1);\n"
38624"#ifdef cl_khr_fp64\n"
38625"double __ovld __cnfn dot(double p0, double p1);\n"
38626"double __ovld __cnfn dot(double2 p0, double2 p1);\n"
38627"double __ovld __cnfn dot(double3 p0, double3 p1);\n"
38628"double __ovld __cnfn dot(double4 p0, double4 p1);\n"
38629"#endif //cl_khr_fp64\n"
38630"#ifdef cl_khr_fp16\n"
38631"half __ovld __cnfn dot(half p0, half p1);\n"
38632"half __ovld __cnfn dot(half2 p0, half2 p1);\n"
38633"half __ovld __cnfn dot(half3 p0, half3 p1);\n"
38634"half __ovld __cnfn dot(half4 p0, half4 p1);\n"
38635"#endif //cl_khr_fp16\n"
38636"\n"
38637"/**\n"
38638" * Returns the distance between p0 and p1. This is\n"
38639" * calculated as length(p0 - p1).\n"
38640" */\n"
38641"float __ovld __cnfn distance(float p0, float p1);\n"
38642"float __ovld __cnfn distance(float2 p0, float2 p1);\n"
38643"float __ovld __cnfn distance(float3 p0, float3 p1);\n"
38644"float __ovld __cnfn distance(float4 p0, float4 p1);\n"
38645"#ifdef cl_khr_fp64\n"
38646"double __ovld __cnfn distance(double p0, double p1);\n"
38647"double __ovld __cnfn distance(double2 p0, double2 p1);\n"
38648"double __ovld __cnfn distance(double3 p0, double3 p1);\n"
38649"double __ovld __cnfn distance(double4 p0, double4 p1);\n"
38650"#endif //cl_khr_fp64\n"
38651"#ifdef cl_khr_fp16\n"
38652"half __ovld __cnfn distance(half p0, half p1);\n"
38653"half __ovld __cnfn distance(half2 p0, half2 p1);\n"
38654"half __ovld __cnfn distance(half3 p0, half3 p1);\n"
38655"half __ovld __cnfn distance(half4 p0, half4 p1);\n"
38656"#endif //cl_khr_fp16\n"
38657"\n"
38658"/**\n"
38659" * Return the length of vector p, i.e.,\n"
38660" * sqrt(p.x2 + p.y 2 + ...)\n"
38661" */\n"
38662"float __ovld __cnfn length(float p);\n"
38663"float __ovld __cnfn length(float2 p);\n"
38664"float __ovld __cnfn length(float3 p);\n"
38665"float __ovld __cnfn length(float4 p);\n"
38666"#ifdef cl_khr_fp64\n"
38667"double __ovld __cnfn length(double p);\n"
38668"double __ovld __cnfn length(double2 p);\n"
38669"double __ovld __cnfn length(double3 p);\n"
38670"double __ovld __cnfn length(double4 p);\n"
38671"#endif //cl_khr_fp64\n"
38672"#ifdef cl_khr_fp16\n"
38673"half __ovld __cnfn length(half p);\n"
38674"half __ovld __cnfn length(half2 p);\n"
38675"half __ovld __cnfn length(half3 p);\n"
38676"half __ovld __cnfn length(half4 p);\n"
38677"#endif //cl_khr_fp16\n"
38678"\n"
38679"/**\n"
38680" * Returns a vector in the same direction as p but with a\n"
38681" * length of 1.\n"
38682" */\n"
38683"float __ovld __cnfn normalize(float p);\n"
38684"float2 __ovld __cnfn normalize(float2 p);\n"
38685"float3 __ovld __cnfn normalize(float3 p);\n"
38686"float4 __ovld __cnfn normalize(float4 p);\n"
38687"#ifdef cl_khr_fp64\n"
38688"double __ovld __cnfn normalize(double p);\n"
38689"double2 __ovld __cnfn normalize(double2 p);\n"
38690"double3 __ovld __cnfn normalize(double3 p);\n"
38691"double4 __ovld __cnfn normalize(double4 p);\n"
38692"#endif //cl_khr_fp64\n"
38693"#ifdef cl_khr_fp16\n"
38694"half __ovld __cnfn normalize(half p);\n"
38695"half2 __ovld __cnfn normalize(half2 p);\n"
38696"half3 __ovld __cnfn normalize(half3 p);\n"
38697"half4 __ovld __cnfn normalize(half4 p);\n"
38698"#endif //cl_khr_fp16\n"
38699"\n"
38700"/**\n"
38701" * Returns fast_length(p0 - p1).\n"
38702" */\n"
38703"float __ovld __cnfn fast_distance(float p0, float p1);\n"
38704"float __ovld __cnfn fast_distance(float2 p0, float2 p1);\n"
38705"float __ovld __cnfn fast_distance(float3 p0, float3 p1);\n"
38706"float __ovld __cnfn fast_distance(float4 p0, float4 p1);\n"
38707"#ifdef cl_khr_fp16\n"
38708"half __ovld __cnfn fast_distance(half p0, half p1);\n"
38709"half __ovld __cnfn fast_distance(half2 p0, half2 p1);\n"
38710"half __ovld __cnfn fast_distance(half3 p0, half3 p1);\n"
38711"half __ovld __cnfn fast_distance(half4 p0, half4 p1);\n"
38712"#endif //cl_khr_fp16\n"
38713"\n"
38714"/**\n"
38715" * Returns the length of vector p computed as:\n"
38716" * half_sqrt(p.x2 + p.y2 + ...)\n"
38717" */\n"
38718"float __ovld __cnfn fast_length(float p);\n"
38719"float __ovld __cnfn fast_length(float2 p);\n"
38720"float __ovld __cnfn fast_length(float3 p);\n"
38721"float __ovld __cnfn fast_length(float4 p);\n"
38722"#ifdef cl_khr_fp16\n"
38723"half __ovld __cnfn fast_length(half p);\n"
38724"half __ovld __cnfn fast_length(half2 p);\n"
38725"half __ovld __cnfn fast_length(half3 p);\n"
38726"half __ovld __cnfn fast_length(half4 p);\n"
38727"#endif //cl_khr_fp16\n"
38728"\n"
38729"/**\n"
38730" * Returns a vector in the same direction as p but with a\n"
38731" * length of 1. fast_normalize is computed as:\n"
38732" * p * half_rsqrt (p.x^2 + p.y^2 + ... )\n"
38733" * The result shall be within 8192 ulps error from the\n"
38734" * infinitely precise result of\n"
38735" * if (all(p == 0.0f))\n"
38736" * result = p;\n"
38737" * else\n"
38738" * result = p / sqrt (p.x^2 + p.y^2 + ...);\n"
38739" * with the following exceptions:\n"
38740" * 1) If the sum of squares is greater than FLT_MAX\n"
38741" * then the value of the floating-point values in the\n"
38742" * result vector are undefined.\n"
38743" * 2) If the sum of squares is less than FLT_MIN then\n"
38744" * the implementation may return back p.\n"
38745" * 3) If the device is in \"denorms are flushed to zero\"\n"
38746" * mode, individual operand elements with magnitude\n"
38747" * less than sqrt(FLT_MIN) may be flushed to zero\n"
38748" * before proceeding with the calculation.\n"
38749" */\n"
38750"float __ovld __cnfn fast_normalize(float p);\n"
38751"float2 __ovld __cnfn fast_normalize(float2 p);\n"
38752"float3 __ovld __cnfn fast_normalize(float3 p);\n"
38753"float4 __ovld __cnfn fast_normalize(float4 p);\n"
38754"#ifdef cl_khr_fp16\n"
38755"half __ovld __cnfn fast_normalize(half p);\n"
38756"half2 __ovld __cnfn fast_normalize(half2 p);\n"
38757"half3 __ovld __cnfn fast_normalize(half3 p);\n"
38758"half4 __ovld __cnfn fast_normalize(half4 p);\n"
38759"#endif //cl_khr_fp16\n"
38760"\n"
38761"// OpenCL v1.1 s6.11.6, v1.2 s6.12.6, v2.0 s6.13.6 - Relational Functions\n"
38762"\n"
38763"/**\n"
38764" * intn isequal (floatn x, floatn y)\n"
38765" * Returns the component-wise compare of x == y.\n"
38766" */\n"
38767"int __ovld __cnfn isequal(float x, float y);\n"
38768"int2 __ovld __cnfn isequal(float2 x, float2 y);\n"
38769"int3 __ovld __cnfn isequal(float3 x, float3 y);\n"
38770"int4 __ovld __cnfn isequal(float4 x, float4 y);\n"
38771"int8 __ovld __cnfn isequal(float8 x, float8 y);\n"
38772"int16 __ovld __cnfn isequal(float16 x, float16 y);\n"
38773"#ifdef cl_khr_fp64\n"
38774"int __ovld __cnfn isequal(double x, double y);\n"
38775"long2 __ovld __cnfn isequal(double2 x, double2 y);\n"
38776"long3 __ovld __cnfn isequal(double3 x, double3 y);\n"
38777"long4 __ovld __cnfn isequal(double4 x, double4 y);\n"
38778"long8 __ovld __cnfn isequal(double8 x, double8 y);\n"
38779"long16 __ovld __cnfn isequal(double16 x, double16 y);\n"
38780"#endif //cl_khr_fp64\n"
38781"#ifdef cl_khr_fp16\n"
38782"int __ovld __cnfn isequal(half x, half y);\n"
38783"short2 __ovld __cnfn isequal(half2 x, half2 y);\n"
38784"short3 __ovld __cnfn isequal(half3 x, half3 y);\n"
38785"short4 __ovld __cnfn isequal(half4 x, half4 y);\n"
38786"short8 __ovld __cnfn isequal(half8 x, half8 y);\n"
38787"short16 __ovld __cnfn isequal(half16 x, half16 y);\n"
38788"#endif //cl_khr_fp16\n"
38789"\n"
38790"/**\n"
38791" * Returns the component-wise compare of x != y.\n"
38792" */\n"
38793"int __ovld __cnfn isnotequal(float x, float y);\n"
38794"int2 __ovld __cnfn isnotequal(float2 x, float2 y);\n"
38795"int3 __ovld __cnfn isnotequal(float3 x, float3 y);\n"
38796"int4 __ovld __cnfn isnotequal(float4 x, float4 y);\n"
38797"int8 __ovld __cnfn isnotequal(float8 x, float8 y);\n"
38798"int16 __ovld __cnfn isnotequal(float16 x, float16 y);\n"
38799"#ifdef cl_khr_fp64\n"
38800"int __ovld __cnfn isnotequal(double x, double y);\n"
38801"long2 __ovld __cnfn isnotequal(double2 x, double2 y);\n"
38802"long3 __ovld __cnfn isnotequal(double3 x, double3 y);\n"
38803"long4 __ovld __cnfn isnotequal(double4 x, double4 y);\n"
38804"long8 __ovld __cnfn isnotequal(double8 x, double8 y);\n"
38805"long16 __ovld __cnfn isnotequal(double16 x, double16 y);\n"
38806"#endif //cl_khr_fp64\n"
38807"#ifdef cl_khr_fp16\n"
38808"int __ovld __cnfn isnotequal(half x, half y);\n"
38809"short2 __ovld __cnfn isnotequal(half2 x, half2 y);\n"
38810"short3 __ovld __cnfn isnotequal(half3 x, half3 y);\n"
38811"short4 __ovld __cnfn isnotequal(half4 x, half4 y);\n"
38812"short8 __ovld __cnfn isnotequal(half8 x, half8 y);\n"
38813"short16 __ovld __cnfn isnotequal(half16 x, half16 y);\n"
38814"#endif //cl_khr_fp16\n"
38815"\n"
38816"/**\n"
38817" * Returns the component-wise compare of x > y.\n"
38818" */\n"
38819"int __ovld __cnfn isgreater(float x, float y);\n"
38820"int2 __ovld __cnfn isgreater(float2 x, float2 y);\n"
38821"int3 __ovld __cnfn isgreater(float3 x, float3 y);\n"
38822"int4 __ovld __cnfn isgreater(float4 x, float4 y);\n"
38823"int8 __ovld __cnfn isgreater(float8 x, float8 y);\n"
38824"int16 __ovld __cnfn isgreater(float16 x, float16 y);\n"
38825"#ifdef cl_khr_fp64\n"
38826"int __ovld __cnfn isgreater(double x, double y);\n"
38827"long2 __ovld __cnfn isgreater(double2 x, double2 y);\n"
38828"long3 __ovld __cnfn isgreater(double3 x, double3 y);\n"
38829"long4 __ovld __cnfn isgreater(double4 x, double4 y);\n"
38830"long8 __ovld __cnfn isgreater(double8 x, double8 y);\n"
38831"long16 __ovld __cnfn isgreater(double16 x, double16 y);\n"
38832"#endif //cl_khr_fp64\n"
38833"#ifdef cl_khr_fp16\n"
38834"int __ovld __cnfn isgreater(half x, half y);\n"
38835"short2 __ovld __cnfn isgreater(half2 x, half2 y);\n"
38836"short3 __ovld __cnfn isgreater(half3 x, half3 y);\n"
38837"short4 __ovld __cnfn isgreater(half4 x, half4 y);\n"
38838"short8 __ovld __cnfn isgreater(half8 x, half8 y);\n"
38839"short16 __ovld __cnfn isgreater(half16 x, half16 y);\n"
38840"#endif //cl_khr_fp16\n"
38841"\n"
38842"/**\n"
38843" * Returns the component-wise compare of x >= y.\n"
38844" */\n"
38845"int __ovld __cnfn isgreaterequal(float x, float y);\n"
38846"int2 __ovld __cnfn isgreaterequal(float2 x, float2 y);\n"
38847"int3 __ovld __cnfn isgreaterequal(float3 x, float3 y);\n"
38848"int4 __ovld __cnfn isgreaterequal(float4 x, float4 y);\n"
38849"int8 __ovld __cnfn isgreaterequal(float8 x, float8 y);\n"
38850"int16 __ovld __cnfn isgreaterequal(float16 x, float16 y);\n"
38851"#ifdef cl_khr_fp64\n"
38852"int __ovld __cnfn isgreaterequal(double x, double y);\n"
38853"long2 __ovld __cnfn isgreaterequal(double2 x, double2 y);\n"
38854"long3 __ovld __cnfn isgreaterequal(double3 x, double3 y);\n"
38855"long4 __ovld __cnfn isgreaterequal(double4 x, double4 y);\n"
38856"long8 __ovld __cnfn isgreaterequal(double8 x, double8 y);\n"
38857"long16 __ovld __cnfn isgreaterequal(double16 x, double16 y);\n"
38858"#endif //cl_khr_fp64\n"
38859"#ifdef cl_khr_fp16\n"
38860"int __ovld __cnfn isgreaterequal(half x, half y);\n"
38861"short2 __ovld __cnfn isgreaterequal(half2 x, half2 y);\n"
38862"short3 __ovld __cnfn isgreaterequal(half3 x, half3 y);\n"
38863"short4 __ovld __cnfn isgreaterequal(half4 x, half4 y);\n"
38864"short8 __ovld __cnfn isgreaterequal(half8 x, half8 y);\n"
38865"short16 __ovld __cnfn isgreaterequal(half16 x, half16 y);\n"
38866"#endif //cl_khr_fp16\n"
38867"\n"
38868"/**\n"
38869" * Returns the component-wise compare of x < y.\n"
38870" */\n"
38871"int __ovld __cnfn isless(float x, float y);\n"
38872"int2 __ovld __cnfn isless(float2 x, float2 y);\n"
38873"int3 __ovld __cnfn isless(float3 x, float3 y);\n"
38874"int4 __ovld __cnfn isless(float4 x, float4 y);\n"
38875"int8 __ovld __cnfn isless(float8 x, float8 y);\n"
38876"int16 __ovld __cnfn isless(float16 x, float16 y);\n"
38877"#ifdef cl_khr_fp64\n"
38878"int __ovld __cnfn isless(double x, double y);\n"
38879"long2 __ovld __cnfn isless(double2 x, double2 y);\n"
38880"long3 __ovld __cnfn isless(double3 x, double3 y);\n"
38881"long4 __ovld __cnfn isless(double4 x, double4 y);\n"
38882"long8 __ovld __cnfn isless(double8 x, double8 y);\n"
38883"long16 __ovld __cnfn isless(double16 x, double16 y);\n"
38884"#endif //cl_khr_fp64\n"
38885"#ifdef cl_khr_fp16\n"
38886"int __ovld __cnfn isless(half x, half y);\n"
38887"short2 __ovld __cnfn isless(half2 x, half2 y);\n"
38888"short3 __ovld __cnfn isless(half3 x, half3 y);\n"
38889"short4 __ovld __cnfn isless(half4 x, half4 y);\n"
38890"short8 __ovld __cnfn isless(half8 x, half8 y);\n"
38891"short16 __ovld __cnfn isless(half16 x, half16 y);\n"
38892"#endif //cl_khr_fp16\n"
38893"\n"
38894"/**\n"
38895" * Returns the component-wise compare of x <= y.\n"
38896" */\n"
38897"int __ovld __cnfn islessequal(float x, float y);\n"
38898"int2 __ovld __cnfn islessequal(float2 x, float2 y);\n"
38899"int3 __ovld __cnfn islessequal(float3 x, float3 y);\n"
38900"int4 __ovld __cnfn islessequal(float4 x, float4 y);\n"
38901"int8 __ovld __cnfn islessequal(float8 x, float8 y);\n"
38902"int16 __ovld __cnfn islessequal(float16 x, float16 y);\n"
38903"#ifdef cl_khr_fp64\n"
38904"int __ovld __cnfn islessequal(double x, double y);\n"
38905"long2 __ovld __cnfn islessequal(double2 x, double2 y);\n"
38906"long3 __ovld __cnfn islessequal(double3 x, double3 y);\n"
38907"long4 __ovld __cnfn islessequal(double4 x, double4 y);\n"
38908"long8 __ovld __cnfn islessequal(double8 x, double8 y);\n"
38909"long16 __ovld __cnfn islessequal(double16 x, double16 y);\n"
38910"#endif //cl_khr_fp64\n"
38911"#ifdef cl_khr_fp16\n"
38912"int __ovld __cnfn islessequal(half x, half y);\n"
38913"short2 __ovld __cnfn islessequal(half2 x, half2 y);\n"
38914"short3 __ovld __cnfn islessequal(half3 x, half3 y);\n"
38915"short4 __ovld __cnfn islessequal(half4 x, half4 y);\n"
38916"short8 __ovld __cnfn islessequal(half8 x, half8 y);\n"
38917"short16 __ovld __cnfn islessequal(half16 x, half16 y);\n"
38918"#endif //cl_khr_fp16\n"
38919"\n"
38920"/**\n"
38921" * Returns the component-wise compare of\n"
38922" * (x < y) || (x > y) .\n"
38923" */\n"
38924"int __ovld __cnfn islessgreater(float x, float y);\n"
38925"int2 __ovld __cnfn islessgreater(float2 x, float2 y);\n"
38926"int3 __ovld __cnfn islessgreater(float3 x, float3 y);\n"
38927"int4 __ovld __cnfn islessgreater(float4 x, float4 y);\n"
38928"int8 __ovld __cnfn islessgreater(float8 x, float8 y);\n"
38929"int16 __ovld __cnfn islessgreater(float16 x, float16 y);\n"
38930"#ifdef cl_khr_fp64\n"
38931"int __ovld __cnfn islessgreater(double x, double y);\n"
38932"long2 __ovld __cnfn islessgreater(double2 x, double2 y);\n"
38933"long3 __ovld __cnfn islessgreater(double3 x, double3 y);\n"
38934"long4 __ovld __cnfn islessgreater(double4 x, double4 y);\n"
38935"long8 __ovld __cnfn islessgreater(double8 x, double8 y);\n"
38936"long16 __ovld __cnfn islessgreater(double16 x, double16 y);\n"
38937"#endif //cl_khr_fp64\n"
38938"#ifdef cl_khr_fp16\n"
38939"int __ovld __cnfn islessgreater(half x, half y);\n"
38940"short2 __ovld __cnfn islessgreater(half2 x, half2 y);\n"
38941"short3 __ovld __cnfn islessgreater(half3 x, half3 y);\n"
38942"short4 __ovld __cnfn islessgreater(half4 x, half4 y);\n"
38943"short8 __ovld __cnfn islessgreater(half8 x, half8 y);\n"
38944"short16 __ovld __cnfn islessgreater(half16 x, half16 y);\n"
38945"#endif //cl_khr_fp16\n"
38946"\n"
38947"/**\n"
38948" * Test for finite value.\n"
38949" */\n"
38950"int __ovld __cnfn isfinite(float);\n"
38951"int2 __ovld __cnfn isfinite(float2);\n"
38952"int3 __ovld __cnfn isfinite(float3);\n"
38953"int4 __ovld __cnfn isfinite(float4);\n"
38954"int8 __ovld __cnfn isfinite(float8);\n"
38955"int16 __ovld __cnfn isfinite(float16);\n"
38956"#ifdef cl_khr_fp64\n"
38957"int __ovld __cnfn isfinite(double);\n"
38958"long2 __ovld __cnfn isfinite(double2);\n"
38959"long3 __ovld __cnfn isfinite(double3);\n"
38960"long4 __ovld __cnfn isfinite(double4);\n"
38961"long8 __ovld __cnfn isfinite(double8);\n"
38962"long16 __ovld __cnfn isfinite(double16);\n"
38963"#endif //cl_khr_fp64\n"
38964"#ifdef cl_khr_fp16\n"
38965"int __ovld __cnfn isfinite(half);\n"
38966"short2 __ovld __cnfn isfinite(half2);\n"
38967"short3 __ovld __cnfn isfinite(half3);\n"
38968"short4 __ovld __cnfn isfinite(half4);\n"
38969"short8 __ovld __cnfn isfinite(half8);\n"
38970"short16 __ovld __cnfn isfinite(half16);\n"
38971"#endif //cl_khr_fp16\n"
38972"\n"
38973"/**\n"
38974" * Test for infinity value (+ve or -ve) .\n"
38975" */\n"
38976"int __ovld __cnfn isinf(float);\n"
38977"int2 __ovld __cnfn isinf(float2);\n"
38978"int3 __ovld __cnfn isinf(float3);\n"
38979"int4 __ovld __cnfn isinf(float4);\n"
38980"int8 __ovld __cnfn isinf(float8);\n"
38981"int16 __ovld __cnfn isinf(float16);\n"
38982"#ifdef cl_khr_fp64\n"
38983"int __ovld __cnfn isinf(double);\n"
38984"long2 __ovld __cnfn isinf(double2);\n"
38985"long3 __ovld __cnfn isinf(double3);\n"
38986"long4 __ovld __cnfn isinf(double4);\n"
38987"long8 __ovld __cnfn isinf(double8);\n"
38988"long16 __ovld __cnfn isinf(double16);\n"
38989"#endif //cl_khr_fp64\n"
38990"#ifdef cl_khr_fp16\n"
38991"int __ovld __cnfn isinf(half);\n"
38992"short2 __ovld __cnfn isinf(half2);\n"
38993"short3 __ovld __cnfn isinf(half3);\n"
38994"short4 __ovld __cnfn isinf(half4);\n"
38995"short8 __ovld __cnfn isinf(half8);\n"
38996"short16 __ovld __cnfn isinf(half16);\n"
38997"#endif //cl_khr_fp16\n"
38998"\n"
38999"/**\n"
39000" * Test for a NaN.\n"
39001" */\n"
39002"int __ovld __cnfn isnan(float);\n"
39003"int2 __ovld __cnfn isnan(float2);\n"
39004"int3 __ovld __cnfn isnan(float3);\n"
39005"int4 __ovld __cnfn isnan(float4);\n"
39006"int8 __ovld __cnfn isnan(float8);\n"
39007"int16 __ovld __cnfn isnan(float16);\n"
39008"#ifdef cl_khr_fp64\n"
39009"int __ovld __cnfn isnan(double);\n"
39010"long2 __ovld __cnfn isnan(double2);\n"
39011"long3 __ovld __cnfn isnan(double3);\n"
39012"long4 __ovld __cnfn isnan(double4);\n"
39013"long8 __ovld __cnfn isnan(double8);\n"
39014"long16 __ovld __cnfn isnan(double16);\n"
39015"#endif //cl_khr_fp64\n"
39016"#ifdef cl_khr_fp16\n"
39017"int __ovld __cnfn isnan(half);\n"
39018"short2 __ovld __cnfn isnan(half2);\n"
39019"short3 __ovld __cnfn isnan(half3);\n"
39020"short4 __ovld __cnfn isnan(half4);\n"
39021"short8 __ovld __cnfn isnan(half8);\n"
39022"short16 __ovld __cnfn isnan(half16);\n"
39023"#endif //cl_khr_fp16\n"
39024"\n"
39025"/**\n"
39026" * Test for a normal value.\n"
39027" */\n"
39028"int __ovld __cnfn isnormal(float);\n"
39029"int2 __ovld __cnfn isnormal(float2);\n"
39030"int3 __ovld __cnfn isnormal(float3);\n"
39031"int4 __ovld __cnfn isnormal(float4);\n"
39032"int8 __ovld __cnfn isnormal(float8);\n"
39033"int16 __ovld __cnfn isnormal(float16);\n"
39034"#ifdef cl_khr_fp64\n"
39035"int __ovld __cnfn isnormal(double);\n"
39036"long2 __ovld __cnfn isnormal(double2);\n"
39037"long3 __ovld __cnfn isnormal(double3);\n"
39038"long4 __ovld __cnfn isnormal(double4);\n"
39039"long8 __ovld __cnfn isnormal(double8);\n"
39040"long16 __ovld __cnfn isnormal(double16);\n"
39041"#endif //cl_khr_fp64\n"
39042"#ifdef cl_khr_fp16\n"
39043"int __ovld __cnfn isnormal(half);\n"
39044"short2 __ovld __cnfn isnormal(half2);\n"
39045"short3 __ovld __cnfn isnormal(half3);\n"
39046"short4 __ovld __cnfn isnormal(half4);\n"
39047"short8 __ovld __cnfn isnormal(half8);\n"
39048"short16 __ovld __cnfn isnormal(half16);\n"
39049"#endif //cl_khr_fp16\n"
39050"\n"
39051"/**\n"
39052" * Test if arguments are ordered. isordered() takes\n"
39053" * arguments x and y, and returns the result\n"
39054" * isequal(x, x) && isequal(y, y).\n"
39055" */\n"
39056"int __ovld __cnfn isordered(float x, float y);\n"
39057"int2 __ovld __cnfn isordered(float2 x, float2 y);\n"
39058"int3 __ovld __cnfn isordered(float3 x, float3 y);\n"
39059"int4 __ovld __cnfn isordered(float4 x, float4 y);\n"
39060"int8 __ovld __cnfn isordered(float8 x, float8 y);\n"
39061"int16 __ovld __cnfn isordered(float16 x, float16 y);\n"
39062"#ifdef cl_khr_fp64\n"
39063"int __ovld __cnfn isordered(double x, double y);\n"
39064"long2 __ovld __cnfn isordered(double2 x, double2 y);\n"
39065"long3 __ovld __cnfn isordered(double3 x, double3 y);\n"
39066"long4 __ovld __cnfn isordered(double4 x, double4 y);\n"
39067"long8 __ovld __cnfn isordered(double8 x, double8 y);\n"
39068"long16 __ovld __cnfn isordered(double16 x, double16 y);\n"
39069"#endif //cl_khr_fp64\n"
39070"#ifdef cl_khr_fp16\n"
39071"int __ovld __cnfn isordered(half x, half y);\n"
39072"short2 __ovld __cnfn isordered(half2 x, half2 y);\n"
39073"short3 __ovld __cnfn isordered(half3 x, half3 y);\n"
39074"short4 __ovld __cnfn isordered(half4 x, half4 y);\n"
39075"short8 __ovld __cnfn isordered(half8 x, half8 y);\n"
39076"short16 __ovld __cnfn isordered(half16 x, half16 y);\n"
39077"#endif //cl_khr_fp16\n"
39078"\n"
39079"/**\n"
39080" * Test if arguments are unordered. isunordered()\n"
39081" * takes arguments x and y, returning non-zero if x or y\n"
39082" * is NaN, and zero otherwise.\n"
39083" */\n"
39084"int __ovld __cnfn isunordered(float x, float y);\n"
39085"int2 __ovld __cnfn isunordered(float2 x, float2 y);\n"
39086"int3 __ovld __cnfn isunordered(float3 x, float3 y);\n"
39087"int4 __ovld __cnfn isunordered(float4 x, float4 y);\n"
39088"int8 __ovld __cnfn isunordered(float8 x, float8 y);\n"
39089"int16 __ovld __cnfn isunordered(float16 x, float16 y);\n"
39090"#ifdef cl_khr_fp64\n"
39091"int __ovld __cnfn isunordered(double x, double y);\n"
39092"long2 __ovld __cnfn isunordered(double2 x, double2 y);\n"
39093"long3 __ovld __cnfn isunordered(double3 x, double3 y);\n"
39094"long4 __ovld __cnfn isunordered(double4 x, double4 y);\n"
39095"long8 __ovld __cnfn isunordered(double8 x, double8 y);\n"
39096"long16 __ovld __cnfn isunordered(double16 x, double16 y);\n"
39097"#endif //cl_khr_fp64\n"
39098"#ifdef cl_khr_fp16\n"
39099"int __ovld __cnfn isunordered(half x, half y);\n"
39100"short2 __ovld __cnfn isunordered(half2 x, half2 y);\n"
39101"short3 __ovld __cnfn isunordered(half3 x, half3 y);\n"
39102"short4 __ovld __cnfn isunordered(half4 x, half4 y);\n"
39103"short8 __ovld __cnfn isunordered(half8 x, half8 y);\n"
39104"short16 __ovld __cnfn isunordered(half16 x, half16 y);\n"
39105"#endif //cl_khr_fp16\n"
39106"\n"
39107"/**\n"
39108" * Test for sign bit. The scalar version of the function\n"
39109" * returns a 1 if the sign bit in the float is set else returns\n"
39110" * 0. The vector version of the function returns the\n"
39111" * following for each component in floatn: a -1 if the\n"
39112" * sign bit in the float is set else returns 0.\n"
39113" */\n"
39114"int __ovld __cnfn signbit(float);\n"
39115"int2 __ovld __cnfn signbit(float2);\n"
39116"int3 __ovld __cnfn signbit(float3);\n"
39117"int4 __ovld __cnfn signbit(float4);\n"
39118"int8 __ovld __cnfn signbit(float8);\n"
39119"int16 __ovld __cnfn signbit(float16);\n"
39120"#ifdef cl_khr_fp64\n"
39121"int __ovld __cnfn signbit(double);\n"
39122"long2 __ovld __cnfn signbit(double2);\n"
39123"long3 __ovld __cnfn signbit(double3);\n"
39124"long4 __ovld __cnfn signbit(double4);\n"
39125"long8 __ovld __cnfn signbit(double8);\n"
39126"long16 __ovld __cnfn signbit(double16);\n"
39127"#endif //cl_khr_fp64\n"
39128"#ifdef cl_khr_fp16\n"
39129"int __ovld __cnfn signbit(half);\n"
39130"short2 __ovld __cnfn signbit(half2);\n"
39131"short3 __ovld __cnfn signbit(half3);\n"
39132"short4 __ovld __cnfn signbit(half4);\n"
39133"short8 __ovld __cnfn signbit(half8);\n"
39134"short16 __ovld __cnfn signbit(half16);\n"
39135"#endif //cl_khr_fp16\n"
39136"\n"
39137"/**\n"
39138" * Returns 1 if the most significant bit in any component\n"
39139" * of x is set; otherwise returns 0.\n"
39140" */\n"
39141"int __ovld __cnfn any(char x);\n"
39142"int __ovld __cnfn any(char2 x);\n"
39143"int __ovld __cnfn any(char3 x);\n"
39144"int __ovld __cnfn any(char4 x);\n"
39145"int __ovld __cnfn any(char8 x);\n"
39146"int __ovld __cnfn any(char16 x);\n"
39147"int __ovld __cnfn any(short x);\n"
39148"int __ovld __cnfn any(short2 x);\n"
39149"int __ovld __cnfn any(short3 x);\n"
39150"int __ovld __cnfn any(short4 x);\n"
39151"int __ovld __cnfn any(short8 x);\n"
39152"int __ovld __cnfn any(short16 x);\n"
39153"int __ovld __cnfn any(int x);\n"
39154"int __ovld __cnfn any(int2 x);\n"
39155"int __ovld __cnfn any(int3 x);\n"
39156"int __ovld __cnfn any(int4 x);\n"
39157"int __ovld __cnfn any(int8 x);\n"
39158"int __ovld __cnfn any(int16 x);\n"
39159"int __ovld __cnfn any(long x);\n"
39160"int __ovld __cnfn any(long2 x);\n"
39161"int __ovld __cnfn any(long3 x);\n"
39162"int __ovld __cnfn any(long4 x);\n"
39163"int __ovld __cnfn any(long8 x);\n"
39164"int __ovld __cnfn any(long16 x);\n"
39165"\n"
39166"/**\n"
39167" * Returns 1 if the most significant bit in all components\n"
39168" * of x is set; otherwise returns 0.\n"
39169" */\n"
39170"int __ovld __cnfn all(char x);\n"
39171"int __ovld __cnfn all(char2 x);\n"
39172"int __ovld __cnfn all(char3 x);\n"
39173"int __ovld __cnfn all(char4 x);\n"
39174"int __ovld __cnfn all(char8 x);\n"
39175"int __ovld __cnfn all(char16 x);\n"
39176"int __ovld __cnfn all(short x);\n"
39177"int __ovld __cnfn all(short2 x);\n"
39178"int __ovld __cnfn all(short3 x);\n"
39179"int __ovld __cnfn all(short4 x);\n"
39180"int __ovld __cnfn all(short8 x);\n"
39181"int __ovld __cnfn all(short16 x);\n"
39182"int __ovld __cnfn all(int x);\n"
39183"int __ovld __cnfn all(int2 x);\n"
39184"int __ovld __cnfn all(int3 x);\n"
39185"int __ovld __cnfn all(int4 x);\n"
39186"int __ovld __cnfn all(int8 x);\n"
39187"int __ovld __cnfn all(int16 x);\n"
39188"int __ovld __cnfn all(long x);\n"
39189"int __ovld __cnfn all(long2 x);\n"
39190"int __ovld __cnfn all(long3 x);\n"
39191"int __ovld __cnfn all(long4 x);\n"
39192"int __ovld __cnfn all(long8 x);\n"
39193"int __ovld __cnfn all(long16 x);\n"
39194"\n"
39195"/**\n"
39196" * Each bit of the result is the corresponding bit of a if\n"
39197" * the corresponding bit of c is 0. Otherwise it is the\n"
39198" * corresponding bit of b.\n"
39199" */\n"
39200"char __ovld __cnfn bitselect(char a, char b, char c);\n"
39201"uchar __ovld __cnfn bitselect(uchar a, uchar b, uchar c);\n"
39202"char2 __ovld __cnfn bitselect(char2 a, char2 b, char2 c);\n"
39203"uchar2 __ovld __cnfn bitselect(uchar2 a, uchar2 b, uchar2 c);\n"
39204"char3 __ovld __cnfn bitselect(char3 a, char3 b, char3 c);\n"
39205"uchar3 __ovld __cnfn bitselect(uchar3 a, uchar3 b, uchar3 c);\n"
39206"char4 __ovld __cnfn bitselect(char4 a, char4 b, char4 c);\n"
39207"uchar4 __ovld __cnfn bitselect(uchar4 a, uchar4 b, uchar4 c);\n"
39208"char8 __ovld __cnfn bitselect(char8 a, char8 b, char8 c);\n"
39209"uchar8 __ovld __cnfn bitselect(uchar8 a, uchar8 b, uchar8 c);\n"
39210"char16 __ovld __cnfn bitselect(char16 a, char16 b, char16 c);\n"
39211"uchar16 __ovld __cnfn bitselect(uchar16 a, uchar16 b, uchar16 c);\n"
39212"short __ovld __cnfn bitselect(short a, short b, short c);\n"
39213"ushort __ovld __cnfn bitselect(ushort a, ushort b, ushort c);\n"
39214"short2 __ovld __cnfn bitselect(short2 a, short2 b, short2 c);\n"
39215"ushort2 __ovld __cnfn bitselect(ushort2 a, ushort2 b, ushort2 c);\n"
39216"short3 __ovld __cnfn bitselect(short3 a, short3 b, short3 c);\n"
39217"ushort3 __ovld __cnfn bitselect(ushort3 a, ushort3 b, ushort3 c);\n"
39218"short4 __ovld __cnfn bitselect(short4 a, short4 b, short4 c);\n"
39219"ushort4 __ovld __cnfn bitselect(ushort4 a, ushort4 b, ushort4 c);\n"
39220"short8 __ovld __cnfn bitselect(short8 a, short8 b, short8 c);\n"
39221"ushort8 __ovld __cnfn bitselect(ushort8 a, ushort8 b, ushort8 c);\n"
39222"short16 __ovld __cnfn bitselect(short16 a, short16 b, short16 c);\n"
39223"ushort16 __ovld __cnfn bitselect(ushort16 a, ushort16 b, ushort16 c);\n"
39224"int __ovld __cnfn bitselect(int a, int b, int c);\n"
39225"uint __ovld __cnfn bitselect(uint a, uint b, uint c);\n"
39226"int2 __ovld __cnfn bitselect(int2 a, int2 b, int2 c);\n"
39227"uint2 __ovld __cnfn bitselect(uint2 a, uint2 b, uint2 c);\n"
39228"int3 __ovld __cnfn bitselect(int3 a, int3 b, int3 c);\n"
39229"uint3 __ovld __cnfn bitselect(uint3 a, uint3 b, uint3 c);\n"
39230"int4 __ovld __cnfn bitselect(int4 a, int4 b, int4 c);\n"
39231"uint4 __ovld __cnfn bitselect(uint4 a, uint4 b, uint4 c);\n"
39232"int8 __ovld __cnfn bitselect(int8 a, int8 b, int8 c);\n"
39233"uint8 __ovld __cnfn bitselect(uint8 a, uint8 b, uint8 c);\n"
39234"int16 __ovld __cnfn bitselect(int16 a, int16 b, int16 c);\n"
39235"uint16 __ovld __cnfn bitselect(uint16 a, uint16 b, uint16 c);\n"
39236"long __ovld __cnfn bitselect(long a, long b, long c);\n"
39237"ulong __ovld __cnfn bitselect(ulong a, ulong b, ulong c);\n"
39238"long2 __ovld __cnfn bitselect(long2 a, long2 b, long2 c);\n"
39239"ulong2 __ovld __cnfn bitselect(ulong2 a, ulong2 b, ulong2 c);\n"
39240"long3 __ovld __cnfn bitselect(long3 a, long3 b, long3 c);\n"
39241"ulong3 __ovld __cnfn bitselect(ulong3 a, ulong3 b, ulong3 c);\n"
39242"long4 __ovld __cnfn bitselect(long4 a, long4 b, long4 c);\n"
39243"ulong4 __ovld __cnfn bitselect(ulong4 a, ulong4 b, ulong4 c);\n"
39244"long8 __ovld __cnfn bitselect(long8 a, long8 b, long8 c);\n"
39245"ulong8 __ovld __cnfn bitselect(ulong8 a, ulong8 b, ulong8 c);\n"
39246"long16 __ovld __cnfn bitselect(long16 a, long16 b, long16 c);\n"
39247"ulong16 __ovld __cnfn bitselect(ulong16 a, ulong16 b, ulong16 c);\n"
39248"float __ovld __cnfn bitselect(float a, float b, float c);\n"
39249"float2 __ovld __cnfn bitselect(float2 a, float2 b, float2 c);\n"
39250"float3 __ovld __cnfn bitselect(float3 a, float3 b, float3 c);\n"
39251"float4 __ovld __cnfn bitselect(float4 a, float4 b, float4 c);\n"
39252"float8 __ovld __cnfn bitselect(float8 a, float8 b, float8 c);\n"
39253"float16 __ovld __cnfn bitselect(float16 a, float16 b, float16 c);\n"
39254"#ifdef cl_khr_fp64\n"
39255"double __ovld __cnfn bitselect(double a, double b, double c);\n"
39256"double2 __ovld __cnfn bitselect(double2 a, double2 b, double2 c);\n"
39257"double3 __ovld __cnfn bitselect(double3 a, double3 b, double3 c);\n"
39258"double4 __ovld __cnfn bitselect(double4 a, double4 b, double4 c);\n"
39259"double8 __ovld __cnfn bitselect(double8 a, double8 b, double8 c);\n"
39260"double16 __ovld __cnfn bitselect(double16 a, double16 b, double16 c);\n"
39261"#endif //cl_khr_fp64\n"
39262"#ifdef cl_khr_fp16\n"
39263"half __ovld __cnfn bitselect(half a, half b, half c);\n"
39264"half2 __ovld __cnfn bitselect(half2 a, half2 b, half2 c);\n"
39265"half3 __ovld __cnfn bitselect(half3 a, half3 b, half3 c);\n"
39266"half4 __ovld __cnfn bitselect(half4 a, half4 b, half4 c);\n"
39267"half8 __ovld __cnfn bitselect(half8 a, half8 b, half8 c);\n"
39268"half16 __ovld __cnfn bitselect(half16 a, half16 b, half16 c);\n"
39269"#endif //cl_khr_fp16\n"
39270"\n"
39271"/**\n"
39272" * For each component of a vector type,\n"
39273" * result[i] = if MSB of c[i] is set ? b[i] : a[i].\n"
39274" * For a scalar type, result = c ? b : a.\n"
39275" * b and a must have the same type.\n"
39276" * c must have the same number of elements and bits as a.\n"
39277" */\n"
39278"char __ovld __cnfn select(char a, char b, char c);\n"
39279"uchar __ovld __cnfn select(uchar a, uchar b, char c);\n"
39280"char2 __ovld __cnfn select(char2 a, char2 b, char2 c);\n"
39281"uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, char2 c);\n"
39282"char3 __ovld __cnfn select(char3 a, char3 b, char3 c);\n"
39283"uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, char3 c);\n"
39284"char4 __ovld __cnfn select(char4 a, char4 b, char4 c);\n"
39285"uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, char4 c);\n"
39286"char8 __ovld __cnfn select(char8 a, char8 b, char8 c);\n"
39287"uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, char8 c);\n"
39288"char16 __ovld __cnfn select(char16 a, char16 b, char16 c);\n"
39289"uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, char16 c);\n"
39290"\n"
39291"short __ovld __cnfn select(short a, short b, short c);\n"
39292"ushort __ovld __cnfn select(ushort a, ushort b, short c);\n"
39293"short2 __ovld __cnfn select(short2 a, short2 b, short2 c);\n"
39294"ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, short2 c);\n"
39295"short3 __ovld __cnfn select(short3 a, short3 b, short3 c);\n"
39296"ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, short3 c);\n"
39297"short4 __ovld __cnfn select(short4 a, short4 b, short4 c);\n"
39298"ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, short4 c);\n"
39299"short8 __ovld __cnfn select(short8 a, short8 b, short8 c);\n"
39300"ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, short8 c);\n"
39301"short16 __ovld __cnfn select(short16 a, short16 b, short16 c);\n"
39302"ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, short16 c);\n"
39303"\n"
39304"int __ovld __cnfn select(int a, int b, int c);\n"
39305"uint __ovld __cnfn select(uint a, uint b, int c);\n"
39306"int2 __ovld __cnfn select(int2 a, int2 b, int2 c);\n"
39307"uint2 __ovld __cnfn select(uint2 a, uint2 b, int2 c);\n"
39308"int3 __ovld __cnfn select(int3 a, int3 b, int3 c);\n"
39309"uint3 __ovld __cnfn select(uint3 a, uint3 b, int3 c);\n"
39310"int4 __ovld __cnfn select(int4 a, int4 b, int4 c);\n"
39311"uint4 __ovld __cnfn select(uint4 a, uint4 b, int4 c);\n"
39312"int8 __ovld __cnfn select(int8 a, int8 b, int8 c);\n"
39313"uint8 __ovld __cnfn select(uint8 a, uint8 b, int8 c);\n"
39314"int16 __ovld __cnfn select(int16 a, int16 b, int16 c);\n"
39315"uint16 __ovld __cnfn select(uint16 a, uint16 b, int16 c);\n"
39316"float __ovld __cnfn select(float a, float b, int c);\n"
39317"float2 __ovld __cnfn select(float2 a, float2 b, int2 c);\n"
39318"float3 __ovld __cnfn select(float3 a, float3 b, int3 c);\n"
39319"float4 __ovld __cnfn select(float4 a, float4 b, int4 c);\n"
39320"float8 __ovld __cnfn select(float8 a, float8 b, int8 c);\n"
39321"float16 __ovld __cnfn select(float16 a, float16 b, int16 c);\n"
39322"\n"
39323"long __ovld __cnfn select(long a, long b, long c);\n"
39324"ulong __ovld __cnfn select(ulong a, ulong b, long c);\n"
39325"long2 __ovld __cnfn select(long2 a, long2 b, long2 c);\n"
39326"ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, long2 c);\n"
39327"long3 __ovld __cnfn select(long3 a, long3 b, long3 c);\n"
39328"ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, long3 c);\n"
39329"long4 __ovld __cnfn select(long4 a, long4 b, long4 c);\n"
39330"ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, long4 c);\n"
39331"long8 __ovld __cnfn select(long8 a, long8 b, long8 c);\n"
39332"ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, long8 c);\n"
39333"long16 __ovld __cnfn select(long16 a, long16 b, long16 c);\n"
39334"ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, long16 c);\n"
39335"\n"
39336"char __ovld __cnfn select(char a, char b, uchar c);\n"
39337"uchar __ovld __cnfn select(uchar a, uchar b, uchar c);\n"
39338"char2 __ovld __cnfn select(char2 a, char2 b, uchar2 c);\n"
39339"uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, uchar2 c);\n"
39340"char3 __ovld __cnfn select(char3 a, char3 b, uchar3 c);\n"
39341"uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, uchar3 c);\n"
39342"char4 __ovld __cnfn select(char4 a, char4 b, uchar4 c);\n"
39343"uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, uchar4 c);\n"
39344"char8 __ovld __cnfn select(char8 a, char8 b, uchar8 c);\n"
39345"uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, uchar8 c);\n"
39346"char16 __ovld __cnfn select(char16 a, char16 b, uchar16 c);\n"
39347"uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, uchar16 c);\n"
39348"\n"
39349"short __ovld __cnfn select(short a, short b, ushort c);\n"
39350"ushort __ovld __cnfn select(ushort a, ushort b, ushort c);\n"
39351"short2 __ovld __cnfn select(short2 a, short2 b, ushort2 c);\n"
39352"ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, ushort2 c);\n"
39353"short3 __ovld __cnfn select(short3 a, short3 b, ushort3 c);\n"
39354"ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, ushort3 c);\n"
39355"short4 __ovld __cnfn select(short4 a, short4 b, ushort4 c);\n"
39356"ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, ushort4 c);\n"
39357"short8 __ovld __cnfn select(short8 a, short8 b, ushort8 c);\n"
39358"ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, ushort8 c);\n"
39359"short16 __ovld __cnfn select(short16 a, short16 b, ushort16 c);\n"
39360"ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, ushort16 c);\n"
39361"\n"
39362"int __ovld __cnfn select(int a, int b, uint c);\n"
39363"uint __ovld __cnfn select(uint a, uint b, uint c);\n"
39364"int2 __ovld __cnfn select(int2 a, int2 b, uint2 c);\n"
39365"uint2 __ovld __cnfn select(uint2 a, uint2 b, uint2 c);\n"
39366"int3 __ovld __cnfn select(int3 a, int3 b, uint3 c);\n"
39367"uint3 __ovld __cnfn select(uint3 a, uint3 b, uint3 c);\n"
39368"int4 __ovld __cnfn select(int4 a, int4 b, uint4 c);\n"
39369"uint4 __ovld __cnfn select(uint4 a, uint4 b, uint4 c);\n"
39370"int8 __ovld __cnfn select(int8 a, int8 b, uint8 c);\n"
39371"uint8 __ovld __cnfn select(uint8 a, uint8 b, uint8 c);\n"
39372"int16 __ovld __cnfn select(int16 a, int16 b, uint16 c);\n"
39373"uint16 __ovld __cnfn select(uint16 a, uint16 b, uint16 c);\n"
39374"float __ovld __cnfn select(float a, float b, uint c);\n"
39375"float2 __ovld __cnfn select(float2 a, float2 b, uint2 c);\n"
39376"float3 __ovld __cnfn select(float3 a, float3 b, uint3 c);\n"
39377"float4 __ovld __cnfn select(float4 a, float4 b, uint4 c);\n"
39378"float8 __ovld __cnfn select(float8 a, float8 b, uint8 c);\n"
39379"float16 __ovld __cnfn select(float16 a, float16 b, uint16 c);\n"
39380"\n"
39381"long __ovld __cnfn select(long a, long b, ulong c);\n"
39382"ulong __ovld __cnfn select(ulong a, ulong b, ulong c);\n"
39383"long2 __ovld __cnfn select(long2 a, long2 b, ulong2 c);\n"
39384"ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, ulong2 c);\n"
39385"long3 __ovld __cnfn select(long3 a, long3 b, ulong3 c);\n"
39386"ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, ulong3 c);\n"
39387"long4 __ovld __cnfn select(long4 a, long4 b, ulong4 c);\n"
39388"ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, ulong4 c);\n"
39389"long8 __ovld __cnfn select(long8 a, long8 b, ulong8 c);\n"
39390"ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c);\n"
39391"long16 __ovld __cnfn select(long16 a, long16 b, ulong16 c);\n"
39392"ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ulong16 c);\n"
39393"\n"
39394"#ifdef cl_khr_fp64\n"
39395"double __ovld __cnfn select(double a, double b, long c);\n"
39396"double2 __ovld __cnfn select(double2 a, double2 b, long2 c);\n"
39397"double3 __ovld __cnfn select(double3 a, double3 b, long3 c);\n"
39398"double4 __ovld __cnfn select(double4 a, double4 b, long4 c);\n"
39399"double8 __ovld __cnfn select(double8 a, double8 b, long8 c);\n"
39400"double16 __ovld __cnfn select(double16 a, double16 b, long16 c);\n"
39401"double __ovld __cnfn select(double a, double b, ulong c);\n"
39402"double2 __ovld __cnfn select(double2 a, double2 b, ulong2 c);\n"
39403"double3 __ovld __cnfn select(double3 a, double3 b, ulong3 c);\n"
39404"double4 __ovld __cnfn select(double4 a, double4 b, ulong4 c);\n"
39405"double8 __ovld __cnfn select(double8 a, double8 b, ulong8 c);\n"
39406"double16 __ovld __cnfn select(double16 a, double16 b, ulong16 c);\n"
39407"#endif //cl_khr_fp64\n"
39408"#ifdef cl_khr_fp16\n"
39409"half __ovld __cnfn select(half a, half b, short c);\n"
39410"half2 __ovld __cnfn select(half2 a, half2 b, short2 c);\n"
39411"half3 __ovld __cnfn select(half3 a, half3 b, short3 c);\n"
39412"half4 __ovld __cnfn select(half4 a, half4 b, short4 c);\n"
39413"half8 __ovld __cnfn select(half8 a, half8 b, short8 c);\n"
39414"half16 __ovld __cnfn select(half16 a, half16 b, short16 c);\n"
39415"half __ovld __cnfn select(half a, half b, ushort c);\n"
39416"half2 __ovld __cnfn select(half2 a, half2 b, ushort2 c);\n"
39417"half3 __ovld __cnfn select(half3 a, half3 b, ushort3 c);\n"
39418"half4 __ovld __cnfn select(half4 a, half4 b, ushort4 c);\n"
39419"half8 __ovld __cnfn select(half8 a, half8 b, ushort8 c);\n"
39420"half16 __ovld __cnfn select(half16 a, half16 b, ushort16 c);\n"
39421"#endif //cl_khr_fp16\n"
39422"\n"
39423"// OpenCL v1.1 s6.11.7, v1.2 s6.12.7, v2.0 s6.13.7 - Vector Data Load and Store Functions\n"
39424"// OpenCL extensions v1.1 s9.6.6, v1.2 s9.5.6, v2.0 s9.4.6 - Vector Data Load and Store Functions for Half Type\n"
39425"/**\n"
39426" * Use generic type gentype to indicate the built-in data types\n"
39427" * char, uchar, short, ushort, int, uint, long, ulong, float,\n"
39428" * double or half.\n"
39429" *\n"
39430" * vloadn return sizeof (gentypen) bytes of data read from address (p + (offset * n)).\n"
39431" *\n"
39432" * vstoren write sizeof (gentypen) bytes given by data to address (p + (offset * n)).\n"
39433" *\n"
39434" * The address computed as (p + (offset * n)) must be\n"
39435" * 8-bit aligned if gentype is char, uchar;\n"
39436" * 16-bit aligned if gentype is short, ushort, half;\n"
39437" * 32-bit aligned if gentype is int, uint, float;\n"
39438" * 64-bit aligned if gentype is long, ulong, double.\n"
39439" */\n"
39440"\n"
39441"char2 __ovld vload2(size_t offset, const __constant char *p);\n"
39442"uchar2 __ovld vload2(size_t offset, const __constant uchar *p);\n"
39443"short2 __ovld vload2(size_t offset, const __constant short *p);\n"
39444"ushort2 __ovld vload2(size_t offset, const __constant ushort *p);\n"
39445"int2 __ovld vload2(size_t offset, const __constant int *p);\n"
39446"uint2 __ovld vload2(size_t offset, const __constant uint *p);\n"
39447"long2 __ovld vload2(size_t offset, const __constant long *p);\n"
39448"ulong2 __ovld vload2(size_t offset, const __constant ulong *p);\n"
39449"float2 __ovld vload2(size_t offset, const __constant float *p);\n"
39450"char3 __ovld vload3(size_t offset, const __constant char *p);\n"
39451"uchar3 __ovld vload3(size_t offset, const __constant uchar *p);\n"
39452"short3 __ovld vload3(size_t offset, const __constant short *p);\n"
39453"ushort3 __ovld vload3(size_t offset, const __constant ushort *p);\n"
39454"int3 __ovld vload3(size_t offset, const __constant int *p);\n"
39455"uint3 __ovld vload3(size_t offset, const __constant uint *p);\n"
39456"long3 __ovld vload3(size_t offset, const __constant long *p);\n"
39457"ulong3 __ovld vload3(size_t offset, const __constant ulong *p);\n"
39458"float3 __ovld vload3(size_t offset, const __constant float *p);\n"
39459"char4 __ovld vload4(size_t offset, const __constant char *p);\n"
39460"uchar4 __ovld vload4(size_t offset, const __constant uchar *p);\n"
39461"short4 __ovld vload4(size_t offset, const __constant short *p);\n"
39462"ushort4 __ovld vload4(size_t offset, const __constant ushort *p);\n"
39463"int4 __ovld vload4(size_t offset, const __constant int *p);\n"
39464"uint4 __ovld vload4(size_t offset, const __constant uint *p);\n"
39465"long4 __ovld vload4(size_t offset, const __constant long *p);\n"
39466"ulong4 __ovld vload4(size_t offset, const __constant ulong *p);\n"
39467"float4 __ovld vload4(size_t offset, const __constant float *p);\n"
39468"char8 __ovld vload8(size_t offset, const __constant char *p);\n"
39469"uchar8 __ovld vload8(size_t offset, const __constant uchar *p);\n"
39470"short8 __ovld vload8(size_t offset, const __constant short *p);\n"
39471"ushort8 __ovld vload8(size_t offset, const __constant ushort *p);\n"
39472"int8 __ovld vload8(size_t offset, const __constant int *p);\n"
39473"uint8 __ovld vload8(size_t offset, const __constant uint *p);\n"
39474"long8 __ovld vload8(size_t offset, const __constant long *p);\n"
39475"ulong8 __ovld vload8(size_t offset, const __constant ulong *p);\n"
39476"float8 __ovld vload8(size_t offset, const __constant float *p);\n"
39477"char16 __ovld vload16(size_t offset, const __constant char *p);\n"
39478"uchar16 __ovld vload16(size_t offset, const __constant uchar *p);\n"
39479"short16 __ovld vload16(size_t offset, const __constant short *p);\n"
39480"ushort16 __ovld vload16(size_t offset, const __constant ushort *p);\n"
39481"int16 __ovld vload16(size_t offset, const __constant int *p);\n"
39482"uint16 __ovld vload16(size_t offset, const __constant uint *p);\n"
39483"long16 __ovld vload16(size_t offset, const __constant long *p);\n"
39484"ulong16 __ovld vload16(size_t offset, const __constant ulong *p);\n"
39485"float16 __ovld vload16(size_t offset, const __constant float *p);\n"
39486"#ifdef cl_khr_fp64\n"
39487"double2 __ovld vload2(size_t offset, const __constant double *p);\n"
39488"double3 __ovld vload3(size_t offset, const __constant double *p);\n"
39489"double4 __ovld vload4(size_t offset, const __constant double *p);\n"
39490"double8 __ovld vload8(size_t offset, const __constant double *p);\n"
39491"double16 __ovld vload16(size_t offset, const __constant double *p);\n"
39492"#endif //cl_khr_fp64\n"
39493"\n"
39494"#ifdef cl_khr_fp16\n"
39495"half __ovld vload(size_t offset, const __constant half *p);\n"
39496"half2 __ovld vload2(size_t offset, const __constant half *p);\n"
39497"half3 __ovld vload3(size_t offset, const __constant half *p);\n"
39498"half4 __ovld vload4(size_t offset, const __constant half *p);\n"
39499"half8 __ovld vload8(size_t offset, const __constant half *p);\n"
39500"half16 __ovld vload16(size_t offset, const __constant half *p);\n"
39501"#endif //cl_khr_fp16\n"
39502"\n"
39503"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39504"char2 __ovld vload2(size_t offset, const char *p);\n"
39505"uchar2 __ovld vload2(size_t offset, const uchar *p);\n"
39506"short2 __ovld vload2(size_t offset, const short *p);\n"
39507"ushort2 __ovld vload2(size_t offset, const ushort *p);\n"
39508"int2 __ovld vload2(size_t offset, const int *p);\n"
39509"uint2 __ovld vload2(size_t offset, const uint *p);\n"
39510"long2 __ovld vload2(size_t offset, const long *p);\n"
39511"ulong2 __ovld vload2(size_t offset, const ulong *p);\n"
39512"float2 __ovld vload2(size_t offset, const float *p);\n"
39513"char3 __ovld vload3(size_t offset, const char *p);\n"
39514"uchar3 __ovld vload3(size_t offset, const uchar *p);\n"
39515"short3 __ovld vload3(size_t offset, const short *p);\n"
39516"ushort3 __ovld vload3(size_t offset, const ushort *p);\n"
39517"int3 __ovld vload3(size_t offset, const int *p);\n"
39518"uint3 __ovld vload3(size_t offset, const uint *p);\n"
39519"long3 __ovld vload3(size_t offset, const long *p);\n"
39520"ulong3 __ovld vload3(size_t offset, const ulong *p);\n"
39521"float3 __ovld vload3(size_t offset, const float *p);\n"
39522"char4 __ovld vload4(size_t offset, const char *p);\n"
39523"uchar4 __ovld vload4(size_t offset, const uchar *p);\n"
39524"short4 __ovld vload4(size_t offset, const short *p);\n"
39525"ushort4 __ovld vload4(size_t offset, const ushort *p);\n"
39526"int4 __ovld vload4(size_t offset, const int *p);\n"
39527"uint4 __ovld vload4(size_t offset, const uint *p);\n"
39528"long4 __ovld vload4(size_t offset, const long *p);\n"
39529"ulong4 __ovld vload4(size_t offset, const ulong *p);\n"
39530"float4 __ovld vload4(size_t offset, const float *p);\n"
39531"char8 __ovld vload8(size_t offset, const char *p);\n"
39532"uchar8 __ovld vload8(size_t offset, const uchar *p);\n"
39533"short8 __ovld vload8(size_t offset, const short *p);\n"
39534"ushort8 __ovld vload8(size_t offset, const ushort *p);\n"
39535"int8 __ovld vload8(size_t offset, const int *p);\n"
39536"uint8 __ovld vload8(size_t offset, const uint *p);\n"
39537"long8 __ovld vload8(size_t offset, const long *p);\n"
39538"ulong8 __ovld vload8(size_t offset, const ulong *p);\n"
39539"float8 __ovld vload8(size_t offset, const float *p);\n"
39540"char16 __ovld vload16(size_t offset, const char *p);\n"
39541"uchar16 __ovld vload16(size_t offset, const uchar *p);\n"
39542"short16 __ovld vload16(size_t offset, const short *p);\n"
39543"ushort16 __ovld vload16(size_t offset, const ushort *p);\n"
39544"int16 __ovld vload16(size_t offset, const int *p);\n"
39545"uint16 __ovld vload16(size_t offset, const uint *p);\n"
39546"long16 __ovld vload16(size_t offset, const long *p);\n"
39547"ulong16 __ovld vload16(size_t offset, const ulong *p);\n"
39548"float16 __ovld vload16(size_t offset, const float *p);\n"
39549"\n"
39550"#ifdef cl_khr_fp64\n"
39551"double2 __ovld vload2(size_t offset, const double *p);\n"
39552"double3 __ovld vload3(size_t offset, const double *p);\n"
39553"double4 __ovld vload4(size_t offset, const double *p);\n"
39554"double8 __ovld vload8(size_t offset, const double *p);\n"
39555"double16 __ovld vload16(size_t offset, const double *p);\n"
39556"#endif //cl_khr_fp64\n"
39557"\n"
39558"#ifdef cl_khr_fp16\n"
39559"half __ovld vload(size_t offset, const half *p);\n"
39560"half2 __ovld vload2(size_t offset, const half *p);\n"
39561"half3 __ovld vload3(size_t offset, const half *p);\n"
39562"half4 __ovld vload4(size_t offset, const half *p);\n"
39563"half8 __ovld vload8(size_t offset, const half *p);\n"
39564"half16 __ovld vload16(size_t offset, const half *p);\n"
39565"#endif //cl_khr_fp16\n"
39566"#else\n"
39567"char2 __ovld vload2(size_t offset, const __global char *p);\n"
39568"uchar2 __ovld vload2(size_t offset, const __global uchar *p);\n"
39569"short2 __ovld vload2(size_t offset, const __global short *p);\n"
39570"ushort2 __ovld vload2(size_t offset, const __global ushort *p);\n"
39571"int2 __ovld vload2(size_t offset, const __global int *p);\n"
39572"uint2 __ovld vload2(size_t offset, const __global uint *p);\n"
39573"long2 __ovld vload2(size_t offset, const __global long *p);\n"
39574"ulong2 __ovld vload2(size_t offset, const __global ulong *p);\n"
39575"float2 __ovld vload2(size_t offset, const __global float *p);\n"
39576"char3 __ovld vload3(size_t offset, const __global char *p);\n"
39577"uchar3 __ovld vload3(size_t offset, const __global uchar *p);\n"
39578"short3 __ovld vload3(size_t offset, const __global short *p);\n"
39579"ushort3 __ovld vload3(size_t offset, const __global ushort *p);\n"
39580"int3 __ovld vload3(size_t offset, const __global int *p);\n"
39581"uint3 __ovld vload3(size_t offset, const __global uint *p);\n"
39582"long3 __ovld vload3(size_t offset, const __global long *p);\n"
39583"ulong3 __ovld vload3(size_t offset, const __global ulong *p);\n"
39584"float3 __ovld vload3(size_t offset, const __global float *p);\n"
39585"char4 __ovld vload4(size_t offset, const __global char *p);\n"
39586"uchar4 __ovld vload4(size_t offset, const __global uchar *p);\n"
39587"short4 __ovld vload4(size_t offset, const __global short *p);\n"
39588"ushort4 __ovld vload4(size_t offset, const __global ushort *p);\n"
39589"int4 __ovld vload4(size_t offset, const __global int *p);\n"
39590"uint4 __ovld vload4(size_t offset, const __global uint *p);\n"
39591"long4 __ovld vload4(size_t offset, const __global long *p);\n"
39592"ulong4 __ovld vload4(size_t offset, const __global ulong *p);\n"
39593"float4 __ovld vload4(size_t offset, const __global float *p);\n"
39594"char8 __ovld vload8(size_t offset, const __global char *p);\n"
39595"uchar8 __ovld vload8(size_t offset, const __global uchar *p);\n"
39596"short8 __ovld vload8(size_t offset, const __global short *p);\n"
39597"ushort8 __ovld vload8(size_t offset, const __global ushort *p);\n"
39598"int8 __ovld vload8(size_t offset, const __global int *p);\n"
39599"uint8 __ovld vload8(size_t offset, const __global uint *p);\n"
39600"long8 __ovld vload8(size_t offset, const __global long *p);\n"
39601"ulong8 __ovld vload8(size_t offset, const __global ulong *p);\n"
39602"float8 __ovld vload8(size_t offset, const __global float *p);\n"
39603"char16 __ovld vload16(size_t offset, const __global char *p);\n"
39604"uchar16 __ovld vload16(size_t offset, const __global uchar *p);\n"
39605"short16 __ovld vload16(size_t offset, const __global short *p);\n"
39606"ushort16 __ovld vload16(size_t offset, const __global ushort *p);\n"
39607"int16 __ovld vload16(size_t offset, const __global int *p);\n"
39608"uint16 __ovld vload16(size_t offset, const __global uint *p);\n"
39609"long16 __ovld vload16(size_t offset, const __global long *p);\n"
39610"ulong16 __ovld vload16(size_t offset, const __global ulong *p);\n"
39611"float16 __ovld vload16(size_t offset, const __global float *p);\n"
39612"char2 __ovld vload2(size_t offset, const __local char *p);\n"
39613"uchar2 __ovld vload2(size_t offset, const __local uchar *p);\n"
39614"short2 __ovld vload2(size_t offset, const __local short *p);\n"
39615"ushort2 __ovld vload2(size_t offset, const __local ushort *p);\n"
39616"int2 __ovld vload2(size_t offset, const __local int *p);\n"
39617"uint2 __ovld vload2(size_t offset, const __local uint *p);\n"
39618"long2 __ovld vload2(size_t offset, const __local long *p);\n"
39619"ulong2 __ovld vload2(size_t offset, const __local ulong *p);\n"
39620"float2 __ovld vload2(size_t offset, const __local float *p);\n"
39621"char3 __ovld vload3(size_t offset, const __local char *p);\n"
39622"uchar3 __ovld vload3(size_t offset, const __local uchar *p);\n"
39623"short3 __ovld vload3(size_t offset, const __local short *p);\n"
39624"ushort3 __ovld vload3(size_t offset, const __local ushort *p);\n"
39625"int3 __ovld vload3(size_t offset, const __local int *p);\n"
39626"uint3 __ovld vload3(size_t offset, const __local uint *p);\n"
39627"long3 __ovld vload3(size_t offset, const __local long *p);\n"
39628"ulong3 __ovld vload3(size_t offset, const __local ulong *p);\n"
39629"float3 __ovld vload3(size_t offset, const __local float *p);\n"
39630"char4 __ovld vload4(size_t offset, const __local char *p);\n"
39631"uchar4 __ovld vload4(size_t offset, const __local uchar *p);\n"
39632"short4 __ovld vload4(size_t offset, const __local short *p);\n"
39633"ushort4 __ovld vload4(size_t offset, const __local ushort *p);\n"
39634"int4 __ovld vload4(size_t offset, const __local int *p);\n"
39635"uint4 __ovld vload4(size_t offset, const __local uint *p);\n"
39636"long4 __ovld vload4(size_t offset, const __local long *p);\n"
39637"ulong4 __ovld vload4(size_t offset, const __local ulong *p);\n"
39638"float4 __ovld vload4(size_t offset, const __local float *p);\n"
39639"char8 __ovld vload8(size_t offset, const __local char *p);\n"
39640"uchar8 __ovld vload8(size_t offset, const __local uchar *p);\n"
39641"short8 __ovld vload8(size_t offset, const __local short *p);\n"
39642"ushort8 __ovld vload8(size_t offset, const __local ushort *p);\n"
39643"int8 __ovld vload8(size_t offset, const __local int *p);\n"
39644"uint8 __ovld vload8(size_t offset, const __local uint *p);\n"
39645"long8 __ovld vload8(size_t offset, const __local long *p);\n"
39646"ulong8 __ovld vload8(size_t offset, const __local ulong *p);\n"
39647"float8 __ovld vload8(size_t offset, const __local float *p);\n"
39648"char16 __ovld vload16(size_t offset, const __local char *p);\n"
39649"uchar16 __ovld vload16(size_t offset, const __local uchar *p);\n"
39650"short16 __ovld vload16(size_t offset, const __local short *p);\n"
39651"ushort16 __ovld vload16(size_t offset, const __local ushort *p);\n"
39652"int16 __ovld vload16(size_t offset, const __local int *p);\n"
39653"uint16 __ovld vload16(size_t offset, const __local uint *p);\n"
39654"long16 __ovld vload16(size_t offset, const __local long *p);\n"
39655"ulong16 __ovld vload16(size_t offset, const __local ulong *p);\n"
39656"float16 __ovld vload16(size_t offset, const __local float *p);\n"
39657"char2 __ovld vload2(size_t offset, const __private char *p);\n"
39658"uchar2 __ovld vload2(size_t offset, const __private uchar *p);\n"
39659"short2 __ovld vload2(size_t offset, const __private short *p);\n"
39660"ushort2 __ovld vload2(size_t offset, const __private ushort *p);\n"
39661"int2 __ovld vload2(size_t offset, const __private int *p);\n"
39662"uint2 __ovld vload2(size_t offset, const __private uint *p);\n"
39663"long2 __ovld vload2(size_t offset, const __private long *p);\n"
39664"ulong2 __ovld vload2(size_t offset, const __private ulong *p);\n"
39665"float2 __ovld vload2(size_t offset, const __private float *p);\n"
39666"char3 __ovld vload3(size_t offset, const __private char *p);\n"
39667"uchar3 __ovld vload3(size_t offset, const __private uchar *p);\n"
39668"short3 __ovld vload3(size_t offset, const __private short *p);\n"
39669"ushort3 __ovld vload3(size_t offset, const __private ushort *p);\n"
39670"int3 __ovld vload3(size_t offset, const __private int *p);\n"
39671"uint3 __ovld vload3(size_t offset, const __private uint *p);\n"
39672"long3 __ovld vload3(size_t offset, const __private long *p);\n"
39673"ulong3 __ovld vload3(size_t offset, const __private ulong *p);\n"
39674"float3 __ovld vload3(size_t offset, const __private float *p);\n"
39675"char4 __ovld vload4(size_t offset, const __private char *p);\n"
39676"uchar4 __ovld vload4(size_t offset, const __private uchar *p);\n"
39677"short4 __ovld vload4(size_t offset, const __private short *p);\n"
39678"ushort4 __ovld vload4(size_t offset, const __private ushort *p);\n"
39679"int4 __ovld vload4(size_t offset, const __private int *p);\n"
39680"uint4 __ovld vload4(size_t offset, const __private uint *p);\n"
39681"long4 __ovld vload4(size_t offset, const __private long *p);\n"
39682"ulong4 __ovld vload4(size_t offset, const __private ulong *p);\n"
39683"float4 __ovld vload4(size_t offset, const __private float *p);\n"
39684"char8 __ovld vload8(size_t offset, const __private char *p);\n"
39685"uchar8 __ovld vload8(size_t offset, const __private uchar *p);\n"
39686"short8 __ovld vload8(size_t offset, const __private short *p);\n"
39687"ushort8 __ovld vload8(size_t offset, const __private ushort *p);\n"
39688"int8 __ovld vload8(size_t offset, const __private int *p);\n"
39689"uint8 __ovld vload8(size_t offset, const __private uint *p);\n"
39690"long8 __ovld vload8(size_t offset, const __private long *p);\n"
39691"ulong8 __ovld vload8(size_t offset, const __private ulong *p);\n"
39692"float8 __ovld vload8(size_t offset, const __private float *p);\n"
39693"char16 __ovld vload16(size_t offset, const __private char *p);\n"
39694"uchar16 __ovld vload16(size_t offset, const __private uchar *p);\n"
39695"short16 __ovld vload16(size_t offset, const __private short *p);\n"
39696"ushort16 __ovld vload16(size_t offset, const __private ushort *p);\n"
39697"int16 __ovld vload16(size_t offset, const __private int *p);\n"
39698"uint16 __ovld vload16(size_t offset, const __private uint *p);\n"
39699"long16 __ovld vload16(size_t offset, const __private long *p);\n"
39700"ulong16 __ovld vload16(size_t offset, const __private ulong *p);\n"
39701"float16 __ovld vload16(size_t offset, const __private float *p);\n"
39702"\n"
39703"#ifdef cl_khr_fp64\n"
39704"double2 __ovld vload2(size_t offset, const __global double *p);\n"
39705"double3 __ovld vload3(size_t offset, const __global double *p);\n"
39706"double4 __ovld vload4(size_t offset, const __global double *p);\n"
39707"double8 __ovld vload8(size_t offset, const __global double *p);\n"
39708"double16 __ovld vload16(size_t offset, const __global double *p);\n"
39709"double2 __ovld vload2(size_t offset, const __local double *p);\n"
39710"double3 __ovld vload3(size_t offset, const __local double *p);\n"
39711"double4 __ovld vload4(size_t offset, const __local double *p);\n"
39712"double8 __ovld vload8(size_t offset, const __local double *p);\n"
39713"double16 __ovld vload16(size_t offset, const __local double *p);\n"
39714"double2 __ovld vload2(size_t offset, const __private double *p);\n"
39715"double3 __ovld vload3(size_t offset, const __private double *p);\n"
39716"double4 __ovld vload4(size_t offset, const __private double *p);\n"
39717"double8 __ovld vload8(size_t offset, const __private double *p);\n"
39718"double16 __ovld vload16(size_t offset, const __private double *p);\n"
39719"#endif //cl_khr_fp64\n"
39720"\n"
39721"#ifdef cl_khr_fp16\n"
39722"half __ovld vload(size_t offset, const __global half *p);\n"
39723"half2 __ovld vload2(size_t offset, const __global half *p);\n"
39724"half3 __ovld vload3(size_t offset, const __global half *p);\n"
39725"half4 __ovld vload4(size_t offset, const __global half *p);\n"
39726"half8 __ovld vload8(size_t offset, const __global half *p);\n"
39727"half16 __ovld vload16(size_t offset, const __global half *p);\n"
39728"half __ovld vload(size_t offset, const __local half *p);\n"
39729"half2 __ovld vload2(size_t offset, const __local half *p);\n"
39730"half3 __ovld vload3(size_t offset, const __local half *p);\n"
39731"half4 __ovld vload4(size_t offset, const __local half *p);\n"
39732"half8 __ovld vload8(size_t offset, const __local half *p);\n"
39733"half16 __ovld vload16(size_t offset, const __local half *p);\n"
39734"half __ovld vload(size_t offset, const __private half *p);\n"
39735"half2 __ovld vload2(size_t offset, const __private half *p);\n"
39736"half3 __ovld vload3(size_t offset, const __private half *p);\n"
39737"half4 __ovld vload4(size_t offset, const __private half *p);\n"
39738"half8 __ovld vload8(size_t offset, const __private half *p);\n"
39739"half16 __ovld vload16(size_t offset, const __private half *p);\n"
39740"#endif //cl_khr_fp16\n"
39741"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39742"\n"
39743"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39744"void __ovld vstore2(char2 data, size_t offset, char *p);\n"
39745"void __ovld vstore2(uchar2 data, size_t offset, uchar *p);\n"
39746"void __ovld vstore2(short2 data, size_t offset, short *p);\n"
39747"void __ovld vstore2(ushort2 data, size_t offset, ushort *p);\n"
39748"void __ovld vstore2(int2 data, size_t offset, int *p);\n"
39749"void __ovld vstore2(uint2 data, size_t offset, uint *p);\n"
39750"void __ovld vstore2(long2 data, size_t offset, long *p);\n"
39751"void __ovld vstore2(ulong2 data, size_t offset, ulong *p);\n"
39752"void __ovld vstore2(float2 data, size_t offset, float *p);\n"
39753"void __ovld vstore3(char3 data, size_t offset, char *p);\n"
39754"void __ovld vstore3(uchar3 data, size_t offset, uchar *p);\n"
39755"void __ovld vstore3(short3 data, size_t offset, short *p);\n"
39756"void __ovld vstore3(ushort3 data, size_t offset, ushort *p);\n"
39757"void __ovld vstore3(int3 data, size_t offset, int *p);\n"
39758"void __ovld vstore3(uint3 data, size_t offset, uint *p);\n"
39759"void __ovld vstore3(long3 data, size_t offset, long *p);\n"
39760"void __ovld vstore3(ulong3 data, size_t offset, ulong *p);\n"
39761"void __ovld vstore3(float3 data, size_t offset, float *p);\n"
39762"void __ovld vstore4(char4 data, size_t offset, char *p);\n"
39763"void __ovld vstore4(uchar4 data, size_t offset, uchar *p);\n"
39764"void __ovld vstore4(short4 data, size_t offset, short *p);\n"
39765"void __ovld vstore4(ushort4 data, size_t offset, ushort *p);\n"
39766"void __ovld vstore4(int4 data, size_t offset, int *p);\n"
39767"void __ovld vstore4(uint4 data, size_t offset, uint *p);\n"
39768"void __ovld vstore4(long4 data, size_t offset, long *p);\n"
39769"void __ovld vstore4(ulong4 data, size_t offset, ulong *p);\n"
39770"void __ovld vstore4(float4 data, size_t offset, float *p);\n"
39771"void __ovld vstore8(char8 data, size_t offset, char *p);\n"
39772"void __ovld vstore8(uchar8 data, size_t offset, uchar *p);\n"
39773"void __ovld vstore8(short8 data, size_t offset, short *p);\n"
39774"void __ovld vstore8(ushort8 data, size_t offset, ushort *p);\n"
39775"void __ovld vstore8(int8 data, size_t offset, int *p);\n"
39776"void __ovld vstore8(uint8 data, size_t offset, uint *p);\n"
39777"void __ovld vstore8(long8 data, size_t offset, long *p);\n"
39778"void __ovld vstore8(ulong8 data, size_t offset, ulong *p);\n"
39779"void __ovld vstore8(float8 data, size_t offset, float *p);\n"
39780"void __ovld vstore16(char16 data, size_t offset, char *p);\n"
39781"void __ovld vstore16(uchar16 data, size_t offset, uchar *p);\n"
39782"void __ovld vstore16(short16 data, size_t offset, short *p);\n"
39783"void __ovld vstore16(ushort16 data, size_t offset, ushort *p);\n"
39784"void __ovld vstore16(int16 data, size_t offset, int *p);\n"
39785"void __ovld vstore16(uint16 data, size_t offset, uint *p);\n"
39786"void __ovld vstore16(long16 data, size_t offset, long *p);\n"
39787"void __ovld vstore16(ulong16 data, size_t offset, ulong *p);\n"
39788"void __ovld vstore16(float16 data, size_t offset, float *p);\n"
39789"#ifdef cl_khr_fp64\n"
39790"void __ovld vstore2(double2 data, size_t offset, double *p);\n"
39791"void __ovld vstore3(double3 data, size_t offset, double *p);\n"
39792"void __ovld vstore4(double4 data, size_t offset, double *p);\n"
39793"void __ovld vstore8(double8 data, size_t offset, double *p);\n"
39794"void __ovld vstore16(double16 data, size_t offset, double *p);\n"
39795"#endif //cl_khr_fp64\n"
39796"#ifdef cl_khr_fp16\n"
39797"void __ovld vstore(half data, size_t offset, half *p);\n"
39798"void __ovld vstore2(half2 data, size_t offset, half *p);\n"
39799"void __ovld vstore3(half3 data, size_t offset, half *p);\n"
39800"void __ovld vstore4(half4 data, size_t offset, half *p);\n"
39801"void __ovld vstore8(half8 data, size_t offset, half *p);\n"
39802"void __ovld vstore16(half16 data, size_t offset, half *p);\n"
39803"#endif //cl_khr_fp16\n"
39804"#else\n"
39805"void __ovld vstore2(char2 data, size_t offset, __global char *p);\n"
39806"void __ovld vstore2(uchar2 data, size_t offset, __global uchar *p);\n"
39807"void __ovld vstore2(short2 data, size_t offset, __global short *p);\n"
39808"void __ovld vstore2(ushort2 data, size_t offset, __global ushort *p);\n"
39809"void __ovld vstore2(int2 data, size_t offset, __global int *p);\n"
39810"void __ovld vstore2(uint2 data, size_t offset, __global uint *p);\n"
39811"void __ovld vstore2(long2 data, size_t offset, __global long *p);\n"
39812"void __ovld vstore2(ulong2 data, size_t offset, __global ulong *p);\n"
39813"void __ovld vstore2(float2 data, size_t offset, __global float *p);\n"
39814"void __ovld vstore3(char3 data, size_t offset, __global char *p);\n"
39815"void __ovld vstore3(uchar3 data, size_t offset, __global uchar *p);\n"
39816"void __ovld vstore3(short3 data, size_t offset, __global short *p);\n"
39817"void __ovld vstore3(ushort3 data, size_t offset, __global ushort *p);\n"
39818"void __ovld vstore3(int3 data, size_t offset, __global int *p);\n"
39819"void __ovld vstore3(uint3 data, size_t offset, __global uint *p);\n"
39820"void __ovld vstore3(long3 data, size_t offset, __global long *p);\n"
39821"void __ovld vstore3(ulong3 data, size_t offset, __global ulong *p);\n"
39822"void __ovld vstore3(float3 data, size_t offset, __global float *p);\n"
39823"void __ovld vstore4(char4 data, size_t offset, __global char *p);\n"
39824"void __ovld vstore4(uchar4 data, size_t offset, __global uchar *p);\n"
39825"void __ovld vstore4(short4 data, size_t offset, __global short *p);\n"
39826"void __ovld vstore4(ushort4 data, size_t offset, __global ushort *p);\n"
39827"void __ovld vstore4(int4 data, size_t offset, __global int *p);\n"
39828"void __ovld vstore4(uint4 data, size_t offset, __global uint *p);\n"
39829"void __ovld vstore4(long4 data, size_t offset, __global long *p);\n"
39830"void __ovld vstore4(ulong4 data, size_t offset, __global ulong *p);\n"
39831"void __ovld vstore4(float4 data, size_t offset, __global float *p);\n"
39832"void __ovld vstore8(char8 data, size_t offset, __global char *p);\n"
39833"void __ovld vstore8(uchar8 data, size_t offset, __global uchar *p);\n"
39834"void __ovld vstore8(short8 data, size_t offset, __global short *p);\n"
39835"void __ovld vstore8(ushort8 data, size_t offset, __global ushort *p);\n"
39836"void __ovld vstore8(int8 data, size_t offset, __global int *p);\n"
39837"void __ovld vstore8(uint8 data, size_t offset, __global uint *p);\n"
39838"void __ovld vstore8(long8 data, size_t offset, __global long *p);\n"
39839"void __ovld vstore8(ulong8 data, size_t offset, __global ulong *p);\n"
39840"void __ovld vstore8(float8 data, size_t offset, __global float *p);\n"
39841"void __ovld vstore16(char16 data, size_t offset, __global char *p);\n"
39842"void __ovld vstore16(uchar16 data, size_t offset, __global uchar *p);\n"
39843"void __ovld vstore16(short16 data, size_t offset, __global short *p);\n"
39844"void __ovld vstore16(ushort16 data, size_t offset, __global ushort *p);\n"
39845"void __ovld vstore16(int16 data, size_t offset, __global int *p);\n"
39846"void __ovld vstore16(uint16 data, size_t offset, __global uint *p);\n"
39847"void __ovld vstore16(long16 data, size_t offset, __global long *p);\n"
39848"void __ovld vstore16(ulong16 data, size_t offset, __global ulong *p);\n"
39849"void __ovld vstore16(float16 data, size_t offset, __global float *p);\n"
39850"void __ovld vstore2(char2 data, size_t offset, __local char *p);\n"
39851"void __ovld vstore2(uchar2 data, size_t offset, __local uchar *p);\n"
39852"void __ovld vstore2(short2 data, size_t offset, __local short *p);\n"
39853"void __ovld vstore2(ushort2 data, size_t offset, __local ushort *p);\n"
39854"void __ovld vstore2(int2 data, size_t offset, __local int *p);\n"
39855"void __ovld vstore2(uint2 data, size_t offset, __local uint *p);\n"
39856"void __ovld vstore2(long2 data, size_t offset, __local long *p);\n"
39857"void __ovld vstore2(ulong2 data, size_t offset, __local ulong *p);\n"
39858"void __ovld vstore2(float2 data, size_t offset, __local float *p);\n"
39859"void __ovld vstore3(char3 data, size_t offset, __local char *p);\n"
39860"void __ovld vstore3(uchar3 data, size_t offset, __local uchar *p);\n"
39861"void __ovld vstore3(short3 data, size_t offset, __local short *p);\n"
39862"void __ovld vstore3(ushort3 data, size_t offset, __local ushort *p);\n"
39863"void __ovld vstore3(int3 data, size_t offset, __local int *p);\n"
39864"void __ovld vstore3(uint3 data, size_t offset, __local uint *p);\n"
39865"void __ovld vstore3(long3 data, size_t offset, __local long *p);\n"
39866"void __ovld vstore3(ulong3 data, size_t offset, __local ulong *p);\n"
39867"void __ovld vstore3(float3 data, size_t offset, __local float *p);\n"
39868"void __ovld vstore4(char4 data, size_t offset, __local char *p);\n"
39869"void __ovld vstore4(uchar4 data, size_t offset, __local uchar *p);\n"
39870"void __ovld vstore4(short4 data, size_t offset, __local short *p);\n"
39871"void __ovld vstore4(ushort4 data, size_t offset, __local ushort *p);\n"
39872"void __ovld vstore4(int4 data, size_t offset, __local int *p);\n"
39873"void __ovld vstore4(uint4 data, size_t offset, __local uint *p);\n"
39874"void __ovld vstore4(long4 data, size_t offset, __local long *p);\n"
39875"void __ovld vstore4(ulong4 data, size_t offset, __local ulong *p);\n"
39876"void __ovld vstore4(float4 data, size_t offset, __local float *p);\n"
39877"void __ovld vstore8(char8 data, size_t offset, __local char *p);\n"
39878"void __ovld vstore8(uchar8 data, size_t offset, __local uchar *p);\n"
39879"void __ovld vstore8(short8 data, size_t offset, __local short *p);\n"
39880"void __ovld vstore8(ushort8 data, size_t offset, __local ushort *p);\n"
39881"void __ovld vstore8(int8 data, size_t offset, __local int *p);\n"
39882"void __ovld vstore8(uint8 data, size_t offset, __local uint *p);\n"
39883"void __ovld vstore8(long8 data, size_t offset, __local long *p);\n"
39884"void __ovld vstore8(ulong8 data, size_t offset, __local ulong *p);\n"
39885"void __ovld vstore8(float8 data, size_t offset, __local float *p);\n"
39886"void __ovld vstore16(char16 data, size_t offset, __local char *p);\n"
39887"void __ovld vstore16(uchar16 data, size_t offset, __local uchar *p);\n"
39888"void __ovld vstore16(short16 data, size_t offset, __local short *p);\n"
39889"void __ovld vstore16(ushort16 data, size_t offset, __local ushort *p);\n"
39890"void __ovld vstore16(int16 data, size_t offset, __local int *p);\n"
39891"void __ovld vstore16(uint16 data, size_t offset, __local uint *p);\n"
39892"void __ovld vstore16(long16 data, size_t offset, __local long *p);\n"
39893"void __ovld vstore16(ulong16 data, size_t offset, __local ulong *p);\n"
39894"void __ovld vstore16(float16 data, size_t offset, __local float *p);\n"
39895"void __ovld vstore2(char2 data, size_t offset, __private char *p);\n"
39896"void __ovld vstore2(uchar2 data, size_t offset, __private uchar *p);\n"
39897"void __ovld vstore2(short2 data, size_t offset, __private short *p);\n"
39898"void __ovld vstore2(ushort2 data, size_t offset, __private ushort *p);\n"
39899"void __ovld vstore2(int2 data, size_t offset, __private int *p);\n"
39900"void __ovld vstore2(uint2 data, size_t offset, __private uint *p);\n"
39901"void __ovld vstore2(long2 data, size_t offset, __private long *p);\n"
39902"void __ovld vstore2(ulong2 data, size_t offset, __private ulong *p);\n"
39903"void __ovld vstore2(float2 data, size_t offset, __private float *p);\n"
39904"void __ovld vstore3(char3 data, size_t offset, __private char *p);\n"
39905"void __ovld vstore3(uchar3 data, size_t offset, __private uchar *p);\n"
39906"void __ovld vstore3(short3 data, size_t offset, __private short *p);\n"
39907"void __ovld vstore3(ushort3 data, size_t offset, __private ushort *p);\n"
39908"void __ovld vstore3(int3 data, size_t offset, __private int *p);\n"
39909"void __ovld vstore3(uint3 data, size_t offset, __private uint *p);\n"
39910"void __ovld vstore3(long3 data, size_t offset, __private long *p);\n"
39911"void __ovld vstore3(ulong3 data, size_t offset, __private ulong *p);\n"
39912"void __ovld vstore3(float3 data, size_t offset, __private float *p);\n"
39913"void __ovld vstore4(char4 data, size_t offset, __private char *p);\n"
39914"void __ovld vstore4(uchar4 data, size_t offset, __private uchar *p);\n"
39915"void __ovld vstore4(short4 data, size_t offset, __private short *p);\n"
39916"void __ovld vstore4(ushort4 data, size_t offset, __private ushort *p);\n"
39917"void __ovld vstore4(int4 data, size_t offset, __private int *p);\n"
39918"void __ovld vstore4(uint4 data, size_t offset, __private uint *p);\n"
39919"void __ovld vstore4(long4 data, size_t offset, __private long *p);\n"
39920"void __ovld vstore4(ulong4 data, size_t offset, __private ulong *p);\n"
39921"void __ovld vstore4(float4 data, size_t offset, __private float *p);\n"
39922"void __ovld vstore8(char8 data, size_t offset, __private char *p);\n"
39923"void __ovld vstore8(uchar8 data, size_t offset, __private uchar *p);\n"
39924"void __ovld vstore8(short8 data, size_t offset, __private short *p);\n"
39925"void __ovld vstore8(ushort8 data, size_t offset, __private ushort *p);\n"
39926"void __ovld vstore8(int8 data, size_t offset, __private int *p);\n"
39927"void __ovld vstore8(uint8 data, size_t offset, __private uint *p);\n"
39928"void __ovld vstore8(long8 data, size_t offset, __private long *p);\n"
39929"void __ovld vstore8(ulong8 data, size_t offset, __private ulong *p);\n"
39930"void __ovld vstore8(float8 data, size_t offset, __private float *p);\n"
39931"void __ovld vstore16(char16 data, size_t offset, __private char *p);\n"
39932"void __ovld vstore16(uchar16 data, size_t offset, __private uchar *p);\n"
39933"void __ovld vstore16(short16 data, size_t offset, __private short *p);\n"
39934"void __ovld vstore16(ushort16 data, size_t offset, __private ushort *p);\n"
39935"void __ovld vstore16(int16 data, size_t offset, __private int *p);\n"
39936"void __ovld vstore16(uint16 data, size_t offset, __private uint *p);\n"
39937"void __ovld vstore16(long16 data, size_t offset, __private long *p);\n"
39938"void __ovld vstore16(ulong16 data, size_t offset, __private ulong *p);\n"
39939"void __ovld vstore16(float16 data, size_t offset, __private float *p);\n"
39940"#ifdef cl_khr_fp64\n"
39941"void __ovld vstore2(double2 data, size_t offset, __global double *p);\n"
39942"void __ovld vstore3(double3 data, size_t offset, __global double *p);\n"
39943"void __ovld vstore4(double4 data, size_t offset, __global double *p);\n"
39944"void __ovld vstore8(double8 data, size_t offset, __global double *p);\n"
39945"void __ovld vstore16(double16 data, size_t offset, __global double *p);\n"
39946"void __ovld vstore2(double2 data, size_t offset, __local double *p);\n"
39947"void __ovld vstore3(double3 data, size_t offset, __local double *p);\n"
39948"void __ovld vstore4(double4 data, size_t offset, __local double *p);\n"
39949"void __ovld vstore8(double8 data, size_t offset, __local double *p);\n"
39950"void __ovld vstore16(double16 data, size_t offset, __local double *p);\n"
39951"void __ovld vstore2(double2 data, size_t offset, __private double *p);\n"
39952"void __ovld vstore3(double3 data, size_t offset, __private double *p);\n"
39953"void __ovld vstore4(double4 data, size_t offset, __private double *p);\n"
39954"void __ovld vstore8(double8 data, size_t offset, __private double *p);\n"
39955"void __ovld vstore16(double16 data, size_t offset, __private double *p);\n"
39956"#endif //cl_khr_fp64\n"
39957"#ifdef cl_khr_fp16\n"
39958"void __ovld vstore(half data, size_t offset, __global half *p);\n"
39959"void __ovld vstore2(half2 data, size_t offset, __global half *p);\n"
39960"void __ovld vstore3(half3 data, size_t offset, __global half *p);\n"
39961"void __ovld vstore4(half4 data, size_t offset, __global half *p);\n"
39962"void __ovld vstore8(half8 data, size_t offset, __global half *p);\n"
39963"void __ovld vstore16(half16 data, size_t offset, __global half *p);\n"
39964"void __ovld vstore(half data, size_t offset, __local half *p);\n"
39965"void __ovld vstore2(half2 data, size_t offset, __local half *p);\n"
39966"void __ovld vstore3(half3 data, size_t offset, __local half *p);\n"
39967"void __ovld vstore4(half4 data, size_t offset, __local half *p);\n"
39968"void __ovld vstore8(half8 data, size_t offset, __local half *p);\n"
39969"void __ovld vstore16(half16 data, size_t offset, __local half *p);\n"
39970"void __ovld vstore(half data, size_t offset, __private half *p);\n"
39971"void __ovld vstore2(half2 data, size_t offset, __private half *p);\n"
39972"void __ovld vstore3(half3 data, size_t offset, __private half *p);\n"
39973"void __ovld vstore4(half4 data, size_t offset, __private half *p);\n"
39974"void __ovld vstore8(half8 data, size_t offset, __private half *p);\n"
39975"void __ovld vstore16(half16 data, size_t offset, __private half *p);\n"
39976"#endif //cl_khr_fp16\n"
39977"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39978"\n"
39979"/**\n"
39980" * Read sizeof (half) bytes of data from address\n"
39981" * (p + offset). The data read is interpreted as a\n"
39982" * half value. The half value is converted to a\n"
39983" * float value and the float value is returned.\n"
39984" * The read address computed as (p + offset)\n"
39985" * must be 16-bit aligned.\n"
39986" */\n"
39987"float __ovld vload_half(size_t offset, const __constant half *p);\n"
39988"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39989"float __ovld vload_half(size_t offset, const half *p);\n"
39990"#else\n"
39991"float __ovld vload_half(size_t offset, const __global half *p);\n"
39992"float __ovld vload_half(size_t offset, const __local half *p);\n"
39993"float __ovld vload_half(size_t offset, const __private half *p);\n"
39994"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39995"\n"
39996"/**\n"
39997" * Read sizeof (halfn) bytes of data from address\n"
39998" * (p + (offset * n)). The data read is interpreted\n"
39999" * as a halfn value. The halfn value read is\n"
40000" * converted to a floatn value and the floatn\n"
40001" * value is returned. The read address computed\n"
40002" * as (p + (offset * n)) must be 16-bit aligned.\n"
40003" */\n"
40004"float2 __ovld vload_half2(size_t offset, const __constant half *p);\n"
40005"float3 __ovld vload_half3(size_t offset, const __constant half *p);\n"
40006"float4 __ovld vload_half4(size_t offset, const __constant half *p);\n"
40007"float8 __ovld vload_half8(size_t offset, const __constant half *p);\n"
40008"float16 __ovld vload_half16(size_t offset, const __constant half *p);\n"
40009"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40010"float2 __ovld vload_half2(size_t offset, const half *p);\n"
40011"float3 __ovld vload_half3(size_t offset, const half *p);\n"
40012"float4 __ovld vload_half4(size_t offset, const half *p);\n"
40013"float8 __ovld vload_half8(size_t offset, const half *p);\n"
40014"float16 __ovld vload_half16(size_t offset, const half *p);\n"
40015"#else\n"
40016"float2 __ovld vload_half2(size_t offset, const __global half *p);\n"
40017"float3 __ovld vload_half3(size_t offset, const __global half *p);\n"
40018"float4 __ovld vload_half4(size_t offset, const __global half *p);\n"
40019"float8 __ovld vload_half8(size_t offset, const __global half *p);\n"
40020"float16 __ovld vload_half16(size_t offset, const __global half *p);\n"
40021"float2 __ovld vload_half2(size_t offset, const __local half *p);\n"
40022"float3 __ovld vload_half3(size_t offset, const __local half *p);\n"
40023"float4 __ovld vload_half4(size_t offset, const __local half *p);\n"
40024"float8 __ovld vload_half8(size_t offset, const __local half *p);\n"
40025"float16 __ovld vload_half16(size_t offset, const __local half *p);\n"
40026"float2 __ovld vload_half2(size_t offset, const __private half *p);\n"
40027"float3 __ovld vload_half3(size_t offset, const __private half *p);\n"
40028"float4 __ovld vload_half4(size_t offset, const __private half *p);\n"
40029"float8 __ovld vload_half8(size_t offset, const __private half *p);\n"
40030"float16 __ovld vload_half16(size_t offset, const __private half *p);\n"
40031"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40032"\n"
40033"/**\n"
40034" * The float value given by data is first\n"
40035" * converted to a half value using the appropriate\n"
40036" * rounding mode. The half value is then written\n"
40037" * to address computed as (p + offset). The\n"
40038" * address computed as (p + offset) must be 16-\n"
40039" * bit aligned.\n"
40040" * vstore_half use the current rounding mode.\n"
40041" * The default current rounding mode is round to\n"
40042" * nearest even.\n"
40043" */\n"
40044"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40045"void __ovld vstore_half(float data, size_t offset, half *p);\n"
40046"void __ovld vstore_half_rte(float data, size_t offset, half *p);\n"
40047"void __ovld vstore_half_rtz(float data, size_t offset, half *p);\n"
40048"void __ovld vstore_half_rtp(float data, size_t offset, half *p);\n"
40049"void __ovld vstore_half_rtn(float data, size_t offset, half *p);\n"
40050"#ifdef cl_khr_fp64\n"
40051"void __ovld vstore_half(double data, size_t offset, half *p);\n"
40052"void __ovld vstore_half_rte(double data, size_t offset, half *p);\n"
40053"void __ovld vstore_half_rtz(double data, size_t offset, half *p);\n"
40054"void __ovld vstore_half_rtp(double data, size_t offset, half *p);\n"
40055"void __ovld vstore_half_rtn(double data, size_t offset, half *p);\n"
40056"#endif //cl_khr_fp64\n"
40057"#else\n"
40058"void __ovld vstore_half(float data, size_t offset, __global half *p);\n"
40059"void __ovld vstore_half_rte(float data, size_t offset, __global half *p);\n"
40060"void __ovld vstore_half_rtz(float data, size_t offset, __global half *p);\n"
40061"void __ovld vstore_half_rtp(float data, size_t offset, __global half *p);\n"
40062"void __ovld vstore_half_rtn(float data, size_t offset, __global half *p);\n"
40063"void __ovld vstore_half(float data, size_t offset, __local half *p);\n"
40064"void __ovld vstore_half_rte(float data, size_t offset, __local half *p);\n"
40065"void __ovld vstore_half_rtz(float data, size_t offset, __local half *p);\n"
40066"void __ovld vstore_half_rtp(float data, size_t offset, __local half *p);\n"
40067"void __ovld vstore_half_rtn(float data, size_t offset, __local half *p);\n"
40068"void __ovld vstore_half(float data, size_t offset, __private half *p);\n"
40069"void __ovld vstore_half_rte(float data, size_t offset, __private half *p);\n"
40070"void __ovld vstore_half_rtz(float data, size_t offset, __private half *p);\n"
40071"void __ovld vstore_half_rtp(float data, size_t offset, __private half *p);\n"
40072"void __ovld vstore_half_rtn(float data, size_t offset, __private half *p);\n"
40073"#ifdef cl_khr_fp64\n"
40074"void __ovld vstore_half(double data, size_t offset, __global half *p);\n"
40075"void __ovld vstore_half_rte(double data, size_t offset, __global half *p);\n"
40076"void __ovld vstore_half_rtz(double data, size_t offset, __global half *p);\n"
40077"void __ovld vstore_half_rtp(double data, size_t offset, __global half *p);\n"
40078"void __ovld vstore_half_rtn(double data, size_t offset, __global half *p);\n"
40079"void __ovld vstore_half(double data, size_t offset, __local half *p);\n"
40080"void __ovld vstore_half_rte(double data, size_t offset, __local half *p);\n"
40081"void __ovld vstore_half_rtz(double data, size_t offset, __local half *p);\n"
40082"void __ovld vstore_half_rtp(double data, size_t offset, __local half *p);\n"
40083"void __ovld vstore_half_rtn(double data, size_t offset, __local half *p);\n"
40084"void __ovld vstore_half(double data, size_t offset, __private half *p);\n"
40085"void __ovld vstore_half_rte(double data, size_t offset, __private half *p);\n"
40086"void __ovld vstore_half_rtz(double data, size_t offset, __private half *p);\n"
40087"void __ovld vstore_half_rtp(double data, size_t offset, __private half *p);\n"
40088"void __ovld vstore_half_rtn(double data, size_t offset, __private half *p);\n"
40089"#endif //cl_khr_fp64\n"
40090"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40091"\n"
40092"/**\n"
40093" * The floatn value given by data is converted to\n"
40094" * a halfn value using the appropriate rounding\n"
40095" * mode. The halfn value is then written to\n"
40096" * address computed as (p + (offset * n)). The\n"
40097" * address computed as (p + (offset * n)) must be\n"
40098" * 16-bit aligned.\n"
40099" * vstore_halfn uses the current rounding mode.\n"
40100" * The default current rounding mode is round to\n"
40101" * nearest even.\n"
40102" */\n"
40103"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40104"void __ovld vstore_half2(float2 data, size_t offset, half *p);\n"
40105"void __ovld vstore_half3(float3 data, size_t offset, half *p);\n"
40106"void __ovld vstore_half4(float4 data, size_t offset, half *p);\n"
40107"void __ovld vstore_half8(float8 data, size_t offset, half *p);\n"
40108"void __ovld vstore_half16(float16 data, size_t offset, half *p);\n"
40109"void __ovld vstore_half2_rte(float2 data, size_t offset, half *p);\n"
40110"void __ovld vstore_half3_rte(float3 data, size_t offset, half *p);\n"
40111"void __ovld vstore_half4_rte(float4 data, size_t offset, half *p);\n"
40112"void __ovld vstore_half8_rte(float8 data, size_t offset, half *p);\n"
40113"void __ovld vstore_half16_rte(float16 data, size_t offset, half *p);\n"
40114"void __ovld vstore_half2_rtz(float2 data, size_t offset, half *p);\n"
40115"void __ovld vstore_half3_rtz(float3 data, size_t offset, half *p);\n"
40116"void __ovld vstore_half4_rtz(float4 data, size_t offset, half *p);\n"
40117"void __ovld vstore_half8_rtz(float8 data, size_t offset, half *p);\n"
40118"void __ovld vstore_half16_rtz(float16 data, size_t offset, half *p);\n"
40119"void __ovld vstore_half2_rtp(float2 data, size_t offset, half *p);\n"
40120"void __ovld vstore_half3_rtp(float3 data, size_t offset, half *p);\n"
40121"void __ovld vstore_half4_rtp(float4 data, size_t offset, half *p);\n"
40122"void __ovld vstore_half8_rtp(float8 data, size_t offset, half *p);\n"
40123"void __ovld vstore_half16_rtp(float16 data, size_t offset, half *p);\n"
40124"void __ovld vstore_half2_rtn(float2 data, size_t offset, half *p);\n"
40125"void __ovld vstore_half3_rtn(float3 data, size_t offset, half *p);\n"
40126"void __ovld vstore_half4_rtn(float4 data, size_t offset, half *p);\n"
40127"void __ovld vstore_half8_rtn(float8 data, size_t offset, half *p);\n"
40128"void __ovld vstore_half16_rtn(float16 data, size_t offset, half *p);\n"
40129"#ifdef cl_khr_fp64\n"
40130"void __ovld vstore_half2(double2 data, size_t offset, half *p);\n"
40131"void __ovld vstore_half3(double3 data, size_t offset, half *p);\n"
40132"void __ovld vstore_half4(double4 data, size_t offset, half *p);\n"
40133"void __ovld vstore_half8(double8 data, size_t offset, half *p);\n"
40134"void __ovld vstore_half16(double16 data, size_t offset, half *p);\n"
40135"void __ovld vstore_half2_rte(double2 data, size_t offset, half *p);\n"
40136"void __ovld vstore_half3_rte(double3 data, size_t offset, half *p);\n"
40137"void __ovld vstore_half4_rte(double4 data, size_t offset, half *p);\n"
40138"void __ovld vstore_half8_rte(double8 data, size_t offset, half *p);\n"
40139"void __ovld vstore_half16_rte(double16 data, size_t offset, half *p);\n"
40140"void __ovld vstore_half2_rtz(double2 data, size_t offset, half *p);\n"
40141"void __ovld vstore_half3_rtz(double3 data, size_t offset, half *p);\n"
40142"void __ovld vstore_half4_rtz(double4 data, size_t offset, half *p);\n"
40143"void __ovld vstore_half8_rtz(double8 data, size_t offset, half *p);\n"
40144"void __ovld vstore_half16_rtz(double16 data, size_t offset, half *p);\n"
40145"void __ovld vstore_half2_rtp(double2 data, size_t offset, half *p);\n"
40146"void __ovld vstore_half3_rtp(double3 data, size_t offset, half *p);\n"
40147"void __ovld vstore_half4_rtp(double4 data, size_t offset, half *p);\n"
40148"void __ovld vstore_half8_rtp(double8 data, size_t offset, half *p);\n"
40149"void __ovld vstore_half16_rtp(double16 data, size_t offset, half *p);\n"
40150"void __ovld vstore_half2_rtn(double2 data, size_t offset, half *p);\n"
40151"void __ovld vstore_half3_rtn(double3 data, size_t offset, half *p);\n"
40152"void __ovld vstore_half4_rtn(double4 data, size_t offset, half *p);\n"
40153"void __ovld vstore_half8_rtn(double8 data, size_t offset, half *p);\n"
40154"void __ovld vstore_half16_rtn(double16 data, size_t offset, half *p);\n"
40155"#endif //cl_khr_fp64\n"
40156"#else\n"
40157"void __ovld vstore_half2(float2 data, size_t offset, __global half *p);\n"
40158"void __ovld vstore_half3(float3 data, size_t offset, __global half *p);\n"
40159"void __ovld vstore_half4(float4 data, size_t offset, __global half *p);\n"
40160"void __ovld vstore_half8(float8 data, size_t offset, __global half *p);\n"
40161"void __ovld vstore_half16(float16 data, size_t offset, __global half *p);\n"
40162"void __ovld vstore_half2_rte(float2 data, size_t offset, __global half *p);\n"
40163"void __ovld vstore_half3_rte(float3 data, size_t offset, __global half *p);\n"
40164"void __ovld vstore_half4_rte(float4 data, size_t offset, __global half *p);\n"
40165"void __ovld vstore_half8_rte(float8 data, size_t offset, __global half *p);\n"
40166"void __ovld vstore_half16_rte(float16 data, size_t offset, __global half *p);\n"
40167"void __ovld vstore_half2_rtz(float2 data, size_t offset, __global half *p);\n"
40168"void __ovld vstore_half3_rtz(float3 data, size_t offset, __global half *p);\n"
40169"void __ovld vstore_half4_rtz(float4 data, size_t offset, __global half *p);\n"
40170"void __ovld vstore_half8_rtz(float8 data, size_t offset, __global half *p);\n"
40171"void __ovld vstore_half16_rtz(float16 data, size_t offset, __global half *p);\n"
40172"void __ovld vstore_half2_rtp(float2 data, size_t offset, __global half *p);\n"
40173"void __ovld vstore_half3_rtp(float3 data, size_t offset, __global half *p);\n"
40174"void __ovld vstore_half4_rtp(float4 data, size_t offset, __global half *p);\n"
40175"void __ovld vstore_half8_rtp(float8 data, size_t offset, __global half *p);\n"
40176"void __ovld vstore_half16_rtp(float16 data, size_t offset, __global half *p);\n"
40177"void __ovld vstore_half2_rtn(float2 data, size_t offset, __global half *p);\n"
40178"void __ovld vstore_half3_rtn(float3 data, size_t offset, __global half *p);\n"
40179"void __ovld vstore_half4_rtn(float4 data, size_t offset, __global half *p);\n"
40180"void __ovld vstore_half8_rtn(float8 data, size_t offset, __global half *p);\n"
40181"void __ovld vstore_half16_rtn(float16 data, size_t offset, __global half *p);\n"
40182"void __ovld vstore_half2(float2 data, size_t offset, __local half *p);\n"
40183"void __ovld vstore_half3(float3 data, size_t offset, __local half *p);\n"
40184"void __ovld vstore_half4(float4 data, size_t offset, __local half *p);\n"
40185"void __ovld vstore_half8(float8 data, size_t offset, __local half *p);\n"
40186"void __ovld vstore_half16(float16 data, size_t offset, __local half *p);\n"
40187"void __ovld vstore_half2_rte(float2 data, size_t offset, __local half *p);\n"
40188"void __ovld vstore_half3_rte(float3 data, size_t offset, __local half *p);\n"
40189"void __ovld vstore_half4_rte(float4 data, size_t offset, __local half *p);\n"
40190"void __ovld vstore_half8_rte(float8 data, size_t offset, __local half *p);\n"
40191"void __ovld vstore_half16_rte(float16 data, size_t offset, __local half *p);\n"
40192"void __ovld vstore_half2_rtz(float2 data, size_t offset, __local half *p);\n"
40193"void __ovld vstore_half3_rtz(float3 data, size_t offset, __local half *p);\n"
40194"void __ovld vstore_half4_rtz(float4 data, size_t offset, __local half *p);\n"
40195"void __ovld vstore_half8_rtz(float8 data, size_t offset, __local half *p);\n"
40196"void __ovld vstore_half16_rtz(float16 data, size_t offset, __local half *p);\n"
40197"void __ovld vstore_half2_rtp(float2 data, size_t offset, __local half *p);\n"
40198"void __ovld vstore_half3_rtp(float3 data, size_t offset, __local half *p);\n"
40199"void __ovld vstore_half4_rtp(float4 data, size_t offset, __local half *p);\n"
40200"void __ovld vstore_half8_rtp(float8 data, size_t offset, __local half *p);\n"
40201"void __ovld vstore_half16_rtp(float16 data, size_t offset, __local half *p);\n"
40202"void __ovld vstore_half2_rtn(float2 data, size_t offset, __local half *p);\n"
40203"void __ovld vstore_half3_rtn(float3 data, size_t offset, __local half *p);\n"
40204"void __ovld vstore_half4_rtn(float4 data, size_t offset, __local half *p);\n"
40205"void __ovld vstore_half8_rtn(float8 data, size_t offset, __local half *p);\n"
40206"void __ovld vstore_half16_rtn(float16 data, size_t offset, __local half *p);\n"
40207"void __ovld vstore_half2(float2 data, size_t offset, __private half *p);\n"
40208"void __ovld vstore_half3(float3 data, size_t offset, __private half *p);\n"
40209"void __ovld vstore_half4(float4 data, size_t offset, __private half *p);\n"
40210"void __ovld vstore_half8(float8 data, size_t offset, __private half *p);\n"
40211"void __ovld vstore_half16(float16 data, size_t offset, __private half *p);\n"
40212"void __ovld vstore_half2_rte(float2 data, size_t offset, __private half *p);\n"
40213"void __ovld vstore_half3_rte(float3 data, size_t offset, __private half *p);\n"
40214"void __ovld vstore_half4_rte(float4 data, size_t offset, __private half *p);\n"
40215"void __ovld vstore_half8_rte(float8 data, size_t offset, __private half *p);\n"
40216"void __ovld vstore_half16_rte(float16 data, size_t offset, __private half *p);\n"
40217"void __ovld vstore_half2_rtz(float2 data, size_t offset, __private half *p);\n"
40218"void __ovld vstore_half3_rtz(float3 data, size_t offset, __private half *p);\n"
40219"void __ovld vstore_half4_rtz(float4 data, size_t offset, __private half *p);\n"
40220"void __ovld vstore_half8_rtz(float8 data, size_t offset, __private half *p);\n"
40221"void __ovld vstore_half16_rtz(float16 data, size_t offset, __private half *p);\n"
40222"void __ovld vstore_half2_rtp(float2 data, size_t offset, __private half *p);\n"
40223"void __ovld vstore_half3_rtp(float3 data, size_t offset, __private half *p);\n"
40224"void __ovld vstore_half4_rtp(float4 data, size_t offset, __private half *p);\n"
40225"void __ovld vstore_half8_rtp(float8 data, size_t offset, __private half *p);\n"
40226"void __ovld vstore_half16_rtp(float16 data, size_t offset, __private half *p);\n"
40227"void __ovld vstore_half2_rtn(float2 data, size_t offset, __private half *p);\n"
40228"void __ovld vstore_half3_rtn(float3 data, size_t offset, __private half *p);\n"
40229"void __ovld vstore_half4_rtn(float4 data, size_t offset, __private half *p);\n"
40230"void __ovld vstore_half8_rtn(float8 data, size_t offset, __private half *p);\n"
40231"void __ovld vstore_half16_rtn(float16 data, size_t offset, __private half *p);\n"
40232"#ifdef cl_khr_fp64\n"
40233"void __ovld vstore_half2(double2 data, size_t offset, __global half *p);\n"
40234"void __ovld vstore_half3(double3 data, size_t offset, __global half *p);\n"
40235"void __ovld vstore_half4(double4 data, size_t offset, __global half *p);\n"
40236"void __ovld vstore_half8(double8 data, size_t offset, __global half *p);\n"
40237"void __ovld vstore_half16(double16 data, size_t offset, __global half *p);\n"
40238"void __ovld vstore_half2_rte(double2 data, size_t offset, __global half *p);\n"
40239"void __ovld vstore_half3_rte(double3 data, size_t offset, __global half *p);\n"
40240"void __ovld vstore_half4_rte(double4 data, size_t offset, __global half *p);\n"
40241"void __ovld vstore_half8_rte(double8 data, size_t offset, __global half *p);\n"
40242"void __ovld vstore_half16_rte(double16 data, size_t offset, __global half *p);\n"
40243"void __ovld vstore_half2_rtz(double2 data, size_t offset, __global half *p);\n"
40244"void __ovld vstore_half3_rtz(double3 data, size_t offset, __global half *p);\n"
40245"void __ovld vstore_half4_rtz(double4 data, size_t offset, __global half *p);\n"
40246"void __ovld vstore_half8_rtz(double8 data, size_t offset, __global half *p);\n"
40247"void __ovld vstore_half16_rtz(double16 data, size_t offset, __global half *p);\n"
40248"void __ovld vstore_half2_rtp(double2 data, size_t offset, __global half *p);\n"
40249"void __ovld vstore_half3_rtp(double3 data, size_t offset, __global half *p);\n"
40250"void __ovld vstore_half4_rtp(double4 data, size_t offset, __global half *p);\n"
40251"void __ovld vstore_half8_rtp(double8 data, size_t offset, __global half *p);\n"
40252"void __ovld vstore_half16_rtp(double16 data, size_t offset, __global half *p);\n"
40253"void __ovld vstore_half2_rtn(double2 data, size_t offset, __global half *p);\n"
40254"void __ovld vstore_half3_rtn(double3 data, size_t offset, __global half *p);\n"
40255"void __ovld vstore_half4_rtn(double4 data, size_t offset, __global half *p);\n"
40256"void __ovld vstore_half8_rtn(double8 data, size_t offset, __global half *p);\n"
40257"void __ovld vstore_half16_rtn(double16 data, size_t offset, __global half *p);\n"
40258"void __ovld vstore_half2(double2 data, size_t offset, __local half *p);\n"
40259"void __ovld vstore_half3(double3 data, size_t offset, __local half *p);\n"
40260"void __ovld vstore_half4(double4 data, size_t offset, __local half *p);\n"
40261"void __ovld vstore_half8(double8 data, size_t offset, __local half *p);\n"
40262"void __ovld vstore_half16(double16 data, size_t offset, __local half *p);\n"
40263"void __ovld vstore_half2_rte(double2 data, size_t offset, __local half *p);\n"
40264"void __ovld vstore_half3_rte(double3 data, size_t offset, __local half *p);\n"
40265"void __ovld vstore_half4_rte(double4 data, size_t offset, __local half *p);\n"
40266"void __ovld vstore_half8_rte(double8 data, size_t offset, __local half *p);\n"
40267"void __ovld vstore_half16_rte(double16 data, size_t offset, __local half *p);\n"
40268"void __ovld vstore_half2_rtz(double2 data, size_t offset, __local half *p);\n"
40269"void __ovld vstore_half3_rtz(double3 data, size_t offset, __local half *p);\n"
40270"void __ovld vstore_half4_rtz(double4 data, size_t offset, __local half *p);\n"
40271"void __ovld vstore_half8_rtz(double8 data, size_t offset, __local half *p);\n"
40272"void __ovld vstore_half16_rtz(double16 data, size_t offset, __local half *p);\n"
40273"void __ovld vstore_half2_rtp(double2 data, size_t offset, __local half *p);\n"
40274"void __ovld vstore_half3_rtp(double3 data, size_t offset, __local half *p);\n"
40275"void __ovld vstore_half4_rtp(double4 data, size_t offset, __local half *p);\n"
40276"void __ovld vstore_half8_rtp(double8 data, size_t offset, __local half *p);\n"
40277"void __ovld vstore_half16_rtp(double16 data, size_t offset, __local half *p);\n"
40278"void __ovld vstore_half2_rtn(double2 data, size_t offset, __local half *p);\n"
40279"void __ovld vstore_half3_rtn(double3 data, size_t offset, __local half *p);\n"
40280"void __ovld vstore_half4_rtn(double4 data, size_t offset, __local half *p);\n"
40281"void __ovld vstore_half8_rtn(double8 data, size_t offset, __local half *p);\n"
40282"void __ovld vstore_half16_rtn(double16 data, size_t offset, __local half *p);\n"
40283"void __ovld vstore_half2(double2 data, size_t offset, __private half *p);\n"
40284"void __ovld vstore_half3(double3 data, size_t offset, __private half *p);\n"
40285"void __ovld vstore_half4(double4 data, size_t offset, __private half *p);\n"
40286"void __ovld vstore_half8(double8 data, size_t offset, __private half *p);\n"
40287"void __ovld vstore_half16(double16 data, size_t offset, __private half *p);\n"
40288"void __ovld vstore_half2_rte(double2 data, size_t offset, __private half *p);\n"
40289"void __ovld vstore_half3_rte(double3 data, size_t offset, __private half *p);\n"
40290"void __ovld vstore_half4_rte(double4 data, size_t offset, __private half *p);\n"
40291"void __ovld vstore_half8_rte(double8 data, size_t offset, __private half *p);\n"
40292"void __ovld vstore_half16_rte(double16 data, size_t offset, __private half *p);\n"
40293"void __ovld vstore_half2_rtz(double2 data, size_t offset, __private half *p);\n"
40294"void __ovld vstore_half3_rtz(double3 data, size_t offset, __private half *p);\n"
40295"void __ovld vstore_half4_rtz(double4 data, size_t offset, __private half *p);\n"
40296"void __ovld vstore_half8_rtz(double8 data, size_t offset, __private half *p);\n"
40297"void __ovld vstore_half16_rtz(double16 data, size_t offset, __private half *p);\n"
40298"void __ovld vstore_half2_rtp(double2 data, size_t offset, __private half *p);\n"
40299"void __ovld vstore_half3_rtp(double3 data, size_t offset, __private half *p);\n"
40300"void __ovld vstore_half4_rtp(double4 data, size_t offset, __private half *p);\n"
40301"void __ovld vstore_half8_rtp(double8 data, size_t offset, __private half *p);\n"
40302"void __ovld vstore_half16_rtp(double16 data, size_t offset, __private half *p);\n"
40303"void __ovld vstore_half2_rtn(double2 data, size_t offset, __private half *p);\n"
40304"void __ovld vstore_half3_rtn(double3 data, size_t offset, __private half *p);\n"
40305"void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p);\n"
40306"void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p);\n"
40307"void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p);\n"
40308"#endif //cl_khr_fp64\n"
40309"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40310"\n"
40311"/**\n"
40312" * For n = 1, 2, 4, 8 and 16 read sizeof (halfn)\n"
40313" * bytes of data from address (p + (offset * n)).\n"
40314" * The data read is interpreted as a halfn value.\n"
40315" * The halfn value read is converted to a floatn\n"
40316" * value and the floatn value is returned.\n"
40317" * The address computed as (p + (offset * n))\n"
40318" * must be aligned to sizeof (halfn) bytes.\n"
40319" * For n = 3, vloada_half3 reads a half3 from\n"
40320" * address (p + (offset * 4)) and returns a float3.\n"
40321" * The address computed as (p + (offset * 4))\n"
40322" * must be aligned to sizeof (half) * 4 bytes.\n"
40323" */\n"
40324"float __ovld vloada_half(size_t offset, const __constant half *p);\n"
40325"float2 __ovld vloada_half2(size_t offset, const __constant half *p);\n"
40326"float3 __ovld vloada_half3(size_t offset, const __constant half *p);\n"
40327"float4 __ovld vloada_half4(size_t offset, const __constant half *p);\n"
40328"float8 __ovld vloada_half8(size_t offset, const __constant half *p);\n"
40329"float16 __ovld vloada_half16(size_t offset, const __constant half *p);\n"
40330"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40331"float __ovld vloada_half(size_t offset, const half *p);\n"
40332"float2 __ovld vloada_half2(size_t offset, const half *p);\n"
40333"float3 __ovld vloada_half3(size_t offset, const half *p);\n"
40334"float4 __ovld vloada_half4(size_t offset, const half *p);\n"
40335"float8 __ovld vloada_half8(size_t offset, const half *p);\n"
40336"float16 __ovld vloada_half16(size_t offset, const half *p);\n"
40337"#else\n"
40338"float __ovld vloada_half(size_t offset, const __global half *p);\n"
40339"float2 __ovld vloada_half2(size_t offset, const __global half *p);\n"
40340"float3 __ovld vloada_half3(size_t offset, const __global half *p);\n"
40341"float4 __ovld vloada_half4(size_t offset, const __global half *p);\n"
40342"float8 __ovld vloada_half8(size_t offset, const __global half *p);\n"
40343"float16 __ovld vloada_half16(size_t offset, const __global half *p);\n"
40344"float __ovld vloada_half(size_t offset, const __local half *p);\n"
40345"float2 __ovld vloada_half2(size_t offset, const __local half *p);\n"
40346"float3 __ovld vloada_half3(size_t offset, const __local half *p);\n"
40347"float4 __ovld vloada_half4(size_t offset, const __local half *p);\n"
40348"float8 __ovld vloada_half8(size_t offset, const __local half *p);\n"
40349"float16 __ovld vloada_half16(size_t offset, const __local half *p);\n"
40350"float __ovld vloada_half(size_t offset, const __private half *p);\n"
40351"float2 __ovld vloada_half2(size_t offset, const __private half *p);\n"
40352"float3 __ovld vloada_half3(size_t offset, const __private half *p);\n"
40353"float4 __ovld vloada_half4(size_t offset, const __private half *p);\n"
40354"float8 __ovld vloada_half8(size_t offset, const __private half *p);\n"
40355"float16 __ovld vloada_half16(size_t offset, const __private half *p);\n"
40356"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40357"\n"
40358"/**\n"
40359" * The floatn value given by data is converted to\n"
40360" * a halfn value using the appropriate rounding\n"
40361" * mode.\n"
40362" * For n = 1, 2, 4, 8 and 16, the halfn value is\n"
40363" * written to the address computed as (p + (offset\n"
40364" * * n)). The address computed as (p + (offset *\n"
40365" * n)) must be aligned to sizeof (halfn) bytes.\n"
40366" * For n = 3, the half3 value is written to the\n"
40367" * address computed as (p + (offset * 4)). The\n"
40368" * address computed as (p + (offset * 4)) must be\n"
40369" * aligned to sizeof (half) * 4 bytes.\n"
40370" * vstorea_halfn uses the current rounding\n"
40371" * mode. The default current rounding mode is\n"
40372" * round to nearest even.\n"
40373" */\n"
40374"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40375"void __ovld vstorea_half(float data, size_t offset, half *p);\n"
40376"void __ovld vstorea_half2(float2 data, size_t offset, half *p);\n"
40377"void __ovld vstorea_half3(float3 data, size_t offset, half *p);\n"
40378"void __ovld vstorea_half4(float4 data, size_t offset, half *p);\n"
40379"void __ovld vstorea_half8(float8 data, size_t offset, half *p);\n"
40380"void __ovld vstorea_half16(float16 data, size_t offset, half *p);\n"
40381"\n"
40382"void __ovld vstorea_half_rte(float data, size_t offset, half *p);\n"
40383"void __ovld vstorea_half2_rte(float2 data, size_t offset, half *p);\n"
40384"void __ovld vstorea_half3_rte(float3 data, size_t offset, half *p);\n"
40385"void __ovld vstorea_half4_rte(float4 data, size_t offset, half *p);\n"
40386"void __ovld vstorea_half8_rte(float8 data, size_t offset, half *p);\n"
40387"void __ovld vstorea_half16_rte(float16 data, size_t offset, half *p);\n"
40388"\n"
40389"void __ovld vstorea_half_rtz(float data, size_t offset, half *p);\n"
40390"void __ovld vstorea_half2_rtz(float2 data, size_t offset, half *p);\n"
40391"void __ovld vstorea_half3_rtz(float3 data, size_t offset, half *p);\n"
40392"void __ovld vstorea_half4_rtz(float4 data, size_t offset, half *p);\n"
40393"void __ovld vstorea_half8_rtz(float8 data, size_t offset, half *p);\n"
40394"void __ovld vstorea_half16_rtz(float16 data, size_t offset, half *p);\n"
40395"\n"
40396"void __ovld vstorea_half_rtp(float data, size_t offset, half *p);\n"
40397"void __ovld vstorea_half2_rtp(float2 data, size_t offset, half *p);\n"
40398"void __ovld vstorea_half3_rtp(float3 data, size_t offset, half *p);\n"
40399"void __ovld vstorea_half4_rtp(float4 data, size_t offset, half *p);\n"
40400"void __ovld vstorea_half8_rtp(float8 data, size_t offset, half *p);\n"
40401"void __ovld vstorea_half16_rtp(float16 data, size_t offset, half *p);\n"
40402"\n"
40403"void __ovld vstorea_half_rtn(float data, size_t offset, half *p);\n"
40404"void __ovld vstorea_half2_rtn(float2 data, size_t offset, half *p);\n"
40405"void __ovld vstorea_half3_rtn(float3 data, size_t offset, half *p);\n"
40406"void __ovld vstorea_half4_rtn(float4 data, size_t offset, half *p);\n"
40407"void __ovld vstorea_half8_rtn(float8 data, size_t offset, half *p);\n"
40408"void __ovld vstorea_half16_rtn(float16 data, size_t offset, half *p);\n"
40409"\n"
40410"#ifdef cl_khr_fp64\n"
40411"void __ovld vstorea_half(double data, size_t offset, half *p);\n"
40412"void __ovld vstorea_half2(double2 data, size_t offset, half *p);\n"
40413"void __ovld vstorea_half3(double3 data, size_t offset, half *p);\n"
40414"void __ovld vstorea_half4(double4 data, size_t offset, half *p);\n"
40415"void __ovld vstorea_half8(double8 data, size_t offset, half *p);\n"
40416"void __ovld vstorea_half16(double16 data, size_t offset, half *p);\n"
40417"\n"
40418"void __ovld vstorea_half_rte(double data, size_t offset, half *p);\n"
40419"void __ovld vstorea_half2_rte(double2 data, size_t offset, half *p);\n"
40420"void __ovld vstorea_half3_rte(double3 data, size_t offset, half *p);\n"
40421"void __ovld vstorea_half4_rte(double4 data, size_t offset, half *p);\n"
40422"void __ovld vstorea_half8_rte(double8 data, size_t offset, half *p);\n"
40423"void __ovld vstorea_half16_rte(double16 data, size_t offset, half *p);\n"
40424"\n"
40425"void __ovld vstorea_half_rtz(double data, size_t offset, half *p);\n"
40426"void __ovld vstorea_half2_rtz(double2 data, size_t offset, half *p);\n"
40427"void __ovld vstorea_half3_rtz(double3 data, size_t offset, half *p);\n"
40428"void __ovld vstorea_half4_rtz(double4 data, size_t offset, half *p);\n"
40429"void __ovld vstorea_half8_rtz(double8 data, size_t offset, half *p);\n"
40430"void __ovld vstorea_half16_rtz(double16 data, size_t offset, half *p);\n"
40431"\n"
40432"void __ovld vstorea_half_rtp(double data, size_t offset, half *p);\n"
40433"void __ovld vstorea_half2_rtp(double2 data, size_t offset, half *p);\n"
40434"void __ovld vstorea_half3_rtp(double3 data, size_t offset, half *p);\n"
40435"void __ovld vstorea_half4_rtp(double4 data, size_t offset, half *p);\n"
40436"void __ovld vstorea_half8_rtp(double8 data, size_t offset, half *p);\n"
40437"void __ovld vstorea_half16_rtp(double16 data, size_t offset, half *p);\n"
40438"\n"
40439"void __ovld vstorea_half_rtn(double data, size_t offset, half *p);\n"
40440"void __ovld vstorea_half2_rtn(double2 data, size_t offset, half *p);\n"
40441"void __ovld vstorea_half3_rtn(double3 data, size_t offset, half *p);\n"
40442"void __ovld vstorea_half4_rtn(double4 data, size_t offset, half *p);\n"
40443"void __ovld vstorea_half8_rtn(double8 data, size_t offset, half *p);\n"
40444"void __ovld vstorea_half16_rtn(double16 data, size_t offset, half *p);\n"
40445"#endif //cl_khr_fp64\n"
40446"\n"
40447"#else\n"
40448"void __ovld vstorea_half(float data, size_t offset, __global half *p);\n"
40449"void __ovld vstorea_half2(float2 data, size_t offset, __global half *p);\n"
40450"void __ovld vstorea_half3(float3 data, size_t offset, __global half *p);\n"
40451"void __ovld vstorea_half4(float4 data, size_t offset, __global half *p);\n"
40452"void __ovld vstorea_half8(float8 data, size_t offset, __global half *p);\n"
40453"void __ovld vstorea_half16(float16 data, size_t offset, __global half *p);\n"
40454"\n"
40455"void __ovld vstorea_half_rte(float data, size_t offset, __global half *p);\n"
40456"void __ovld vstorea_half2_rte(float2 data, size_t offset, __global half *p);\n"
40457"void __ovld vstorea_half3_rte(float3 data, size_t offset, __global half *p);\n"
40458"void __ovld vstorea_half4_rte(float4 data, size_t offset, __global half *p);\n"
40459"void __ovld vstorea_half8_rte(float8 data, size_t offset, __global half *p);\n"
40460"void __ovld vstorea_half16_rte(float16 data, size_t offset, __global half *p);\n"
40461"\n"
40462"void __ovld vstorea_half_rtz(float data, size_t offset, __global half *p);\n"
40463"void __ovld vstorea_half2_rtz(float2 data, size_t offset, __global half *p);\n"
40464"void __ovld vstorea_half3_rtz(float3 data, size_t offset, __global half *p);\n"
40465"void __ovld vstorea_half4_rtz(float4 data, size_t offset, __global half *p);\n"
40466"void __ovld vstorea_half8_rtz(float8 data, size_t offset, __global half *p);\n"
40467"void __ovld vstorea_half16_rtz(float16 data, size_t offset, __global half *p);\n"
40468"\n"
40469"void __ovld vstorea_half_rtp(float data, size_t offset, __global half *p);\n"
40470"void __ovld vstorea_half2_rtp(float2 data, size_t offset, __global half *p);\n"
40471"void __ovld vstorea_half3_rtp(float3 data, size_t offset, __global half *p);\n"
40472"void __ovld vstorea_half4_rtp(float4 data, size_t offset, __global half *p);\n"
40473"void __ovld vstorea_half8_rtp(float8 data, size_t offset, __global half *p);\n"
40474"void __ovld vstorea_half16_rtp(float16 data, size_t offset, __global half *p);\n"
40475"\n"
40476"void __ovld vstorea_half_rtn(float data, size_t offset, __global half *p);\n"
40477"void __ovld vstorea_half2_rtn(float2 data, size_t offset, __global half *p);\n"
40478"void __ovld vstorea_half3_rtn(float3 data, size_t offset, __global half *p);\n"
40479"void __ovld vstorea_half4_rtn(float4 data, size_t offset, __global half *p);\n"
40480"void __ovld vstorea_half8_rtn(float8 data, size_t offset, __global half *p);\n"
40481"void __ovld vstorea_half16_rtn(float16 data, size_t offset, __global half *p);\n"
40482"\n"
40483"void __ovld vstorea_half(float data, size_t offset, __local half *p);\n"
40484"void __ovld vstorea_half2(float2 data, size_t offset, __local half *p);\n"
40485"void __ovld vstorea_half3(float3 data, size_t offset, __local half *p);\n"
40486"void __ovld vstorea_half4(float4 data, size_t offset, __local half *p);\n"
40487"void __ovld vstorea_half8(float8 data, size_t offset, __local half *p);\n"
40488"void __ovld vstorea_half16(float16 data, size_t offset, __local half *p);\n"
40489"\n"
40490"void __ovld vstorea_half_rte(float data, size_t offset, __local half *p);\n"
40491"void __ovld vstorea_half2_rte(float2 data, size_t offset, __local half *p);\n"
40492"void __ovld vstorea_half3_rte(float3 data, size_t offset, __local half *p);\n"
40493"void __ovld vstorea_half4_rte(float4 data, size_t offset, __local half *p);\n"
40494"void __ovld vstorea_half8_rte(float8 data, size_t offset, __local half *p);\n"
40495"void __ovld vstorea_half16_rte(float16 data, size_t offset, __local half *p);\n"
40496"\n"
40497"void __ovld vstorea_half_rtz(float data, size_t offset, __local half *p);\n"
40498"void __ovld vstorea_half2_rtz(float2 data, size_t offset, __local half *p);\n"
40499"void __ovld vstorea_half3_rtz(float3 data, size_t offset, __local half *p);\n"
40500"void __ovld vstorea_half4_rtz(float4 data, size_t offset, __local half *p);\n"
40501"void __ovld vstorea_half8_rtz(float8 data, size_t offset, __local half *p);\n"
40502"void __ovld vstorea_half16_rtz(float16 data, size_t offset, __local half *p);\n"
40503"\n"
40504"void __ovld vstorea_half_rtp(float data, size_t offset, __local half *p);\n"
40505"void __ovld vstorea_half2_rtp(float2 data, size_t offset, __local half *p);\n"
40506"void __ovld vstorea_half3_rtp(float3 data, size_t offset, __local half *p);\n"
40507"void __ovld vstorea_half4_rtp(float4 data, size_t offset, __local half *p);\n"
40508"void __ovld vstorea_half8_rtp(float8 data, size_t offset, __local half *p);\n"
40509"void __ovld vstorea_half16_rtp(float16 data, size_t offset, __local half *p);\n"
40510"\n"
40511"void __ovld vstorea_half_rtn(float data, size_t offset, __local half *p);\n"
40512"void __ovld vstorea_half2_rtn(float2 data, size_t offset, __local half *p);\n"
40513"void __ovld vstorea_half3_rtn(float3 data, size_t offset, __local half *p);\n"
40514"void __ovld vstorea_half4_rtn(float4 data, size_t offset, __local half *p);\n"
40515"void __ovld vstorea_half8_rtn(float8 data, size_t offset, __local half *p);\n"
40516"void __ovld vstorea_half16_rtn(float16 data, size_t offset, __local half *p);\n"
40517"\n"
40518"void __ovld vstorea_half(float data, size_t offset, __private half *p);\n"
40519"void __ovld vstorea_half2(float2 data, size_t offset, __private half *p);\n"
40520"void __ovld vstorea_half3(float3 data, size_t offset, __private half *p);\n"
40521"void __ovld vstorea_half4(float4 data, size_t offset, __private half *p);\n"
40522"void __ovld vstorea_half8(float8 data, size_t offset, __private half *p);\n"
40523"void __ovld vstorea_half16(float16 data, size_t offset, __private half *p);\n"
40524"\n"
40525"void __ovld vstorea_half_rte(float data, size_t offset, __private half *p);\n"
40526"void __ovld vstorea_half2_rte(float2 data, size_t offset, __private half *p);\n"
40527"void __ovld vstorea_half3_rte(float3 data, size_t offset, __private half *p);\n"
40528"void __ovld vstorea_half4_rte(float4 data, size_t offset, __private half *p);\n"
40529"void __ovld vstorea_half8_rte(float8 data, size_t offset, __private half *p);\n"
40530"void __ovld vstorea_half16_rte(float16 data, size_t offset, __private half *p);\n"
40531"\n"
40532"void __ovld vstorea_half_rtz(float data, size_t offset, __private half *p);\n"
40533"void __ovld vstorea_half2_rtz(float2 data, size_t offset, __private half *p);\n"
40534"void __ovld vstorea_half3_rtz(float3 data, size_t offset, __private half *p);\n"
40535"void __ovld vstorea_half4_rtz(float4 data, size_t offset, __private half *p);\n"
40536"void __ovld vstorea_half8_rtz(float8 data, size_t offset, __private half *p);\n"
40537"void __ovld vstorea_half16_rtz(float16 data, size_t offset, __private half *p);\n"
40538"\n"
40539"void __ovld vstorea_half_rtp(float data, size_t offset, __private half *p);\n"
40540"void __ovld vstorea_half2_rtp(float2 data, size_t offset, __private half *p);\n"
40541"void __ovld vstorea_half3_rtp(float3 data, size_t offset, __private half *p);\n"
40542"void __ovld vstorea_half4_rtp(float4 data, size_t offset, __private half *p);\n"
40543"void __ovld vstorea_half8_rtp(float8 data, size_t offset, __private half *p);\n"
40544"void __ovld vstorea_half16_rtp(float16 data, size_t offset, __private half *p);\n"
40545"\n"
40546"void __ovld vstorea_half_rtn(float data, size_t offset, __private half *p);\n"
40547"void __ovld vstorea_half2_rtn(float2 data, size_t offset, __private half *p);\n"
40548"void __ovld vstorea_half3_rtn(float3 data, size_t offset, __private half *p);\n"
40549"void __ovld vstorea_half4_rtn(float4 data, size_t offset, __private half *p);\n"
40550"void __ovld vstorea_half8_rtn(float8 data, size_t offset, __private half *p);\n"
40551"void __ovld vstorea_half16_rtn(float16 data, size_t offset, __private half *p);\n"
40552"\n"
40553"#ifdef cl_khr_fp64\n"
40554"void __ovld vstorea_half(double data, size_t offset, __global half *p);\n"
40555"void __ovld vstorea_half2(double2 data, size_t offset, __global half *p);\n"
40556"void __ovld vstorea_half3(double3 data, size_t offset, __global half *p);\n"
40557"void __ovld vstorea_half4(double4 data, size_t offset, __global half *p);\n"
40558"void __ovld vstorea_half8(double8 data, size_t offset, __global half *p);\n"
40559"void __ovld vstorea_half16(double16 data, size_t offset, __global half *p);\n"
40560"\n"
40561"void __ovld vstorea_half_rte(double data, size_t offset, __global half *p);\n"
40562"void __ovld vstorea_half2_rte(double2 data, size_t offset, __global half *p);\n"
40563"void __ovld vstorea_half3_rte(double3 data, size_t offset, __global half *p);\n"
40564"void __ovld vstorea_half4_rte(double4 data, size_t offset, __global half *p);\n"
40565"void __ovld vstorea_half8_rte(double8 data, size_t offset, __global half *p);\n"
40566"void __ovld vstorea_half16_rte(double16 data, size_t offset, __global half *p);\n"
40567"\n"
40568"void __ovld vstorea_half_rtz(double data, size_t offset, __global half *p);\n"
40569"void __ovld vstorea_half2_rtz(double2 data, size_t offset, __global half *p);\n"
40570"void __ovld vstorea_half3_rtz(double3 data, size_t offset, __global half *p);\n"
40571"void __ovld vstorea_half4_rtz(double4 data, size_t offset, __global half *p);\n"
40572"void __ovld vstorea_half8_rtz(double8 data, size_t offset, __global half *p);\n"
40573"void __ovld vstorea_half16_rtz(double16 data, size_t offset, __global half *p);\n"
40574"\n"
40575"void __ovld vstorea_half_rtp(double data, size_t offset, __global half *p);\n"
40576"void __ovld vstorea_half2_rtp(double2 data, size_t offset, __global half *p);\n"
40577"void __ovld vstorea_half3_rtp(double3 data, size_t offset, __global half *p);\n"
40578"void __ovld vstorea_half4_rtp(double4 data, size_t offset, __global half *p);\n"
40579"void __ovld vstorea_half8_rtp(double8 data, size_t offset, __global half *p);\n"
40580"void __ovld vstorea_half16_rtp(double16 data, size_t offset, __global half *p);\n"
40581"\n"
40582"void __ovld vstorea_half_rtn(double data, size_t offset, __global half *p);\n"
40583"void __ovld vstorea_half2_rtn(double2 data, size_t offset, __global half *p);\n"
40584"void __ovld vstorea_half3_rtn(double3 data, size_t offset, __global half *p);\n"
40585"void __ovld vstorea_half4_rtn(double4 data, size_t offset, __global half *p);\n"
40586"void __ovld vstorea_half8_rtn(double8 data, size_t offset, __global half *p);\n"
40587"void __ovld vstorea_half16_rtn(double16 data, size_t offset, __global half *p);\n"
40588"\n"
40589"void __ovld vstorea_half(double data, size_t offset, __local half *p);\n"
40590"void __ovld vstorea_half2(double2 data, size_t offset, __local half *p);\n"
40591"void __ovld vstorea_half3(double3 data, size_t offset, __local half *p);\n"
40592"void __ovld vstorea_half4(double4 data, size_t offset, __local half *p);\n"
40593"void __ovld vstorea_half8(double8 data, size_t offset, __local half *p);\n"
40594"void __ovld vstorea_half16(double16 data, size_t offset, __local half *p);\n"
40595"\n"
40596"void __ovld vstorea_half_rte(double data, size_t offset, __local half *p);\n"
40597"void __ovld vstorea_half2_rte(double2 data, size_t offset, __local half *p);\n"
40598"void __ovld vstorea_half3_rte(double3 data, size_t offset, __local half *p);\n"
40599"void __ovld vstorea_half4_rte(double4 data, size_t offset, __local half *p);\n"
40600"void __ovld vstorea_half8_rte(double8 data, size_t offset, __local half *p);\n"
40601"void __ovld vstorea_half16_rte(double16 data, size_t offset, __local half *p);\n"
40602"\n"
40603"void __ovld vstorea_half_rtz(double data, size_t offset, __local half *p);\n"
40604"void __ovld vstorea_half2_rtz(double2 data, size_t offset, __local half *p);\n"
40605"void __ovld vstorea_half3_rtz(double3 data, size_t offset, __local half *p);\n"
40606"void __ovld vstorea_half4_rtz(double4 data, size_t offset, __local half *p);\n"
40607"void __ovld vstorea_half8_rtz(double8 data, size_t offset, __local half *p);\n"
40608"void __ovld vstorea_half16_rtz(double16 data, size_t offset, __local half *p);\n"
40609"\n"
40610"void __ovld vstorea_half_rtp(double data, size_t offset, __local half *p);\n"
40611"void __ovld vstorea_half2_rtp(double2 data, size_t offset, __local half *p);\n"
40612"void __ovld vstorea_half3_rtp(double3 data, size_t offset, __local half *p);\n"
40613"void __ovld vstorea_half4_rtp(double4 data, size_t offset, __local half *p);\n"
40614"void __ovld vstorea_half8_rtp(double8 data, size_t offset, __local half *p);\n"
40615"void __ovld vstorea_half16_rtp(double16 data, size_t offset, __local half *p);\n"
40616"\n"
40617"void __ovld vstorea_half_rtn(double data, size_t offset, __local half *p);\n"
40618"void __ovld vstorea_half2_rtn(double2 data, size_t offset, __local half *p);\n"
40619"void __ovld vstorea_half3_rtn(double3 data, size_t offset, __local half *p);\n"
40620"void __ovld vstorea_half4_rtn(double4 data, size_t offset, __local half *p);\n"
40621"void __ovld vstorea_half8_rtn(double8 data, size_t offset, __local half *p);\n"
40622"void __ovld vstorea_half16_rtn(double16 data, size_t offset, __local half *p);\n"
40623"\n"
40624"void __ovld vstorea_half(double data, size_t offset, __private half *p);\n"
40625"void __ovld vstorea_half2(double2 data, size_t offset, __private half *p);\n"
40626"void __ovld vstorea_half3(double3 data, size_t offset, __private half *p);\n"
40627"void __ovld vstorea_half4(double4 data, size_t offset, __private half *p);\n"
40628"void __ovld vstorea_half8(double8 data, size_t offset, __private half *p);\n"
40629"void __ovld vstorea_half16(double16 data, size_t offset, __private half *p);\n"
40630"\n"
40631"void __ovld vstorea_half_rte(double data, size_t offset, __private half *p);\n"
40632"void __ovld vstorea_half2_rte(double2 data, size_t offset, __private half *p);\n"
40633"void __ovld vstorea_half3_rte(double3 data, size_t offset, __private half *p);\n"
40634"void __ovld vstorea_half4_rte(double4 data, size_t offset, __private half *p);\n"
40635"void __ovld vstorea_half8_rte(double8 data, size_t offset, __private half *p);\n"
40636"void __ovld vstorea_half16_rte(double16 data, size_t offset, __private half *p);\n"
40637"\n"
40638"void __ovld vstorea_half_rtz(double data, size_t offset, __private half *p);\n"
40639"void __ovld vstorea_half2_rtz(double2 data, size_t offset, __private half *p);\n"
40640"void __ovld vstorea_half3_rtz(double3 data, size_t offset, __private half *p);\n"
40641"void __ovld vstorea_half4_rtz(double4 data, size_t offset, __private half *p);\n"
40642"void __ovld vstorea_half8_rtz(double8 data, size_t offset, __private half *p);\n"
40643"void __ovld vstorea_half16_rtz(double16 data, size_t offset, __private half *p);\n"
40644"\n"
40645"void __ovld vstorea_half_rtp(double data, size_t offset, __private half *p);\n"
40646"void __ovld vstorea_half2_rtp(double2 data, size_t offset, __private half *p);\n"
40647"void __ovld vstorea_half3_rtp(double3 data, size_t offset, __private half *p);\n"
40648"void __ovld vstorea_half4_rtp(double4 data, size_t offset, __private half *p);\n"
40649"void __ovld vstorea_half8_rtp(double8 data, size_t offset, __private half *p);\n"
40650"void __ovld vstorea_half16_rtp(double16 data, size_t offset, __private half *p);\n"
40651"\n"
40652"void __ovld vstorea_half_rtn(double data, size_t offset, __private half *p);\n"
40653"void __ovld vstorea_half2_rtn(double2 data,size_t offset, __private half *p);\n"
40654"void __ovld vstorea_half3_rtn(double3 data,size_t offset, __private half *p);\n"
40655"void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p);\n"
40656"void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p);\n"
40657"void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p);\n"
40658"#endif //cl_khr_fp64\n"
40659"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40660"\n"
40661"// OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions\n"
40662"\n"
40663"// Flag type and values for barrier, mem_fence, read_mem_fence, write_mem_fence\n"
40664"typedef uint cl_mem_fence_flags;\n"
40665"\n"
40666"/**\n"
40667" * Queue a memory fence to ensure correct\n"
40668" * ordering of memory operations to local memory\n"
40669" */\n"
40670"#define CLK_LOCAL_MEM_FENCE 0x01\n"
40671"\n"
40672"/**\n"
40673" * Queue a memory fence to ensure correct\n"
40674" * ordering of memory operations to global memory\n"
40675" */\n"
40676"#define CLK_GLOBAL_MEM_FENCE 0x02\n"
40677"\n"
40678"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40679"/**\n"
40680" * Queue a memory fence to ensure correct ordering of memory\n"
40681" * operations between work-items of a work-group to\n"
40682" * image memory.\n"
40683" */\n"
40684"#define CLK_IMAGE_MEM_FENCE 0x04\n"
40685"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40686"\n"
40687"/**\n"
40688" * All work-items in a work-group executing the kernel\n"
40689" * on a processor must execute this function before any\n"
40690" * are allowed to continue execution beyond the barrier.\n"
40691" * This function must be encountered by all work-items in\n"
40692" * a work-group executing the kernel.\n"
40693" * If barrier is inside a conditional statement, then all\n"
40694" * work-items must enter the conditional if any work-item\n"
40695" * enters the conditional statement and executes the\n"
40696" * barrier.\n"
40697" * If barrer is inside a loop, all work-items must execute\n"
40698" * the barrier for each iteration of the loop before any are\n"
40699" * allowed to continue execution beyond the barrier.\n"
40700" * The barrier function also queues a memory fence\n"
40701" * (reads and writes) to ensure correct ordering of\n"
40702" * memory operations to local or global memory.\n"
40703" * The flags argument specifies the memory address space\n"
40704" * and can be set to a combination of the following literal\n"
40705" * values.\n"
40706" * CLK_LOCAL_MEM_FENCE - The barrier function\n"
40707" * will either flush any variables stored in local memory\n"
40708" * or queue a memory fence to ensure correct ordering of\n"
40709" * memory operations to local memory.\n"
40710" * CLK_GLOBAL_MEM_FENCE - The barrier function\n"
40711" * will queue a memory fence to ensure correct ordering\n"
40712" * of memory operations to global memory. This can be\n"
40713" * useful when work-items, for example, write to buffer or\n"
40714" * image objects and then want to read the updated data.\n"
40715" */\n"
40716"\n"
40717"void __ovld __conv barrier(cl_mem_fence_flags flags);\n"
40718"\n"
40719"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40720"\n"
40721"typedef enum memory_scope {\n"
40722" memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,\n"
40723" memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,\n"
40724" memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,\n"
40725" memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,\n"
40726"#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)\n"
40727" memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP\n"
40728"#endif\n"
40729"} memory_scope;\n"
40730"\n"
40731"void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);\n"
40732"void __ovld __conv work_group_barrier(cl_mem_fence_flags flags);\n"
40733"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40734"\n"
40735"// OpenCL v1.1 s6.11.9, v1.2 s6.12.9 - Explicit Memory Fence Functions\n"
40736"\n"
40737"/**\n"
40738" * Orders loads and stores of a work-item\n"
40739" * executing a kernel. This means that loads\n"
40740" * and stores preceding the mem_fence will\n"
40741" * be committed to memory before any loads\n"
40742" * and stores following the mem_fence.\n"
40743" * The flags argument specifies the memory\n"
40744" * address space and can be set to a\n"
40745" * combination of the following literal\n"
40746" * values:\n"
40747" * CLK_LOCAL_MEM_FENCE\n"
40748" * CLK_GLOBAL_MEM_FENCE.\n"
40749" */\n"
40750"void __ovld mem_fence(cl_mem_fence_flags flags);\n"
40751"\n"
40752"/**\n"
40753" * Read memory barrier that orders only\n"
40754" * loads.\n"
40755" * The flags argument specifies the memory\n"
40756" * address space and can be set to a\n"
40757" * combination of the following literal\n"
40758" * values:\n"
40759" * CLK_LOCAL_MEM_FENCE\n"
40760" * CLK_GLOBAL_MEM_FENCE.\n"
40761" */\n"
40762"void __ovld read_mem_fence(cl_mem_fence_flags flags);\n"
40763"\n"
40764"/**\n"
40765" * Write memory barrier that orders only\n"
40766" * stores.\n"
40767" * The flags argument specifies the memory\n"
40768" * address space and can be set to a\n"
40769" * combination of the following literal\n"
40770" * values:\n"
40771" * CLK_LOCAL_MEM_FENCE\n"
40772" * CLK_GLOBAL_MEM_FENCE.\n"
40773" */\n"
40774"void __ovld write_mem_fence(cl_mem_fence_flags flags);\n"
40775"\n"
40776"// OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions\n"
40777"\n"
40778"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40779"cl_mem_fence_flags __ovld get_fence(const void *ptr);\n"
40780"cl_mem_fence_flags __ovld get_fence(void *ptr);\n"
40781"\n"
40782"/**\n"
40783" * Builtin functions to_global, to_local, and to_private need to be declared as Clang builtin functions\n"
40784" * and checked in Sema since they should be declared as\n"
40785" * addr gentype* to_addr (gentype*);\n"
40786" * where gentype is builtin type or user defined type.\n"
40787" */\n"
40788"\n"
40789"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40790"\n"
40791"// OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch\n"
40792"\n"
40793"/**\n"
40794" * event_t async_work_group_copy (\n"
40795" * __global gentype *dst,\n"
40796" * const __local gentype *src,\n"
40797" * size_t num_elements,\n"
40798" * event_t event)\n"
40799" * Perform an async copy of num_elements\n"
40800" * gentype elements from src to dst. The async\n"
40801" * copy is performed by all work-items in a workgroup\n"
40802" * and this built-in function must therefore\n"
40803" * be encountered by all work-items in a workgroup\n"
40804" * executing the kernel with the same\n"
40805" * argument values; otherwise the results are\n"
40806" * undefined.\n"
40807" * Returns an event object that can be used by\n"
40808" * wait_group_events to wait for the async copy\n"
40809" * to finish. The event argument can also be used\n"
40810" * to associate the async_work_group_copy with\n"
40811" * a previous async copy allowing an event to be\n"
40812" * shared by multiple async copies; otherwise event\n"
40813" * should be zero.\n"
40814" * If event argument is non-zero, the event object\n"
40815" * supplied in event argument will be returned.\n"
40816" * This function does not perform any implicit\n"
40817" * synchronization of source data such as using a\n"
40818" * barrier before performing the copy.\n"
40819" */\n"
40820"event_t __ovld async_work_group_copy(__local char *dst, const __global char *src, size_t num_elements, event_t event);\n"
40821"event_t __ovld async_work_group_copy(__local uchar *dst, const __global uchar *src, size_t num_elements, event_t event);\n"
40822"event_t __ovld async_work_group_copy(__local short *dst, const __global short *src, size_t num_elements, event_t event);\n"
40823"event_t __ovld async_work_group_copy(__local ushort *dst, const __global ushort *src, size_t num_elements, event_t event);\n"
40824"event_t __ovld async_work_group_copy(__local int *dst, const __global int *src, size_t num_elements, event_t event);\n"
40825"event_t __ovld async_work_group_copy(__local uint *dst, const __global uint *src, size_t num_elements, event_t event);\n"
40826"event_t __ovld async_work_group_copy(__local long *dst, const __global long *src, size_t num_elements, event_t event);\n"
40827"event_t __ovld async_work_group_copy(__local ulong *dst, const __global ulong *src, size_t num_elements, event_t event);\n"
40828"event_t __ovld async_work_group_copy(__local float *dst, const __global float *src, size_t num_elements, event_t event);\n"
40829"event_t __ovld async_work_group_copy(__local char2 *dst, const __global char2 *src, size_t num_elements, event_t event);\n"
40830"event_t __ovld async_work_group_copy(__local uchar2 *dst, const __global uchar2 *src, size_t num_elements, event_t event);\n"
40831"event_t __ovld async_work_group_copy(__local short2 *dst, const __global short2 *src, size_t num_elements, event_t event);\n"
40832"event_t __ovld async_work_group_copy(__local ushort2 *dst, const __global ushort2 *src, size_t num_elements, event_t event);\n"
40833"event_t __ovld async_work_group_copy(__local int2 *dst, const __global int2 *src, size_t num_elements, event_t event);\n"
40834"event_t __ovld async_work_group_copy(__local uint2 *dst, const __global uint2 *src, size_t num_elements, event_t event);\n"
40835"event_t __ovld async_work_group_copy(__local long2 *dst, const __global long2 *src, size_t num_elements, event_t event);\n"
40836"event_t __ovld async_work_group_copy(__local ulong2 *dst, const __global ulong2 *src, size_t num_elements, event_t event);\n"
40837"event_t __ovld async_work_group_copy(__local float2 *dst, const __global float2 *src, size_t num_elements, event_t event);\n"
40838"event_t __ovld async_work_group_copy(__local char3 *dst, const __global char3 *src, size_t num_elements, event_t event);\n"
40839"event_t __ovld async_work_group_copy(__local uchar3 *dst, const __global uchar3 *src, size_t num_elements, event_t event);\n"
40840"event_t __ovld async_work_group_copy(__local short3 *dst, const __global short3 *src, size_t num_elements, event_t event);\n"
40841"event_t __ovld async_work_group_copy(__local ushort3 *dst, const __global ushort3 *src, size_t num_elements, event_t event);\n"
40842"event_t __ovld async_work_group_copy(__local int3 *dst, const __global int3 *src, size_t num_elements, event_t event);\n"
40843"event_t __ovld async_work_group_copy(__local uint3 *dst, const __global uint3 *src, size_t num_elements, event_t event);\n"
40844"event_t __ovld async_work_group_copy(__local long3 *dst, const __global long3 *src, size_t num_elements, event_t event);\n"
40845"event_t __ovld async_work_group_copy(__local ulong3 *dst, const __global ulong3 *src, size_t num_elements, event_t event);\n"
40846"event_t __ovld async_work_group_copy(__local float3 *dst, const __global float3 *src, size_t num_elements, event_t event);\n"
40847"event_t __ovld async_work_group_copy(__local char4 *dst, const __global char4 *src, size_t num_elements, event_t event);\n"
40848"event_t __ovld async_work_group_copy(__local uchar4 *dst, const __global uchar4 *src, size_t num_elements, event_t event);\n"
40849"event_t __ovld async_work_group_copy(__local short4 *dst, const __global short4 *src, size_t num_elements, event_t event);\n"
40850"event_t __ovld async_work_group_copy(__local ushort4 *dst, const __global ushort4 *src, size_t num_elements, event_t event);\n"
40851"event_t __ovld async_work_group_copy(__local int4 *dst, const __global int4 *src, size_t num_elements, event_t event);\n"
40852"event_t __ovld async_work_group_copy(__local uint4 *dst, const __global uint4 *src, size_t num_elements, event_t event);\n"
40853"event_t __ovld async_work_group_copy(__local long4 *dst, const __global long4 *src, size_t num_elements, event_t event);\n"
40854"event_t __ovld async_work_group_copy(__local ulong4 *dst, const __global ulong4 *src, size_t num_elements, event_t event);\n"
40855"event_t __ovld async_work_group_copy(__local float4 *dst, const __global float4 *src, size_t num_elements, event_t event);\n"
40856"event_t __ovld async_work_group_copy(__local char8 *dst, const __global char8 *src, size_t num_elements, event_t event);\n"
40857"event_t __ovld async_work_group_copy(__local uchar8 *dst, const __global uchar8 *src, size_t num_elements, event_t event);\n"
40858"event_t __ovld async_work_group_copy(__local short8 *dst, const __global short8 *src, size_t num_elements, event_t event);\n"
40859"event_t __ovld async_work_group_copy(__local ushort8 *dst, const __global ushort8 *src, size_t num_elements, event_t event);\n"
40860"event_t __ovld async_work_group_copy(__local int8 *dst, const __global int8 *src, size_t num_elements, event_t event);\n"
40861"event_t __ovld async_work_group_copy(__local uint8 *dst, const __global uint8 *src, size_t num_elements, event_t event);\n"
40862"event_t __ovld async_work_group_copy(__local long8 *dst, const __global long8 *src, size_t num_elements, event_t event);\n"
40863"event_t __ovld async_work_group_copy(__local ulong8 *dst, const __global ulong8 *src, size_t num_elements, event_t event);\n"
40864"event_t __ovld async_work_group_copy(__local float8 *dst, const __global float8 *src, size_t num_elements, event_t event);\n"
40865"event_t __ovld async_work_group_copy(__local char16 *dst, const __global char16 *src, size_t num_elements, event_t event);\n"
40866"event_t __ovld async_work_group_copy(__local uchar16 *dst, const __global uchar16 *src, size_t num_elements, event_t event);\n"
40867"event_t __ovld async_work_group_copy(__local short16 *dst, const __global short16 *src, size_t num_elements, event_t event);\n"
40868"event_t __ovld async_work_group_copy(__local ushort16 *dst, const __global ushort16 *src, size_t num_elements, event_t event);\n"
40869"event_t __ovld async_work_group_copy(__local int16 *dst, const __global int16 *src, size_t num_elements, event_t event);\n"
40870"event_t __ovld async_work_group_copy(__local uint16 *dst, const __global uint16 *src, size_t num_elements, event_t event);\n"
40871"event_t __ovld async_work_group_copy(__local long16 *dst, const __global long16 *src, size_t num_elements, event_t event);\n"
40872"event_t __ovld async_work_group_copy(__local ulong16 *dst, const __global ulong16 *src, size_t num_elements, event_t event);\n"
40873"event_t __ovld async_work_group_copy(__local float16 *dst, const __global float16 *src, size_t num_elements, event_t event);\n"
40874"event_t __ovld async_work_group_copy(__global char *dst, const __local char *src, size_t num_elements, event_t event);\n"
40875"event_t __ovld async_work_group_copy(__global uchar *dst, const __local uchar *src, size_t num_elements, event_t event);\n"
40876"event_t __ovld async_work_group_copy(__global short *dst, const __local short *src, size_t num_elements, event_t event);\n"
40877"event_t __ovld async_work_group_copy(__global ushort *dst, const __local ushort *src, size_t num_elements, event_t event);\n"
40878"event_t __ovld async_work_group_copy(__global int *dst, const __local int *src, size_t num_elements, event_t event);\n"
40879"event_t __ovld async_work_group_copy(__global uint *dst, const __local uint *src, size_t num_elements, event_t event);\n"
40880"event_t __ovld async_work_group_copy(__global long *dst, const __local long *src, size_t num_elements, event_t event);\n"
40881"event_t __ovld async_work_group_copy(__global ulong *dst, const __local ulong *src, size_t num_elements, event_t event);\n"
40882"event_t __ovld async_work_group_copy(__global float *dst, const __local float *src, size_t num_elements, event_t event);\n"
40883"event_t __ovld async_work_group_copy(__global char2 *dst, const __local char2 *src, size_t num_elements, event_t event);\n"
40884"event_t __ovld async_work_group_copy(__global uchar2 *dst, const __local uchar2 *src, size_t num_elements, event_t event);\n"
40885"event_t __ovld async_work_group_copy(__global short2 *dst, const __local short2 *src, size_t num_elements, event_t event);\n"
40886"event_t __ovld async_work_group_copy(__global ushort2 *dst, const __local ushort2 *src, size_t num_elements, event_t event);\n"
40887"event_t __ovld async_work_group_copy(__global int2 *dst, const __local int2 *src, size_t num_elements, event_t event);\n"
40888"event_t __ovld async_work_group_copy(__global uint2 *dst, const __local uint2 *src, size_t num_elements, event_t event);\n"
40889"event_t __ovld async_work_group_copy(__global long2 *dst, const __local long2 *src, size_t num_elements, event_t event);\n"
40890"event_t __ovld async_work_group_copy(__global ulong2 *dst, const __local ulong2 *src, size_t num_elements, event_t event);\n"
40891"event_t __ovld async_work_group_copy(__global float2 *dst, const __local float2 *src, size_t num_elements, event_t event);\n"
40892"event_t __ovld async_work_group_copy(__global char3 *dst, const __local char3 *src, size_t num_elements, event_t event);\n"
40893"event_t __ovld async_work_group_copy(__global uchar3 *dst, const __local uchar3 *src, size_t num_elements, event_t event);\n"
40894"event_t __ovld async_work_group_copy(__global short3 *dst, const __local short3 *src, size_t num_elements, event_t event);\n"
40895"event_t __ovld async_work_group_copy(__global ushort3 *dst, const __local ushort3 *src, size_t num_elements, event_t event);\n"
40896"event_t __ovld async_work_group_copy(__global int3 *dst, const __local int3 *src, size_t num_elements, event_t event);\n"
40897"event_t __ovld async_work_group_copy(__global uint3 *dst, const __local uint3 *src, size_t num_elements, event_t event);\n"
40898"event_t __ovld async_work_group_copy(__global long3 *dst, const __local long3 *src, size_t num_elements, event_t event);\n"
40899"event_t __ovld async_work_group_copy(__global ulong3 *dst, const __local ulong3 *src, size_t num_elements, event_t event);\n"
40900"event_t __ovld async_work_group_copy(__global float3 *dst, const __local float3 *src, size_t num_elements, event_t event);\n"
40901"event_t __ovld async_work_group_copy(__global char4 *dst, const __local char4 *src, size_t num_elements, event_t event);\n"
40902"event_t __ovld async_work_group_copy(__global uchar4 *dst, const __local uchar4 *src, size_t num_elements, event_t event);\n"
40903"event_t __ovld async_work_group_copy(__global short4 *dst, const __local short4 *src, size_t num_elements, event_t event);\n"
40904"event_t __ovld async_work_group_copy(__global ushort4 *dst, const __local ushort4 *src, size_t num_elements, event_t event);\n"
40905"event_t __ovld async_work_group_copy(__global int4 *dst, const __local int4 *src, size_t num_elements, event_t event);\n"
40906"event_t __ovld async_work_group_copy(__global uint4 *dst, const __local uint4 *src, size_t num_elements, event_t event);\n"
40907"event_t __ovld async_work_group_copy(__global long4 *dst, const __local long4 *src, size_t num_elements, event_t event);\n"
40908"event_t __ovld async_work_group_copy(__global ulong4 *dst, const __local ulong4 *src, size_t num_elements, event_t event);\n"
40909"event_t __ovld async_work_group_copy(__global float4 *dst, const __local float4 *src, size_t num_elements, event_t event);\n"
40910"event_t __ovld async_work_group_copy(__global char8 *dst, const __local char8 *src, size_t num_elements, event_t event);\n"
40911"event_t __ovld async_work_group_copy(__global uchar8 *dst, const __local uchar8 *src, size_t num_elements, event_t event);\n"
40912"event_t __ovld async_work_group_copy(__global short8 *dst, const __local short8 *src, size_t num_elements, event_t event);\n"
40913"event_t __ovld async_work_group_copy(__global ushort8 *dst, const __local ushort8 *src, size_t num_elements, event_t event);\n"
40914"event_t __ovld async_work_group_copy(__global int8 *dst, const __local int8 *src, size_t num_elements, event_t event);\n"
40915"event_t __ovld async_work_group_copy(__global uint8 *dst, const __local uint8 *src, size_t num_elements, event_t event);\n"
40916"event_t __ovld async_work_group_copy(__global long8 *dst, const __local long8 *src, size_t num_elements, event_t event);\n"
40917"event_t __ovld async_work_group_copy(__global ulong8 *dst, const __local ulong8 *src, size_t num_elements, event_t event);\n"
40918"event_t __ovld async_work_group_copy(__global float8 *dst, const __local float8 *src, size_t num_elements, event_t event);\n"
40919"event_t __ovld async_work_group_copy(__global char16 *dst, const __local char16 *src, size_t num_elements, event_t event);\n"
40920"event_t __ovld async_work_group_copy(__global uchar16 *dst, const __local uchar16 *src, size_t num_elements, event_t event);\n"
40921"event_t __ovld async_work_group_copy(__global short16 *dst, const __local short16 *src, size_t num_elements, event_t event);\n"
40922"event_t __ovld async_work_group_copy(__global ushort16 *dst, const __local ushort16 *src, size_t num_elements, event_t event);\n"
40923"event_t __ovld async_work_group_copy(__global int16 *dst, const __local int16 *src, size_t num_elements, event_t event);\n"
40924"event_t __ovld async_work_group_copy(__global uint16 *dst, const __local uint16 *src, size_t num_elements, event_t event);\n"
40925"event_t __ovld async_work_group_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, event_t event);\n"
40926"event_t __ovld async_work_group_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, event_t event);\n"
40927"event_t __ovld async_work_group_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, event_t event);\n"
40928"#ifdef cl_khr_fp64\n"
40929"event_t __ovld async_work_group_copy(__local double *dst, const __global double *src, size_t num_elements, event_t event);\n"
40930"event_t __ovld async_work_group_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, event_t event);\n"
40931"event_t __ovld async_work_group_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, event_t event);\n"
40932"event_t __ovld async_work_group_copy(__local double4 *dst, const __global double4 *src, size_t num_elements, event_t event);\n"
40933"event_t __ovld async_work_group_copy(__local double8 *dst, const __global double8 *src, size_t num_elements, event_t event);\n"
40934"event_t __ovld async_work_group_copy(__local double16 *dst, const __global double16 *src, size_t num_elements, event_t event);\n"
40935"event_t __ovld async_work_group_copy(__global double *dst, const __local double *src, size_t num_elements, event_t event);\n"
40936"event_t __ovld async_work_group_copy(__global double2 *dst, const __local double2 *src, size_t num_elements, event_t event);\n"
40937"event_t __ovld async_work_group_copy(__global double3 *dst, const __local double3 *src, size_t num_elements, event_t event);\n"
40938"event_t __ovld async_work_group_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, event_t event);\n"
40939"event_t __ovld async_work_group_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, event_t event);\n"
40940"event_t __ovld async_work_group_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, event_t event);\n"
40941"#endif //cl_khr_fp64\n"
40942"#ifdef cl_khr_fp16\n"
40943"event_t __ovld async_work_group_copy(__local half *dst, const __global half *src, size_t num_elements, event_t event);\n"
40944"event_t __ovld async_work_group_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, event_t event);\n"
40945"event_t __ovld async_work_group_copy(__local half3 *dst, const __global half3 *src, size_t num_elements, event_t event);\n"
40946"event_t __ovld async_work_group_copy(__local half4 *dst, const __global half4 *src, size_t num_elements, event_t event);\n"
40947"event_t __ovld async_work_group_copy(__local half8 *dst, const __global half8 *src, size_t num_elements, event_t event);\n"
40948"event_t __ovld async_work_group_copy(__local half16 *dst, const __global half16 *src, size_t num_elements, event_t event);\n"
40949"event_t __ovld async_work_group_copy(__global half *dst, const __local half *src, size_t num_elements, event_t event);\n"
40950"event_t __ovld async_work_group_copy(__global half2 *dst, const __local half2 *src, size_t num_elements, event_t event);\n"
40951"event_t __ovld async_work_group_copy(__global half3 *dst, const __local half3 *src, size_t num_elements, event_t event);\n"
40952"event_t __ovld async_work_group_copy(__global half4 *dst, const __local half4 *src, size_t num_elements, event_t event);\n"
40953"event_t __ovld async_work_group_copy(__global half8 *dst, const __local half8 *src, size_t num_elements, event_t event);\n"
40954"event_t __ovld async_work_group_copy(__global half16 *dst, const __local half16 *src, size_t num_elements, event_t event);\n"
40955"#endif //cl_khr_fp16\n"
40956"\n"
40957"/**\n"
40958" * Perform an async gather of num_elements\n"
40959" * gentype elements from src to dst. The\n"
40960" * src_stride is the stride in elements for each\n"
40961" * gentype element read from src. The dst_stride\n"
40962" * is the stride in elements for each gentype\n"
40963" * element written to dst. The async gather is\n"
40964" * performed by all work-items in a work-group.\n"
40965" * This built-in function must therefore be\n"
40966" * encountered by all work-items in a work-group\n"
40967" * executing the kernel with the same argument\n"
40968" * values; otherwise the results are undefined.\n"
40969" * Returns an event object that can be used by\n"
40970" * wait_group_events to wait for the async copy\n"
40971" * to finish. The event argument can also be used\n"
40972" * to associate the\n"
40973" * async_work_group_strided_copy with a\n"
40974" * previous async copy allowing an event to be\n"
40975" * shared by multiple async copies; otherwise event\n"
40976" * should be zero.\n"
40977" * If event argument is non-zero, the event object\n"
40978" * supplied in event argument will be returned.\n"
40979" * This function does not perform any implicit\n"
40980" * synchronization of source data such as using a\n"
40981" * barrier before performing the copy.\n"
40982" */\n"
40983"event_t __ovld async_work_group_strided_copy(__local char *dst, const __global char *src, size_t num_elements, size_t src_stride, event_t event);\n"
40984"event_t __ovld async_work_group_strided_copy(__local uchar *dst, const __global uchar *src, size_t num_elements, size_t src_stride, event_t event);\n"
40985"event_t __ovld async_work_group_strided_copy(__local short *dst, const __global short *src, size_t num_elements, size_t src_stride, event_t event);\n"
40986"event_t __ovld async_work_group_strided_copy(__local ushort *dst, const __global ushort *src, size_t num_elements, size_t src_stride, event_t event);\n"
40987"event_t __ovld async_work_group_strided_copy(__local int *dst, const __global int *src, size_t num_elements, size_t src_stride, event_t event);\n"
40988"event_t __ovld async_work_group_strided_copy(__local uint *dst, const __global uint *src, size_t num_elements, size_t src_stride, event_t event);\n"
40989"event_t __ovld async_work_group_strided_copy(__local long *dst, const __global long *src, size_t num_elements, size_t src_stride, event_t event);\n"
40990"event_t __ovld async_work_group_strided_copy(__local ulong *dst, const __global ulong *src, size_t num_elements, size_t src_stride, event_t event);\n"
40991"event_t __ovld async_work_group_strided_copy(__local float *dst, const __global float *src, size_t num_elements, size_t src_stride, event_t event);\n"
40992"event_t __ovld async_work_group_strided_copy(__local char2 *dst, const __global char2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
40993"event_t __ovld async_work_group_strided_copy(__local uchar2 *dst, const __global uchar2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
40994"event_t __ovld async_work_group_strided_copy(__local short2 *dst, const __global short2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
40995"event_t __ovld async_work_group_strided_copy(__local ushort2 *dst, const __global ushort2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
40996"event_t __ovld async_work_group_strided_copy(__local int2 *dst, const __global int2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
40997"event_t __ovld async_work_group_strided_copy(__local uint2 *dst, const __global uint2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
40998"event_t __ovld async_work_group_strided_copy(__local long2 *dst, const __global long2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
40999"event_t __ovld async_work_group_strided_copy(__local ulong2 *dst, const __global ulong2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41000"event_t __ovld async_work_group_strided_copy(__local float2 *dst, const __global float2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41001"event_t __ovld async_work_group_strided_copy(__local char3 *dst, const __global char3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41002"event_t __ovld async_work_group_strided_copy(__local uchar3 *dst, const __global uchar3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41003"event_t __ovld async_work_group_strided_copy(__local short3 *dst, const __global short3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41004"event_t __ovld async_work_group_strided_copy(__local ushort3 *dst, const __global ushort3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41005"event_t __ovld async_work_group_strided_copy(__local int3 *dst, const __global int3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41006"event_t __ovld async_work_group_strided_copy(__local uint3 *dst, const __global uint3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41007"event_t __ovld async_work_group_strided_copy(__local long3 *dst, const __global long3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41008"event_t __ovld async_work_group_strided_copy(__local ulong3 *dst, const __global ulong3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41009"event_t __ovld async_work_group_strided_copy(__local float3 *dst, const __global float3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41010"event_t __ovld async_work_group_strided_copy(__local char4 *dst, const __global char4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41011"event_t __ovld async_work_group_strided_copy(__local uchar4 *dst, const __global uchar4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41012"event_t __ovld async_work_group_strided_copy(__local short4 *dst, const __global short4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41013"event_t __ovld async_work_group_strided_copy(__local ushort4 *dst, const __global ushort4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41014"event_t __ovld async_work_group_strided_copy(__local int4 *dst, const __global int4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41015"event_t __ovld async_work_group_strided_copy(__local uint4 *dst, const __global uint4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41016"event_t __ovld async_work_group_strided_copy(__local long4 *dst, const __global long4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41017"event_t __ovld async_work_group_strided_copy(__local ulong4 *dst, const __global ulong4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41018"event_t __ovld async_work_group_strided_copy(__local float4 *dst, const __global float4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41019"event_t __ovld async_work_group_strided_copy(__local char8 *dst, const __global char8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41020"event_t __ovld async_work_group_strided_copy(__local uchar8 *dst, const __global uchar8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41021"event_t __ovld async_work_group_strided_copy(__local short8 *dst, const __global short8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41022"event_t __ovld async_work_group_strided_copy(__local ushort8 *dst, const __global ushort8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41023"event_t __ovld async_work_group_strided_copy(__local int8 *dst, const __global int8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41024"event_t __ovld async_work_group_strided_copy(__local uint8 *dst, const __global uint8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41025"event_t __ovld async_work_group_strided_copy(__local long8 *dst, const __global long8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41026"event_t __ovld async_work_group_strided_copy(__local ulong8 *dst, const __global ulong8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41027"event_t __ovld async_work_group_strided_copy(__local float8 *dst, const __global float8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41028"event_t __ovld async_work_group_strided_copy(__local char16 *dst, const __global char16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41029"event_t __ovld async_work_group_strided_copy(__local uchar16 *dst, const __global uchar16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41030"event_t __ovld async_work_group_strided_copy(__local short16 *dst, const __global short16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41031"event_t __ovld async_work_group_strided_copy(__local ushort16 *dst, const __global ushort16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41032"event_t __ovld async_work_group_strided_copy(__local int16 *dst, const __global int16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41033"event_t __ovld async_work_group_strided_copy(__local uint16 *dst, const __global uint16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41034"event_t __ovld async_work_group_strided_copy(__local long16 *dst, const __global long16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41035"event_t __ovld async_work_group_strided_copy(__local ulong16 *dst, const __global ulong16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41036"event_t __ovld async_work_group_strided_copy(__local float16 *dst, const __global float16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41037"event_t __ovld async_work_group_strided_copy(__global char *dst, const __local char *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41038"event_t __ovld async_work_group_strided_copy(__global uchar *dst, const __local uchar *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41039"event_t __ovld async_work_group_strided_copy(__global short *dst, const __local short *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41040"event_t __ovld async_work_group_strided_copy(__global ushort *dst, const __local ushort *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41041"event_t __ovld async_work_group_strided_copy(__global int *dst, const __local int *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41042"event_t __ovld async_work_group_strided_copy(__global uint *dst, const __local uint *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41043"event_t __ovld async_work_group_strided_copy(__global long *dst, const __local long *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41044"event_t __ovld async_work_group_strided_copy(__global ulong *dst, const __local ulong *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41045"event_t __ovld async_work_group_strided_copy(__global float *dst, const __local float *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41046"event_t __ovld async_work_group_strided_copy(__global char2 *dst, const __local char2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41047"event_t __ovld async_work_group_strided_copy(__global uchar2 *dst, const __local uchar2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41048"event_t __ovld async_work_group_strided_copy(__global short2 *dst, const __local short2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41049"event_t __ovld async_work_group_strided_copy(__global ushort2 *dst, const __local ushort2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41050"event_t __ovld async_work_group_strided_copy(__global int2 *dst, const __local int2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41051"event_t __ovld async_work_group_strided_copy(__global uint2 *dst, const __local uint2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41052"event_t __ovld async_work_group_strided_copy(__global long2 *dst, const __local long2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41053"event_t __ovld async_work_group_strided_copy(__global ulong2 *dst, const __local ulong2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41054"event_t __ovld async_work_group_strided_copy(__global float2 *dst, const __local float2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41055"event_t __ovld async_work_group_strided_copy(__global char3 *dst, const __local char3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41056"event_t __ovld async_work_group_strided_copy(__global uchar3 *dst, const __local uchar3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41057"event_t __ovld async_work_group_strided_copy(__global short3 *dst, const __local short3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41058"event_t __ovld async_work_group_strided_copy(__global ushort3 *dst, const __local ushort3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41059"event_t __ovld async_work_group_strided_copy(__global int3 *dst, const __local int3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41060"event_t __ovld async_work_group_strided_copy(__global uint3 *dst, const __local uint3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41061"event_t __ovld async_work_group_strided_copy(__global long3 *dst, const __local long3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41062"event_t __ovld async_work_group_strided_copy(__global ulong3 *dst, const __local ulong3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41063"event_t __ovld async_work_group_strided_copy(__global float3 *dst, const __local float3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41064"event_t __ovld async_work_group_strided_copy(__global char4 *dst, const __local char4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41065"event_t __ovld async_work_group_strided_copy(__global uchar4 *dst, const __local uchar4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41066"event_t __ovld async_work_group_strided_copy(__global short4 *dst, const __local short4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41067"event_t __ovld async_work_group_strided_copy(__global ushort4 *dst, const __local ushort4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41068"event_t __ovld async_work_group_strided_copy(__global int4 *dst, const __local int4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41069"event_t __ovld async_work_group_strided_copy(__global uint4 *dst, const __local uint4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41070"event_t __ovld async_work_group_strided_copy(__global long4 *dst, const __local long4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41071"event_t __ovld async_work_group_strided_copy(__global ulong4 *dst, const __local ulong4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41072"event_t __ovld async_work_group_strided_copy(__global float4 *dst, const __local float4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41073"event_t __ovld async_work_group_strided_copy(__global char8 *dst, const __local char8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41074"event_t __ovld async_work_group_strided_copy(__global uchar8 *dst, const __local uchar8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41075"event_t __ovld async_work_group_strided_copy(__global short8 *dst, const __local short8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41076"event_t __ovld async_work_group_strided_copy(__global ushort8 *dst, const __local ushort8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41077"event_t __ovld async_work_group_strided_copy(__global int8 *dst, const __local int8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41078"event_t __ovld async_work_group_strided_copy(__global uint8 *dst, const __local uint8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41079"event_t __ovld async_work_group_strided_copy(__global long8 *dst, const __local long8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41080"event_t __ovld async_work_group_strided_copy(__global ulong8 *dst, const __local ulong8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41081"event_t __ovld async_work_group_strided_copy(__global float8 *dst, const __local float8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41082"event_t __ovld async_work_group_strided_copy(__global char16 *dst, const __local char16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41083"event_t __ovld async_work_group_strided_copy(__global uchar16 *dst, const __local uchar16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41084"event_t __ovld async_work_group_strided_copy(__global short16 *dst, const __local short16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41085"event_t __ovld async_work_group_strided_copy(__global ushort16 *dst, const __local ushort16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41086"event_t __ovld async_work_group_strided_copy(__global int16 *dst, const __local int16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41087"event_t __ovld async_work_group_strided_copy(__global uint16 *dst, const __local uint16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41088"event_t __ovld async_work_group_strided_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41089"event_t __ovld async_work_group_strided_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41090"event_t __ovld async_work_group_strided_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41091"#ifdef cl_khr_fp64\n"
41092"event_t __ovld async_work_group_strided_copy(__local double *dst, const __global double *src, size_t num_elements, size_t src_stride, event_t event);\n"
41093"event_t __ovld async_work_group_strided_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41094"event_t __ovld async_work_group_strided_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41095"event_t __ovld async_work_group_strided_copy(__local double4 *dst, const __global double4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41096"event_t __ovld async_work_group_strided_copy(__local double8 *dst, const __global double8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41097"event_t __ovld async_work_group_strided_copy(__local double16 *dst, const __global double16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41098"event_t __ovld async_work_group_strided_copy(__global double *dst, const __local double *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41099"event_t __ovld async_work_group_strided_copy(__global double2 *dst, const __local double2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41100"event_t __ovld async_work_group_strided_copy(__global double3 *dst, const __local double3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41101"event_t __ovld async_work_group_strided_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41102"event_t __ovld async_work_group_strided_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41103"event_t __ovld async_work_group_strided_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41104"#endif //cl_khr_fp64\n"
41105"#ifdef cl_khr_fp16\n"
41106"event_t __ovld async_work_group_strided_copy(__local half *dst, const __global half *src, size_t num_elements, size_t src_stride, event_t event);\n"
41107"event_t __ovld async_work_group_strided_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41108"event_t __ovld async_work_group_strided_copy(__local half3 *dst, const __global half3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41109"event_t __ovld async_work_group_strided_copy(__local half4 *dst, const __global half4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41110"event_t __ovld async_work_group_strided_copy(__local half8 *dst, const __global half8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41111"event_t __ovld async_work_group_strided_copy(__local half16 *dst, const __global half16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41112"event_t __ovld async_work_group_strided_copy(__global half *dst, const __local half *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41113"event_t __ovld async_work_group_strided_copy(__global half2 *dst, const __local half2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41114"event_t __ovld async_work_group_strided_copy(__global half3 *dst, const __local half3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41115"event_t __ovld async_work_group_strided_copy(__global half4 *dst, const __local half4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41116"event_t __ovld async_work_group_strided_copy(__global half8 *dst, const __local half8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41117"event_t __ovld async_work_group_strided_copy(__global half16 *dst, const __local half16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41118"#endif //cl_khr_fp16\n"
41119"\n"
41120"/**\n"
41121" * Wait for events that identify the\n"
41122" * async_work_group_copy operations to\n"
41123" * complete. The event objects specified in\n"
41124" * event_list will be released after the wait is\n"
41125" * performed.\n"
41126" * This function must be encountered by all workitems\n"
41127" * in a work-group executing the kernel with\n"
41128" * the same num_events and event objects specified\n"
41129" * in event_list; otherwise the results are undefined.\n"
41130" */\n"
41131"void __ovld wait_group_events(int num_events, event_t *event_list);\n"
41132"\n"
41133"/**\n"
41134" * Prefetch num_elements * sizeof(gentype)\n"
41135" * bytes into the global cache. The prefetch\n"
41136" * instruction is applied to a work-item in a workgroup\n"
41137" * and does not affect the functional\n"
41138" * behavior of the kernel.\n"
41139" */\n"
41140"void __ovld prefetch(const __global char *p, size_t num_elements);\n"
41141"void __ovld prefetch(const __global uchar *p, size_t num_elements);\n"
41142"void __ovld prefetch(const __global short *p, size_t num_elements);\n"
41143"void __ovld prefetch(const __global ushort *p, size_t num_elements);\n"
41144"void __ovld prefetch(const __global int *p, size_t num_elements);\n"
41145"void __ovld prefetch(const __global uint *p, size_t num_elements);\n"
41146"void __ovld prefetch(const __global long *p, size_t num_elements);\n"
41147"void __ovld prefetch(const __global ulong *p, size_t num_elements);\n"
41148"void __ovld prefetch(const __global float *p, size_t num_elements);\n"
41149"void __ovld prefetch(const __global char2 *p, size_t num_elements);\n"
41150"void __ovld prefetch(const __global uchar2 *p, size_t num_elements);\n"
41151"void __ovld prefetch(const __global short2 *p, size_t num_elements);\n"
41152"void __ovld prefetch(const __global ushort2 *p, size_t num_elements);\n"
41153"void __ovld prefetch(const __global int2 *p, size_t num_elements);\n"
41154"void __ovld prefetch(const __global uint2 *p, size_t num_elements);\n"
41155"void __ovld prefetch(const __global long2 *p, size_t num_elements);\n"
41156"void __ovld prefetch(const __global ulong2 *p, size_t num_elements);\n"
41157"void __ovld prefetch(const __global float2 *p, size_t num_elements);\n"
41158"void __ovld prefetch(const __global char3 *p, size_t num_elements);\n"
41159"void __ovld prefetch(const __global uchar3 *p, size_t num_elements);\n"
41160"void __ovld prefetch(const __global short3 *p, size_t num_elements);\n"
41161"void __ovld prefetch(const __global ushort3 *p, size_t num_elements);\n"
41162"void __ovld prefetch(const __global int3 *p, size_t num_elements);\n"
41163"void __ovld prefetch(const __global uint3 *p, size_t num_elements);\n"
41164"void __ovld prefetch(const __global long3 *p, size_t num_elements);\n"
41165"void __ovld prefetch(const __global ulong3 *p, size_t num_elements);\n"
41166"void __ovld prefetch(const __global float3 *p, size_t num_elements);\n"
41167"void __ovld prefetch(const __global char4 *p, size_t num_elements);\n"
41168"void __ovld prefetch(const __global uchar4 *p, size_t num_elements);\n"
41169"void __ovld prefetch(const __global short4 *p, size_t num_elements);\n"
41170"void __ovld prefetch(const __global ushort4 *p, size_t num_elements);\n"
41171"void __ovld prefetch(const __global int4 *p, size_t num_elements);\n"
41172"void __ovld prefetch(const __global uint4 *p, size_t num_elements);\n"
41173"void __ovld prefetch(const __global long4 *p, size_t num_elements);\n"
41174"void __ovld prefetch(const __global ulong4 *p, size_t num_elements);\n"
41175"void __ovld prefetch(const __global float4 *p, size_t num_elements);\n"
41176"void __ovld prefetch(const __global char8 *p, size_t num_elements);\n"
41177"void __ovld prefetch(const __global uchar8 *p, size_t num_elements);\n"
41178"void __ovld prefetch(const __global short8 *p, size_t num_elements);\n"
41179"void __ovld prefetch(const __global ushort8 *p, size_t num_elements);\n"
41180"void __ovld prefetch(const __global int8 *p, size_t num_elements);\n"
41181"void __ovld prefetch(const __global uint8 *p, size_t num_elements);\n"
41182"void __ovld prefetch(const __global long8 *p, size_t num_elements);\n"
41183"void __ovld prefetch(const __global ulong8 *p, size_t num_elements);\n"
41184"void __ovld prefetch(const __global float8 *p, size_t num_elements);\n"
41185"void __ovld prefetch(const __global char16 *p, size_t num_elements);\n"
41186"void __ovld prefetch(const __global uchar16 *p, size_t num_elements);\n"
41187"void __ovld prefetch(const __global short16 *p, size_t num_elements);\n"
41188"void __ovld prefetch(const __global ushort16 *p, size_t num_elements);\n"
41189"void __ovld prefetch(const __global int16 *p, size_t num_elements);\n"
41190"void __ovld prefetch(const __global uint16 *p, size_t num_elements);\n"
41191"void __ovld prefetch(const __global long16 *p, size_t num_elements);\n"
41192"void __ovld prefetch(const __global ulong16 *p, size_t num_elements);\n"
41193"void __ovld prefetch(const __global float16 *p, size_t num_elements);\n"
41194"#ifdef cl_khr_fp64\n"
41195"void __ovld prefetch(const __global double *p, size_t num_elements);\n"
41196"void __ovld prefetch(const __global double2 *p, size_t num_elements);\n"
41197"void __ovld prefetch(const __global double3 *p, size_t num_elements);\n"
41198"void __ovld prefetch(const __global double4 *p, size_t num_elements);\n"
41199"void __ovld prefetch(const __global double8 *p, size_t num_elements);\n"
41200"void __ovld prefetch(const __global double16 *p, size_t num_elements);\n"
41201"#endif //cl_khr_fp64\n"
41202"#ifdef cl_khr_fp16\n"
41203"void __ovld prefetch(const __global half *p, size_t num_elements);\n"
41204"void __ovld prefetch(const __global half2 *p, size_t num_elements);\n"
41205"void __ovld prefetch(const __global half3 *p, size_t num_elements);\n"
41206"void __ovld prefetch(const __global half4 *p, size_t num_elements);\n"
41207"void __ovld prefetch(const __global half8 *p, size_t num_elements);\n"
41208"void __ovld prefetch(const __global half16 *p, size_t num_elements);\n"
41209"#endif // cl_khr_fp16\n"
41210"\n"
41211"// OpenCL v1.1 s6.11.1, v1.2 s6.12.11 - Atomic Functions\n"
41212"\n"
41213"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41214"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
41215"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
41216"#endif\n"
41217"/**\n"
41218" * Read the 32-bit value (referred to as old)\n"
41219" * stored at location pointed by p. Compute\n"
41220" * (old + val) and store result at location\n"
41221" * pointed by p. The function returns old.\n"
41222" */\n"
41223"int __ovld atomic_add(volatile __global int *p, int val);\n"
41224"unsigned int __ovld atomic_add(volatile __global unsigned int *p, unsigned int val);\n"
41225"int __ovld atomic_add(volatile __local int *p, int val);\n"
41226"unsigned int __ovld atomic_add(volatile __local unsigned int *p, unsigned int val);\n"
41227"\n"
41228"#if defined(cl_khr_global_int32_base_atomics)\n"
41229"int __ovld atom_add(volatile __global int *p, int val);\n"
41230"unsigned int __ovld atom_add(volatile __global unsigned int *p, unsigned int val);\n"
41231"#endif\n"
41232"#if defined(cl_khr_local_int32_base_atomics)\n"
41233"int __ovld atom_add(volatile __local int *p, int val);\n"
41234"unsigned int __ovld atom_add(volatile __local unsigned int *p, unsigned int val);\n"
41235"#endif\n"
41236"\n"
41237"#if defined(cl_khr_int64_base_atomics)\n"
41238"long __ovld atom_add(volatile __global long *p, long val);\n"
41239"unsigned long __ovld atom_add(volatile __global unsigned long *p, unsigned long val);\n"
41240"long __ovld atom_add(volatile __local long *p, long val);\n"
41241"unsigned long __ovld atom_add(volatile __local unsigned long *p, unsigned long val);\n"
41242"#endif\n"
41243"\n"
41244"/**\n"
41245" * Read the 32-bit value (referred to as old) stored at location pointed by p.\n"
41246" * Compute (old - val) and store result at location pointed by p. The function\n"
41247" * returns old.\n"
41248" */\n"
41249"int __ovld atomic_sub(volatile __global int *p, int val);\n"
41250"unsigned int __ovld atomic_sub(volatile __global unsigned int *p, unsigned int val);\n"
41251"int __ovld atomic_sub(volatile __local int *p, int val);\n"
41252"unsigned int __ovld atomic_sub(volatile __local unsigned int *p, unsigned int val);\n"
41253"\n"
41254"#if defined(cl_khr_global_int32_base_atomics)\n"
41255"int __ovld atom_sub(volatile __global int *p, int val);\n"
41256"unsigned int __ovld atom_sub(volatile __global unsigned int *p, unsigned int val);\n"
41257"#endif\n"
41258"#if defined(cl_khr_local_int32_base_atomics)\n"
41259"int __ovld atom_sub(volatile __local int *p, int val);\n"
41260"unsigned int __ovld atom_sub(volatile __local unsigned int *p, unsigned int val);\n"
41261"#endif\n"
41262"\n"
41263"#if defined(cl_khr_int64_base_atomics)\n"
41264"long __ovld atom_sub(volatile __global long *p, long val);\n"
41265"unsigned long __ovld atom_sub(volatile __global unsigned long *p, unsigned long val);\n"
41266"long __ovld atom_sub(volatile __local long *p, long val);\n"
41267"unsigned long __ovld atom_sub(volatile __local unsigned long *p, unsigned long val);\n"
41268"#endif\n"
41269"\n"
41270"/**\n"
41271" * Swaps the old value stored at location p\n"
41272" * with new value given by val. Returns old\n"
41273" * value.\n"
41274" */\n"
41275"int __ovld atomic_xchg(volatile __global int *p, int val);\n"
41276"unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, unsigned int val);\n"
41277"int __ovld atomic_xchg(volatile __local int *p, int val);\n"
41278"unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, unsigned int val);\n"
41279"float __ovld atomic_xchg(volatile __global float *p, float val);\n"
41280"float __ovld atomic_xchg(volatile __local float *p, float val);\n"
41281"\n"
41282"#if defined(cl_khr_global_int32_base_atomics)\n"
41283"int __ovld atom_xchg(volatile __global int *p, int val);\n"
41284"unsigned int __ovld atom_xchg(volatile __global unsigned int *p, unsigned int val);\n"
41285"#endif\n"
41286"#if defined(cl_khr_local_int32_base_atomics)\n"
41287"int __ovld atom_xchg(volatile __local int *p, int val);\n"
41288"unsigned int __ovld atom_xchg(volatile __local unsigned int *p, unsigned int val);\n"
41289"#endif\n"
41290"\n"
41291"#if defined(cl_khr_int64_base_atomics)\n"
41292"long __ovld atom_xchg(volatile __global long *p, long val);\n"
41293"long __ovld atom_xchg(volatile __local long *p, long val);\n"
41294"unsigned long __ovld atom_xchg(volatile __global unsigned long *p, unsigned long val);\n"
41295"unsigned long __ovld atom_xchg(volatile __local unsigned long *p, unsigned long val);\n"
41296"#endif\n"
41297"\n"
41298"/**\n"
41299" * Read the 32-bit value (referred to as old)\n"
41300" * stored at location pointed by p. Compute\n"
41301" * (old + 1) and store result at location\n"
41302" * pointed by p. The function returns old.\n"
41303" */\n"
41304"int __ovld atomic_inc(volatile __global int *p);\n"
41305"unsigned int __ovld atomic_inc(volatile __global unsigned int *p);\n"
41306"int __ovld atomic_inc(volatile __local int *p);\n"
41307"unsigned int __ovld atomic_inc(volatile __local unsigned int *p);\n"
41308"\n"
41309"#if defined(cl_khr_global_int32_base_atomics)\n"
41310"int __ovld atom_inc(volatile __global int *p);\n"
41311"unsigned int __ovld atom_inc(volatile __global unsigned int *p);\n"
41312"#endif\n"
41313"#if defined(cl_khr_local_int32_base_atomics)\n"
41314"int __ovld atom_inc(volatile __local int *p);\n"
41315"unsigned int __ovld atom_inc(volatile __local unsigned int *p);\n"
41316"#endif\n"
41317"\n"
41318"#if defined(cl_khr_int64_base_atomics)\n"
41319"long __ovld atom_inc(volatile __global long *p);\n"
41320"unsigned long __ovld atom_inc(volatile __global unsigned long *p);\n"
41321"long __ovld atom_inc(volatile __local long *p);\n"
41322"unsigned long __ovld atom_inc(volatile __local unsigned long *p);\n"
41323"#endif\n"
41324"\n"
41325"/**\n"
41326" * Read the 32-bit value (referred to as old)\n"
41327" * stored at location pointed by p. Compute\n"
41328" * (old - 1) and store result at location\n"
41329" * pointed by p. The function returns old.\n"
41330" */\n"
41331"int __ovld atomic_dec(volatile __global int *p);\n"
41332"unsigned int __ovld atomic_dec(volatile __global unsigned int *p);\n"
41333"int __ovld atomic_dec(volatile __local int *p);\n"
41334"unsigned int __ovld atomic_dec(volatile __local unsigned int *p);\n"
41335"\n"
41336"#if defined(cl_khr_global_int32_base_atomics)\n"
41337"int __ovld atom_dec(volatile __global int *p);\n"
41338"unsigned int __ovld atom_dec(volatile __global unsigned int *p);\n"
41339"#endif\n"
41340"#if defined(cl_khr_local_int32_base_atomics)\n"
41341"int __ovld atom_dec(volatile __local int *p);\n"
41342"unsigned int __ovld atom_dec(volatile __local unsigned int *p);\n"
41343"#endif\n"
41344"\n"
41345"#if defined(cl_khr_int64_base_atomics)\n"
41346"long __ovld atom_dec(volatile __global long *p);\n"
41347"unsigned long __ovld atom_dec(volatile __global unsigned long *p);\n"
41348"long __ovld atom_dec(volatile __local long *p);\n"
41349"unsigned long __ovld atom_dec(volatile __local unsigned long *p);\n"
41350"#endif\n"
41351"\n"
41352"/**\n"
41353" * Read the 32-bit value (referred to as old)\n"
41354" * stored at location pointed by p. Compute\n"
41355" * (old == cmp) ? val : old and store result at\n"
41356" * location pointed by p. The function\n"
41357" * returns old.\n"
41358" */\n"
41359"int __ovld atomic_cmpxchg(volatile __global int *p, int cmp, int val);\n"
41360"unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val);\n"
41361"int __ovld atomic_cmpxchg(volatile __local int *p, int cmp, int val);\n"
41362"unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val);\n"
41363"\n"
41364"#if defined(cl_khr_global_int32_base_atomics)\n"
41365"int __ovld atom_cmpxchg(volatile __global int *p, int cmp, int val);\n"
41366"unsigned int __ovld atom_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val);\n"
41367"#endif\n"
41368"#if defined(cl_khr_local_int32_base_atomics)\n"
41369"int __ovld atom_cmpxchg(volatile __local int *p, int cmp, int val);\n"
41370"unsigned int __ovld atom_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val);\n"
41371"#endif\n"
41372"\n"
41373"#if defined(cl_khr_int64_base_atomics)\n"
41374"long __ovld atom_cmpxchg(volatile __global long *p, long cmp, long val);\n"
41375"unsigned long __ovld atom_cmpxchg(volatile __global unsigned long *p, unsigned long cmp, unsigned long val);\n"
41376"long __ovld atom_cmpxchg(volatile __local long *p, long cmp, long val);\n"
41377"unsigned long __ovld atom_cmpxchg(volatile __local unsigned long *p, unsigned long cmp, unsigned long val);\n"
41378"#endif\n"
41379"\n"
41380"/**\n"
41381" * Read the 32-bit value (referred to as old)\n"
41382" * stored at location pointed by p. Compute\n"
41383" * min(old, val) and store minimum value at\n"
41384" * location pointed by p. The function\n"
41385" * returns old.\n"
41386" */\n"
41387"int __ovld atomic_min(volatile __global int *p, int val);\n"
41388"unsigned int __ovld atomic_min(volatile __global unsigned int *p, unsigned int val);\n"
41389"int __ovld atomic_min(volatile __local int *p, int val);\n"
41390"unsigned int __ovld atomic_min(volatile __local unsigned int *p, unsigned int val);\n"
41391"\n"
41392"#if defined(cl_khr_global_int32_extended_atomics)\n"
41393"int __ovld atom_min(volatile __global int *p, int val);\n"
41394"unsigned int __ovld atom_min(volatile __global unsigned int *p, unsigned int val);\n"
41395"#endif\n"
41396"#if defined(cl_khr_local_int32_extended_atomics)\n"
41397"int __ovld atom_min(volatile __local int *p, int val);\n"
41398"unsigned int __ovld atom_min(volatile __local unsigned int *p, unsigned int val);\n"
41399"#endif\n"
41400"\n"
41401"#if defined(cl_khr_int64_extended_atomics)\n"
41402"long __ovld atom_min(volatile __global long *p, long val);\n"
41403"unsigned long __ovld atom_min(volatile __global unsigned long *p, unsigned long val);\n"
41404"long __ovld atom_min(volatile __local long *p, long val);\n"
41405"unsigned long __ovld atom_min(volatile __local unsigned long *p, unsigned long val);\n"
41406"#endif\n"
41407"\n"
41408"/**\n"
41409" * Read the 32-bit value (referred to as old)\n"
41410" * stored at location pointed by p. Compute\n"
41411" * max(old, val) and store maximum value at\n"
41412" * location pointed by p. The function\n"
41413" * returns old.\n"
41414" */\n"
41415"int __ovld atomic_max(volatile __global int *p, int val);\n"
41416"unsigned int __ovld atomic_max(volatile __global unsigned int *p, unsigned int val);\n"
41417"int __ovld atomic_max(volatile __local int *p, int val);\n"
41418"unsigned int __ovld atomic_max(volatile __local unsigned int *p, unsigned int val);\n"
41419"\n"
41420"#if defined(cl_khr_global_int32_extended_atomics)\n"
41421"int __ovld atom_max(volatile __global int *p, int val);\n"
41422"unsigned int __ovld atom_max(volatile __global unsigned int *p, unsigned int val);\n"
41423"#endif\n"
41424"#if defined(cl_khr_local_int32_extended_atomics)\n"
41425"int __ovld atom_max(volatile __local int *p, int val);\n"
41426"unsigned int __ovld atom_max(volatile __local unsigned int *p, unsigned int val);\n"
41427"#endif\n"
41428"\n"
41429"#if defined(cl_khr_int64_extended_atomics)\n"
41430"long __ovld atom_max(volatile __global long *p, long val);\n"
41431"unsigned long __ovld atom_max(volatile __global unsigned long *p, unsigned long val);\n"
41432"long __ovld atom_max(volatile __local long *p, long val);\n"
41433"unsigned long __ovld atom_max(volatile __local unsigned long *p, unsigned long val);\n"
41434"#endif\n"
41435"\n"
41436"/**\n"
41437" * Read the 32-bit value (referred to as old)\n"
41438" * stored at location pointed by p. Compute\n"
41439" * (old & val) and store result at location\n"
41440" * pointed by p. The function returns old.\n"
41441" */\n"
41442"int __ovld atomic_and(volatile __global int *p, int val);\n"
41443"unsigned int __ovld atomic_and(volatile __global unsigned int *p, unsigned int val);\n"
41444"int __ovld atomic_and(volatile __local int *p, int val);\n"
41445"unsigned int __ovld atomic_and(volatile __local unsigned int *p, unsigned int val);\n"
41446"\n"
41447"#if defined(cl_khr_global_int32_extended_atomics)\n"
41448"int __ovld atom_and(volatile __global int *p, int val);\n"
41449"unsigned int __ovld atom_and(volatile __global unsigned int *p, unsigned int val);\n"
41450"#endif\n"
41451"#if defined(cl_khr_local_int32_extended_atomics)\n"
41452"int __ovld atom_and(volatile __local int *p, int val);\n"
41453"unsigned int __ovld atom_and(volatile __local unsigned int *p, unsigned int val);\n"
41454"#endif\n"
41455"\n"
41456"#if defined(cl_khr_int64_extended_atomics)\n"
41457"long __ovld atom_and(volatile __global long *p, long val);\n"
41458"unsigned long __ovld atom_and(volatile __global unsigned long *p, unsigned long val);\n"
41459"long __ovld atom_and(volatile __local long *p, long val);\n"
41460"unsigned long __ovld atom_and(volatile __local unsigned long *p, unsigned long val);\n"
41461"#endif\n"
41462"\n"
41463"/**\n"
41464" * Read the 32-bit value (referred to as old)\n"
41465" * stored at location pointed by p. Compute\n"
41466" * (old | val) and store result at location\n"
41467" * pointed by p. The function returns old.\n"
41468" */\n"
41469"int __ovld atomic_or(volatile __global int *p, int val);\n"
41470"unsigned int __ovld atomic_or(volatile __global unsigned int *p, unsigned int val);\n"
41471"int __ovld atomic_or(volatile __local int *p, int val);\n"
41472"unsigned int __ovld atomic_or(volatile __local unsigned int *p, unsigned int val);\n"
41473"\n"
41474"#if defined(cl_khr_global_int32_extended_atomics)\n"
41475"int __ovld atom_or(volatile __global int *p, int val);\n"
41476"unsigned int __ovld atom_or(volatile __global unsigned int *p, unsigned int val);\n"
41477"#endif\n"
41478"#if defined(cl_khr_local_int32_extended_atomics)\n"
41479"int __ovld atom_or(volatile __local int *p, int val);\n"
41480"unsigned int __ovld atom_or(volatile __local unsigned int *p, unsigned int val);\n"
41481"#endif\n"
41482"\n"
41483"#if defined(cl_khr_int64_extended_atomics)\n"
41484"long __ovld atom_or(volatile __global long *p, long val);\n"
41485"unsigned long __ovld atom_or(volatile __global unsigned long *p, unsigned long val);\n"
41486"long __ovld atom_or(volatile __local long *p, long val);\n"
41487"unsigned long __ovld atom_or(volatile __local unsigned long *p, unsigned long val);\n"
41488"#endif\n"
41489"\n"
41490"/**\n"
41491" * Read the 32-bit value (referred to as old)\n"
41492" * stored at location pointed by p. Compute\n"
41493" * (old ^ val) and store result at location\n"
41494" * pointed by p. The function returns old.\n"
41495" */\n"
41496"int __ovld atomic_xor(volatile __global int *p, int val);\n"
41497"unsigned int __ovld atomic_xor(volatile __global unsigned int *p, unsigned int val);\n"
41498"int __ovld atomic_xor(volatile __local int *p, int val);\n"
41499"unsigned int __ovld atomic_xor(volatile __local unsigned int *p, unsigned int val);\n"
41500"\n"
41501"#if defined(cl_khr_global_int32_extended_atomics)\n"
41502"int __ovld atom_xor(volatile __global int *p, int val);\n"
41503"unsigned int __ovld atom_xor(volatile __global unsigned int *p, unsigned int val);\n"
41504"#endif\n"
41505"#if defined(cl_khr_local_int32_extended_atomics)\n"
41506"int __ovld atom_xor(volatile __local int *p, int val);\n"
41507"unsigned int __ovld atom_xor(volatile __local unsigned int *p, unsigned int val);\n"
41508"#endif\n"
41509"\n"
41510"#if defined(cl_khr_int64_extended_atomics)\n"
41511"long __ovld atom_xor(volatile __global long *p, long val);\n"
41512"unsigned long __ovld atom_xor(volatile __global unsigned long *p, unsigned long val);\n"
41513"long __ovld atom_xor(volatile __local long *p, long val);\n"
41514"unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long val);\n"
41515"#endif\n"
41516"\n"
41517"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41518"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : disable\n"
41519"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : disable\n"
41520"#endif\n"
41521"\n"
41522"// OpenCL v2.0 s6.13.11 - Atomics Functions\n"
41523"\n"
41524"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41525"#ifndef ATOMIC_VAR_INIT\n"
41526"#define ATOMIC_VAR_INIT(x) (x)\n"
41527"#endif //ATOMIC_VAR_INIT\n"
41528"#define ATOMIC_FLAG_INIT 0\n"
41529"\n"
41530"// enum values aligned with what clang uses in EmitAtomicExpr()\n"
41531"typedef enum memory_order\n"
41532"{\n"
41533" memory_order_relaxed = __ATOMIC_RELAXED,\n"
41534" memory_order_acquire = __ATOMIC_ACQUIRE,\n"
41535" memory_order_release = __ATOMIC_RELEASE,\n"
41536" memory_order_acq_rel = __ATOMIC_ACQ_REL,\n"
41537" memory_order_seq_cst = __ATOMIC_SEQ_CST\n"
41538"} memory_order;\n"
41539"\n"
41540"// double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics\n"
41541"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41542"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
41543"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
41544"#endif\n"
41545"\n"
41546"// atomic_init()\n"
41547"void __ovld atomic_init(volatile atomic_int *object, int value);\n"
41548"void __ovld atomic_init(volatile atomic_uint *object, uint value);\n"
41549"void __ovld atomic_init(volatile atomic_float *object, float value);\n"
41550"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41551"void __ovld atomic_init(volatile atomic_long *object, long value);\n"
41552"void __ovld atomic_init(volatile atomic_ulong *object, ulong value);\n"
41553"#ifdef cl_khr_fp64\n"
41554"void __ovld atomic_init(volatile atomic_double *object, double value);\n"
41555"#endif //cl_khr_fp64\n"
41556"#endif\n"
41557"\n"
41558"// atomic_work_item_fence()\n"
41559"void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, memory_scope scope);\n"
41560"\n"
41561"// atomic_fetch()\n"
41562"\n"
41563"int __ovld atomic_fetch_add(volatile atomic_int *object, int operand);\n"
41564"int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
41565"int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
41566"uint __ovld atomic_fetch_add(volatile atomic_uint *object, uint operand);\n"
41567"uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
41568"uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
41569"int __ovld atomic_fetch_sub(volatile atomic_int *object, int operand);\n"
41570"int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
41571"int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
41572"uint __ovld atomic_fetch_sub(volatile atomic_uint *object, uint operand);\n"
41573"uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
41574"uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
41575"int __ovld atomic_fetch_or(volatile atomic_int *object, int operand);\n"
41576"int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
41577"int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
41578"uint __ovld atomic_fetch_or(volatile atomic_uint *object, uint operand);\n"
41579"uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
41580"uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
41581"int __ovld atomic_fetch_xor(volatile atomic_int *object, int operand);\n"
41582"int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
41583"int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
41584"uint __ovld atomic_fetch_xor(volatile atomic_uint *object, uint operand);\n"
41585"uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
41586"uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
41587"int __ovld atomic_fetch_and(volatile atomic_int *object, int operand);\n"
41588"int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
41589"int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
41590"uint __ovld atomic_fetch_and(volatile atomic_uint *object, uint operand);\n"
41591"uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
41592"uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
41593"int __ovld atomic_fetch_min(volatile atomic_int *object, int operand);\n"
41594"int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
41595"int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
41596"uint __ovld atomic_fetch_min(volatile atomic_uint *object, uint operand);\n"
41597"uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
41598"uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
41599"uint __ovld atomic_fetch_min(volatile atomic_uint *object, int operand);\n"
41600"uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order);\n"
41601"uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope);\n"
41602"int __ovld atomic_fetch_max(volatile atomic_int *object, int operand);\n"
41603"int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
41604"int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
41605"uint __ovld atomic_fetch_max(volatile atomic_uint *object, uint operand);\n"
41606"uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
41607"uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
41608"uint __ovld atomic_fetch_max(volatile atomic_uint *object, int operand);\n"
41609"uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order);\n"
41610"uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope);\n"
41611"\n"
41612"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41613"long __ovld atomic_fetch_add(volatile atomic_long *object, long operand);\n"
41614"long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
41615"long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
41616"ulong __ovld atomic_fetch_add(volatile atomic_ulong *object, ulong operand);\n"
41617"ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
41618"ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
41619"long __ovld atomic_fetch_sub(volatile atomic_long *object, long operand);\n"
41620"long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
41621"long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
41622"ulong __ovld atomic_fetch_sub(volatile atomic_ulong *object, ulong operand);\n"
41623"ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
41624"ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
41625"long __ovld atomic_fetch_or(volatile atomic_long *object, long operand);\n"
41626"long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
41627"long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
41628"ulong __ovld atomic_fetch_or(volatile atomic_ulong *object, ulong operand);\n"
41629"ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
41630"ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
41631"long __ovld atomic_fetch_xor(volatile atomic_long *object, long operand);\n"
41632"long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
41633"long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
41634"ulong __ovld atomic_fetch_xor(volatile atomic_ulong *object, ulong operand);\n"
41635"ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
41636"ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
41637"long __ovld atomic_fetch_and(volatile atomic_long *object, long operand);\n"
41638"long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
41639"long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
41640"ulong __ovld atomic_fetch_and(volatile atomic_ulong *object, ulong operand);\n"
41641"ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
41642"ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
41643"long __ovld atomic_fetch_min(volatile atomic_long *object, long operand);\n"
41644"long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
41645"long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
41646"ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, ulong operand);\n"
41647"ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
41648"ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
41649"ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, long operand);\n"
41650"ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order);\n"
41651"ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope);\n"
41652"long __ovld atomic_fetch_max(volatile atomic_long *object, long operand);\n"
41653"long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
41654"long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
41655"ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, ulong operand);\n"
41656"ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
41657"ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
41658"ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, long operand);\n"
41659"ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order);\n"
41660"ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope);\n"
41661"#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41662"\n"
41663"// OpenCL v2.0 s6.13.11.7.5:\n"
41664"// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t.\n"
41665"// or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t.\n"
41666"\n"
41667"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41668"uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand);\n"
41669"uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);\n"
41670"uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);\n"
41671"uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ptrdiff_t operand);\n"
41672"uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);\n"
41673"uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);\n"
41674"\n"
41675"uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, intptr_t operand);\n"
41676"uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);\n"
41677"uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);\n"
41678"uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, intptr_t operand);\n"
41679"uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);\n"
41680"uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);\n"
41681"uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, intptr_t operand);\n"
41682"uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);\n"
41683"uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);\n"
41684"uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, intptr_t opermax);\n"
41685"uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder);\n"
41686"uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope);\n"
41687"uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, intptr_t opermax);\n"
41688"uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder);\n"
41689"uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope);\n"
41690"\n"
41691"intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, uintptr_t operand);\n"
41692"intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);\n"
41693"intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);\n"
41694"intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, uintptr_t operand);\n"
41695"intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);\n"
41696"intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);\n"
41697"intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, uintptr_t operand);\n"
41698"intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);\n"
41699"intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);\n"
41700"intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, uintptr_t opermax);\n"
41701"intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder);\n"
41702"intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope);\n"
41703"intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, uintptr_t opermax);\n"
41704"intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder);\n"
41705"intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope);\n"
41706"#endif\n"
41707"\n"
41708"// atomic_store()\n"
41709"\n"
41710"void __ovld atomic_store(volatile atomic_int *object, int desired);\n"
41711"void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order);\n"
41712"void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);\n"
41713"void __ovld atomic_store(volatile atomic_uint *object, uint desired);\n"
41714"void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order);\n"
41715"void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);\n"
41716"void __ovld atomic_store(volatile atomic_float *object, float desired);\n"
41717"void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order);\n"
41718"void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);\n"
41719"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41720"#ifdef cl_khr_fp64\n"
41721"void __ovld atomic_store(volatile atomic_double *object, double desired);\n"
41722"void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order);\n"
41723"void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope);\n"
41724"#endif //cl_khr_fp64\n"
41725"void __ovld atomic_store(volatile atomic_long *object, long desired);\n"
41726"void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order);\n"
41727"void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);\n"
41728"void __ovld atomic_store(volatile atomic_ulong *object, ulong desired);\n"
41729"void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);\n"
41730"void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);\n"
41731"#endif\n"
41732"\n"
41733"// atomic_load()\n"
41734"\n"
41735"int __ovld atomic_load(volatile atomic_int *object);\n"
41736"int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order);\n"
41737"int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, memory_scope scope);\n"
41738"uint __ovld atomic_load(volatile atomic_uint *object);\n"
41739"uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order);\n"
41740"uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order, memory_scope scope);\n"
41741"float __ovld atomic_load(volatile atomic_float *object);\n"
41742"float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order);\n"
41743"float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order, memory_scope scope);\n"
41744"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41745"#ifdef cl_khr_fp64\n"
41746"double __ovld atomic_load(volatile atomic_double *object);\n"
41747"double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order);\n"
41748"double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order, memory_scope scope);\n"
41749"#endif //cl_khr_fp64\n"
41750"long __ovld atomic_load(volatile atomic_long *object);\n"
41751"long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order);\n"
41752"long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order, memory_scope scope);\n"
41753"ulong __ovld atomic_load(volatile atomic_ulong *object);\n"
41754"ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order);\n"
41755"ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order, memory_scope scope);\n"
41756"#endif\n"
41757"\n"
41758"// atomic_exchange()\n"
41759"\n"
41760"int __ovld atomic_exchange(volatile atomic_int *object, int desired);\n"
41761"int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order);\n"
41762"int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);\n"
41763"uint __ovld atomic_exchange(volatile atomic_uint *object, uint desired);\n"
41764"uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order);\n"
41765"uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);\n"
41766"float __ovld atomic_exchange(volatile atomic_float *object, float desired);\n"
41767"float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order);\n"
41768"float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);\n"
41769"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41770"#ifdef cl_khr_fp64\n"
41771"double __ovld atomic_exchange(volatile atomic_double *object, double desired);\n"
41772"double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order);\n"
41773"double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope);\n"
41774"#endif //cl_khr_fp64\n"
41775"long __ovld atomic_exchange(volatile atomic_long *object, long desired);\n"
41776"long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order);\n"
41777"long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);\n"
41778"ulong __ovld atomic_exchange(volatile atomic_ulong *object, ulong desired);\n"
41779"ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);\n"
41780"ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);\n"
41781"#endif\n"
41782"\n"
41783"// atomic_compare_exchange_strong() and atomic_compare_exchange_weak()\n"
41784"\n"
41785"bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, int *expected, int desired);\n"
41786"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,\n"
41787" int desired, memory_order success, memory_order failure);\n"
41788"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,\n"
41789" int desired, memory_order success, memory_order failure, memory_scope scope);\n"
41790"bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, uint *expected, uint desired);\n"
41791"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,\n"
41792" uint desired, memory_order success, memory_order failure);\n"
41793"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,\n"
41794" uint desired, memory_order success, memory_order failure, memory_scope scope);\n"
41795"bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, int *expected, int desired);\n"
41796"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected,\n"
41797" int desired, memory_order success, memory_order failure);\n"
41798"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected,\n"
41799" int desired, memory_order success, memory_order failure, memory_scope scope);\n"
41800"bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, uint *expected, uint desired);\n"
41801"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected,\n"
41802" uint desired, memory_order success, memory_order failure);\n"
41803"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected,\n"
41804" uint desired, memory_order success, memory_order failure, memory_scope scope);\n"
41805"bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, float *expected, float desired);\n"
41806"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected,\n"
41807" float desired, memory_order success, memory_order failure);\n"
41808"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected,\n"
41809" float desired, memory_order success, memory_order failure, memory_scope scope);\n"
41810"bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, float *expected, float desired);\n"
41811"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected,\n"
41812" float desired, memory_order success, memory_order failure);\n"
41813"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected,\n"
41814" float desired, memory_order success, memory_order failure, memory_scope scope);\n"
41815"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41816"#ifdef cl_khr_fp64\n"
41817"bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, double *expected, double desired);\n"
41818"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected,\n"
41819" double desired, memory_order success, memory_order failure);\n"
41820"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected,\n"
41821" double desired, memory_order success, memory_order failure, memory_scope scope);\n"
41822"bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, double *expected, double desired);\n"
41823"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected,\n"
41824" double desired, memory_order success, memory_order failure);\n"
41825"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected,\n"
41826" double desired, memory_order success, memory_order failure, memory_scope scope);\n"
41827"#endif //cl_khr_fp64\n"
41828"bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, long *expected, long desired);\n"
41829"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected,\n"
41830" long desired, memory_order success, memory_order failure);\n"
41831"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected,\n"
41832" long desired, memory_order success, memory_order failure, memory_scope scope);\n"
41833"bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, long *expected, long desired);\n"
41834"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected,\n"
41835" long desired, memory_order success, memory_order failure);\n"
41836"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected,\n"
41837" long desired, memory_order success, memory_order failure, memory_scope scope);\n"
41838"bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ulong *expected, ulong desired);\n"
41839"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected,\n"
41840" ulong desired, memory_order success, memory_order failure);\n"
41841"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected,\n"
41842" ulong desired, memory_order success, memory_order failure, memory_scope scope);\n"
41843"bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ulong *expected, ulong desired);\n"
41844"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,\n"
41845" ulong desired, memory_order success, memory_order failure);\n"
41846"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,\n"
41847" ulong desired, memory_order success, memory_order failure, memory_scope scope);\n"
41848"#endif\n"
41849"\n"
41850"// atomic_flag_test_and_set() and atomic_flag_clear()\n"
41851"\n"
41852"bool __ovld atomic_flag_test_and_set(volatile atomic_flag *object);\n"
41853"bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order);\n"
41854"bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);\n"
41855"void __ovld atomic_flag_clear(volatile atomic_flag *object);\n"
41856"void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order);\n"
41857"void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);\n"
41858"\n"
41859"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41860"\n"
41861"// OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions\n"
41862"\n"
41863"/**\n"
41864" * The shuffle and shuffle2 built-in functions construct\n"
41865" * a permutation of elements from one or two input\n"
41866" * vectors respectively that are of the same type,\n"
41867" * returning a vector with the same element type as the\n"
41868" * input and length that is the same as the shuffle mask.\n"
41869" * The size of each element in the mask must match the\n"
41870" * size of each element in the result. For shuffle, only\n"
41871" * the ilogb(2m-1) least significant bits of each mask\n"
41872" * element are considered. For shuffle2, only the\n"
41873" * ilogb(2m-1)+1 least significant bits of each mask\n"
41874" * element are considered. Other bits in the mask shall\n"
41875" * be ignored.\n"
41876" * The elements of the input vectors are numbered from\n"
41877" * left to right across one or both of the vectors. For this\n"
41878" * purpose, the number of elements in a vector is given\n"
41879" * by vec_step(gentypem). The shuffle mask operand\n"
41880" * specifies, for each element of the result vector, which\n"
41881" * element of the one or two input vectors the result\n"
41882" * element gets.\n"
41883" * Examples:\n"
41884" * uint4 mask = (uint4)(3, 2,\n"
41885" * 1, 0);\n"
41886" * float4 a;\n"
41887" * float4 r = shuffle(a, mask);\n"
41888" * // r.s0123 = a.wzyx\n"
41889" * uint8 mask = (uint8)(0, 1, 2, 3,\n"
41890" * 4, 5, 6, 7);\n"
41891" * float4 a, b;\n"
41892" * float8 r = shuffle2(a, b, mask);\n"
41893" * // r.s0123 = a.xyzw\n"
41894" * // r.s4567 = b.xyzw\n"
41895" * uint4 mask;\n"
41896" * float8 a;\n"
41897" * float4 b;\n"
41898" * b = shuffle(a, mask);\n"
41899" * Examples that are not valid are:\n"
41900" * uint8 mask;\n"
41901" * short16 a;\n"
41902" * short8 b;\n"
41903" * b = shuffle(a, mask); <- not valid\n"
41904" */\n"
41905"char2 __ovld __cnfn shuffle(char2 x, uchar2 mask);\n"
41906"char2 __ovld __cnfn shuffle(char4 x, uchar2 mask);\n"
41907"char2 __ovld __cnfn shuffle(char8 x, uchar2 mask);\n"
41908"char2 __ovld __cnfn shuffle(char16 x, uchar2 mask);\n"
41909"\n"
41910"uchar2 __ovld __cnfn shuffle(uchar2 x, uchar2 mask);\n"
41911"uchar2 __ovld __cnfn shuffle(uchar4 x, uchar2 mask);\n"
41912"uchar2 __ovld __cnfn shuffle(uchar8 x, uchar2 mask);\n"
41913"uchar2 __ovld __cnfn shuffle(uchar16 x, uchar2 mask);\n"
41914"\n"
41915"short2 __ovld __cnfn shuffle(short2 x, ushort2 mask);\n"
41916"short2 __ovld __cnfn shuffle(short4 x, ushort2 mask);\n"
41917"short2 __ovld __cnfn shuffle(short8 x, ushort2 mask);\n"
41918"short2 __ovld __cnfn shuffle(short16 x, ushort2 mask);\n"
41919"\n"
41920"ushort2 __ovld __cnfn shuffle(ushort2 x, ushort2 mask);\n"
41921"ushort2 __ovld __cnfn shuffle(ushort4 x, ushort2 mask);\n"
41922"ushort2 __ovld __cnfn shuffle(ushort8 x, ushort2 mask);\n"
41923"ushort2 __ovld __cnfn shuffle(ushort16 x, ushort2 mask);\n"
41924"\n"
41925"int2 __ovld __cnfn shuffle(int2 x, uint2 mask);\n"
41926"int2 __ovld __cnfn shuffle(int4 x, uint2 mask);\n"
41927"int2 __ovld __cnfn shuffle(int8 x, uint2 mask);\n"
41928"int2 __ovld __cnfn shuffle(int16 x, uint2 mask);\n"
41929"\n"
41930"uint2 __ovld __cnfn shuffle(uint2 x, uint2 mask);\n"
41931"uint2 __ovld __cnfn shuffle(uint4 x, uint2 mask);\n"
41932"uint2 __ovld __cnfn shuffle(uint8 x, uint2 mask);\n"
41933"uint2 __ovld __cnfn shuffle(uint16 x, uint2 mask);\n"
41934"\n"
41935"long2 __ovld __cnfn shuffle(long2 x, ulong2 mask);\n"
41936"long2 __ovld __cnfn shuffle(long4 x, ulong2 mask);\n"
41937"long2 __ovld __cnfn shuffle(long8 x, ulong2 mask);\n"
41938"long2 __ovld __cnfn shuffle(long16 x, ulong2 mask);\n"
41939"\n"
41940"ulong2 __ovld __cnfn shuffle(ulong2 x, ulong2 mask);\n"
41941"ulong2 __ovld __cnfn shuffle(ulong4 x, ulong2 mask);\n"
41942"ulong2 __ovld __cnfn shuffle(ulong8 x, ulong2 mask);\n"
41943"ulong2 __ovld __cnfn shuffle(ulong16 x, ulong2 mask);\n"
41944"\n"
41945"float2 __ovld __cnfn shuffle(float2 x, uint2 mask);\n"
41946"float2 __ovld __cnfn shuffle(float4 x, uint2 mask);\n"
41947"float2 __ovld __cnfn shuffle(float8 x, uint2 mask);\n"
41948"float2 __ovld __cnfn shuffle(float16 x, uint2 mask);\n"
41949"\n"
41950"char4 __ovld __cnfn shuffle(char2 x, uchar4 mask);\n"
41951"char4 __ovld __cnfn shuffle(char4 x, uchar4 mask);\n"
41952"char4 __ovld __cnfn shuffle(char8 x, uchar4 mask);\n"
41953"char4 __ovld __cnfn shuffle(char16 x, uchar4 mask);\n"
41954"\n"
41955"uchar4 __ovld __cnfn shuffle(uchar2 x, uchar4 mask);\n"
41956"uchar4 __ovld __cnfn shuffle(uchar4 x, uchar4 mask);\n"
41957"uchar4 __ovld __cnfn shuffle(uchar8 x, uchar4 mask);\n"
41958"uchar4 __ovld __cnfn shuffle(uchar16 x, uchar4 mask);\n"
41959"\n"
41960"short4 __ovld __cnfn shuffle(short2 x, ushort4 mask);\n"
41961"short4 __ovld __cnfn shuffle(short4 x, ushort4 mask);\n"
41962"short4 __ovld __cnfn shuffle(short8 x, ushort4 mask);\n"
41963"short4 __ovld __cnfn shuffle(short16 x, ushort4 mask);\n"
41964"\n"
41965"ushort4 __ovld __cnfn shuffle(ushort2 x, ushort4 mask);\n"
41966"ushort4 __ovld __cnfn shuffle(ushort4 x, ushort4 mask);\n"
41967"ushort4 __ovld __cnfn shuffle(ushort8 x, ushort4 mask);\n"
41968"ushort4 __ovld __cnfn shuffle(ushort16 x, ushort4 mask);\n"
41969"\n"
41970"int4 __ovld __cnfn shuffle(int2 x, uint4 mask);\n"
41971"int4 __ovld __cnfn shuffle(int4 x, uint4 mask);\n"
41972"int4 __ovld __cnfn shuffle(int8 x, uint4 mask);\n"
41973"int4 __ovld __cnfn shuffle(int16 x, uint4 mask);\n"
41974"\n"
41975"uint4 __ovld __cnfn shuffle(uint2 x, uint4 mask);\n"
41976"uint4 __ovld __cnfn shuffle(uint4 x, uint4 mask);\n"
41977"uint4 __ovld __cnfn shuffle(uint8 x, uint4 mask);\n"
41978"uint4 __ovld __cnfn shuffle(uint16 x, uint4 mask);\n"
41979"\n"
41980"long4 __ovld __cnfn shuffle(long2 x, ulong4 mask);\n"
41981"long4 __ovld __cnfn shuffle(long4 x, ulong4 mask);\n"
41982"long4 __ovld __cnfn shuffle(long8 x, ulong4 mask);\n"
41983"long4 __ovld __cnfn shuffle(long16 x, ulong4 mask);\n"
41984"\n"
41985"ulong4 __ovld __cnfn shuffle(ulong2 x, ulong4 mask);\n"
41986"ulong4 __ovld __cnfn shuffle(ulong4 x, ulong4 mask);\n"
41987"ulong4 __ovld __cnfn shuffle(ulong8 x, ulong4 mask);\n"
41988"ulong4 __ovld __cnfn shuffle(ulong16 x, ulong4 mask);\n"
41989"\n"
41990"float4 __ovld __cnfn shuffle(float2 x, uint4 mask);\n"
41991"float4 __ovld __cnfn shuffle(float4 x, uint4 mask);\n"
41992"float4 __ovld __cnfn shuffle(float8 x, uint4 mask);\n"
41993"float4 __ovld __cnfn shuffle(float16 x, uint4 mask);\n"
41994"\n"
41995"char8 __ovld __cnfn shuffle(char2 x, uchar8 mask);\n"
41996"char8 __ovld __cnfn shuffle(char4 x, uchar8 mask);\n"
41997"char8 __ovld __cnfn shuffle(char8 x, uchar8 mask);\n"
41998"char8 __ovld __cnfn shuffle(char16 x, uchar8 mask);\n"
41999"\n"
42000"uchar8 __ovld __cnfn shuffle(uchar2 x, uchar8 mask);\n"
42001"uchar8 __ovld __cnfn shuffle(uchar4 x, uchar8 mask);\n"
42002"uchar8 __ovld __cnfn shuffle(uchar8 x, uchar8 mask);\n"
42003"uchar8 __ovld __cnfn shuffle(uchar16 x, uchar8 mask);\n"
42004"\n"
42005"short8 __ovld __cnfn shuffle(short2 x, ushort8 mask);\n"
42006"short8 __ovld __cnfn shuffle(short4 x, ushort8 mask);\n"
42007"short8 __ovld __cnfn shuffle(short8 x, ushort8 mask);\n"
42008"short8 __ovld __cnfn shuffle(short16 x, ushort8 mask);\n"
42009"\n"
42010"ushort8 __ovld __cnfn shuffle(ushort2 x, ushort8 mask);\n"
42011"ushort8 __ovld __cnfn shuffle(ushort4 x, ushort8 mask);\n"
42012"ushort8 __ovld __cnfn shuffle(ushort8 x, ushort8 mask);\n"
42013"ushort8 __ovld __cnfn shuffle(ushort16 x, ushort8 mask);\n"
42014"\n"
42015"int8 __ovld __cnfn shuffle(int2 x, uint8 mask);\n"
42016"int8 __ovld __cnfn shuffle(int4 x, uint8 mask);\n"
42017"int8 __ovld __cnfn shuffle(int8 x, uint8 mask);\n"
42018"int8 __ovld __cnfn shuffle(int16 x, uint8 mask);\n"
42019"\n"
42020"uint8 __ovld __cnfn shuffle(uint2 x, uint8 mask);\n"
42021"uint8 __ovld __cnfn shuffle(uint4 x, uint8 mask);\n"
42022"uint8 __ovld __cnfn shuffle(uint8 x, uint8 mask);\n"
42023"uint8 __ovld __cnfn shuffle(uint16 x, uint8 mask);\n"
42024"\n"
42025"long8 __ovld __cnfn shuffle(long2 x, ulong8 mask);\n"
42026"long8 __ovld __cnfn shuffle(long4 x, ulong8 mask);\n"
42027"long8 __ovld __cnfn shuffle(long8 x, ulong8 mask);\n"
42028"long8 __ovld __cnfn shuffle(long16 x, ulong8 mask);\n"
42029"\n"
42030"ulong8 __ovld __cnfn shuffle(ulong2 x, ulong8 mask);\n"
42031"ulong8 __ovld __cnfn shuffle(ulong4 x, ulong8 mask);\n"
42032"ulong8 __ovld __cnfn shuffle(ulong8 x, ulong8 mask);\n"
42033"ulong8 __ovld __cnfn shuffle(ulong16 x, ulong8 mask);\n"
42034"\n"
42035"float8 __ovld __cnfn shuffle(float2 x, uint8 mask);\n"
42036"float8 __ovld __cnfn shuffle(float4 x, uint8 mask);\n"
42037"float8 __ovld __cnfn shuffle(float8 x, uint8 mask);\n"
42038"float8 __ovld __cnfn shuffle(float16 x, uint8 mask);\n"
42039"\n"
42040"char16 __ovld __cnfn shuffle(char2 x, uchar16 mask);\n"
42041"char16 __ovld __cnfn shuffle(char4 x, uchar16 mask);\n"
42042"char16 __ovld __cnfn shuffle(char8 x, uchar16 mask);\n"
42043"char16 __ovld __cnfn shuffle(char16 x, uchar16 mask);\n"
42044"\n"
42045"uchar16 __ovld __cnfn shuffle(uchar2 x, uchar16 mask);\n"
42046"uchar16 __ovld __cnfn shuffle(uchar4 x, uchar16 mask);\n"
42047"uchar16 __ovld __cnfn shuffle(uchar8 x, uchar16 mask);\n"
42048"uchar16 __ovld __cnfn shuffle(uchar16 x, uchar16 mask);\n"
42049"\n"
42050"short16 __ovld __cnfn shuffle(short2 x, ushort16 mask);\n"
42051"short16 __ovld __cnfn shuffle(short4 x, ushort16 mask);\n"
42052"short16 __ovld __cnfn shuffle(short8 x, ushort16 mask);\n"
42053"short16 __ovld __cnfn shuffle(short16 x, ushort16 mask);\n"
42054"\n"
42055"ushort16 __ovld __cnfn shuffle(ushort2 x, ushort16 mask);\n"
42056"ushort16 __ovld __cnfn shuffle(ushort4 x, ushort16 mask);\n"
42057"ushort16 __ovld __cnfn shuffle(ushort8 x, ushort16 mask);\n"
42058"ushort16 __ovld __cnfn shuffle(ushort16 x, ushort16 mask);\n"
42059"\n"
42060"int16 __ovld __cnfn shuffle(int2 x, uint16 mask);\n"
42061"int16 __ovld __cnfn shuffle(int4 x, uint16 mask);\n"
42062"int16 __ovld __cnfn shuffle(int8 x, uint16 mask);\n"
42063"int16 __ovld __cnfn shuffle(int16 x, uint16 mask);\n"
42064"\n"
42065"uint16 __ovld __cnfn shuffle(uint2 x, uint16 mask);\n"
42066"uint16 __ovld __cnfn shuffle(uint4 x, uint16 mask);\n"
42067"uint16 __ovld __cnfn shuffle(uint8 x, uint16 mask);\n"
42068"uint16 __ovld __cnfn shuffle(uint16 x, uint16 mask);\n"
42069"\n"
42070"long16 __ovld __cnfn shuffle(long2 x, ulong16 mask);\n"
42071"long16 __ovld __cnfn shuffle(long4 x, ulong16 mask);\n"
42072"long16 __ovld __cnfn shuffle(long8 x, ulong16 mask);\n"
42073"long16 __ovld __cnfn shuffle(long16 x, ulong16 mask);\n"
42074"\n"
42075"ulong16 __ovld __cnfn shuffle(ulong2 x, ulong16 mask);\n"
42076"ulong16 __ovld __cnfn shuffle(ulong4 x, ulong16 mask);\n"
42077"ulong16 __ovld __cnfn shuffle(ulong8 x, ulong16 mask);\n"
42078"ulong16 __ovld __cnfn shuffle(ulong16 x, ulong16 mask);\n"
42079"\n"
42080"float16 __ovld __cnfn shuffle(float2 x, uint16 mask);\n"
42081"float16 __ovld __cnfn shuffle(float4 x, uint16 mask);\n"
42082"float16 __ovld __cnfn shuffle(float8 x, uint16 mask);\n"
42083"float16 __ovld __cnfn shuffle(float16 x, uint16 mask);\n"
42084"\n"
42085"#ifdef cl_khr_fp64\n"
42086"double2 __ovld __cnfn shuffle(double2 x, ulong2 mask);\n"
42087"double2 __ovld __cnfn shuffle(double4 x, ulong2 mask);\n"
42088"double2 __ovld __cnfn shuffle(double8 x, ulong2 mask);\n"
42089"double2 __ovld __cnfn shuffle(double16 x, ulong2 mask);\n"
42090"\n"
42091"double4 __ovld __cnfn shuffle(double2 x, ulong4 mask);\n"
42092"double4 __ovld __cnfn shuffle(double4 x, ulong4 mask);\n"
42093"double4 __ovld __cnfn shuffle(double8 x, ulong4 mask);\n"
42094"double4 __ovld __cnfn shuffle(double16 x, ulong4 mask);\n"
42095"\n"
42096"double8 __ovld __cnfn shuffle(double2 x, ulong8 mask);\n"
42097"double8 __ovld __cnfn shuffle(double4 x, ulong8 mask);\n"
42098"double8 __ovld __cnfn shuffle(double8 x, ulong8 mask);\n"
42099"double8 __ovld __cnfn shuffle(double16 x, ulong8 mask);\n"
42100"\n"
42101"double16 __ovld __cnfn shuffle(double2 x, ulong16 mask);\n"
42102"double16 __ovld __cnfn shuffle(double4 x, ulong16 mask);\n"
42103"double16 __ovld __cnfn shuffle(double8 x, ulong16 mask);\n"
42104"double16 __ovld __cnfn shuffle(double16 x, ulong16 mask);\n"
42105"#endif //cl_khr_fp64\n"
42106"\n"
42107"#ifdef cl_khr_fp16\n"
42108"half2 __ovld __cnfn shuffle(half2 x, ushort2 mask);\n"
42109"half2 __ovld __cnfn shuffle(half4 x, ushort2 mask);\n"
42110"half2 __ovld __cnfn shuffle(half8 x, ushort2 mask);\n"
42111"half2 __ovld __cnfn shuffle(half16 x, ushort2 mask);\n"
42112"\n"
42113"half4 __ovld __cnfn shuffle(half2 x, ushort4 mask);\n"
42114"half4 __ovld __cnfn shuffle(half4 x, ushort4 mask);\n"
42115"half4 __ovld __cnfn shuffle(half8 x, ushort4 mask);\n"
42116"half4 __ovld __cnfn shuffle(half16 x, ushort4 mask);\n"
42117"\n"
42118"half8 __ovld __cnfn shuffle(half2 x, ushort8 mask);\n"
42119"half8 __ovld __cnfn shuffle(half4 x, ushort8 mask);\n"
42120"half8 __ovld __cnfn shuffle(half8 x, ushort8 mask);\n"
42121"half8 __ovld __cnfn shuffle(half16 x, ushort8 mask);\n"
42122"\n"
42123"half16 __ovld __cnfn shuffle(half2 x, ushort16 mask);\n"
42124"half16 __ovld __cnfn shuffle(half4 x, ushort16 mask);\n"
42125"half16 __ovld __cnfn shuffle(half8 x, ushort16 mask);\n"
42126"half16 __ovld __cnfn shuffle(half16 x, ushort16 mask);\n"
42127"#endif //cl_khr_fp16\n"
42128"\n"
42129"char2 __ovld __cnfn shuffle2(char2 x, char2 y, uchar2 mask);\n"
42130"char2 __ovld __cnfn shuffle2(char4 x, char4 y, uchar2 mask);\n"
42131"char2 __ovld __cnfn shuffle2(char8 x, char8 y, uchar2 mask);\n"
42132"char2 __ovld __cnfn shuffle2(char16 x, char16 y, uchar2 mask);\n"
42133"\n"
42134"uchar2 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar2 mask);\n"
42135"uchar2 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar2 mask);\n"
42136"uchar2 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar2 mask);\n"
42137"uchar2 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar2 mask);\n"
42138"\n"
42139"short2 __ovld __cnfn shuffle2(short2 x, short2 y, ushort2 mask);\n"
42140"short2 __ovld __cnfn shuffle2(short4 x, short4 y, ushort2 mask);\n"
42141"short2 __ovld __cnfn shuffle2(short8 x, short8 y, ushort2 mask);\n"
42142"short2 __ovld __cnfn shuffle2(short16 x, short16 y, ushort2 mask);\n"
42143"\n"
42144"ushort2 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort2 mask);\n"
42145"ushort2 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort2 mask);\n"
42146"ushort2 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort2 mask);\n"
42147"ushort2 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort2 mask);\n"
42148"\n"
42149"int2 __ovld __cnfn shuffle2(int2 x, int2 y, uint2 mask);\n"
42150"int2 __ovld __cnfn shuffle2(int4 x, int4 y, uint2 mask);\n"
42151"int2 __ovld __cnfn shuffle2(int8 x, int8 y, uint2 mask);\n"
42152"int2 __ovld __cnfn shuffle2(int16 x, int16 y, uint2 mask);\n"
42153"\n"
42154"uint2 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint2 mask);\n"
42155"uint2 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint2 mask);\n"
42156"uint2 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint2 mask);\n"
42157"uint2 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint2 mask);\n"
42158"\n"
42159"long2 __ovld __cnfn shuffle2(long2 x, long2 y, ulong2 mask);\n"
42160"long2 __ovld __cnfn shuffle2(long4 x, long4 y, ulong2 mask);\n"
42161"long2 __ovld __cnfn shuffle2(long8 x, long8 y, ulong2 mask);\n"
42162"long2 __ovld __cnfn shuffle2(long16 x, long16 y, ulong2 mask);\n"
42163"\n"
42164"ulong2 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong2 mask);\n"
42165"ulong2 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong2 mask);\n"
42166"ulong2 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong2 mask);\n"
42167"ulong2 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong2 mask);\n"
42168"\n"
42169"float2 __ovld __cnfn shuffle2(float2 x, float2 y, uint2 mask);\n"
42170"float2 __ovld __cnfn shuffle2(float4 x, float4 y, uint2 mask);\n"
42171"float2 __ovld __cnfn shuffle2(float8 x, float8 y, uint2 mask);\n"
42172"float2 __ovld __cnfn shuffle2(float16 x, float16 y, uint2 mask);\n"
42173"\n"
42174"char4 __ovld __cnfn shuffle2(char2 x, char2 y, uchar4 mask);\n"
42175"char4 __ovld __cnfn shuffle2(char4 x, char4 y, uchar4 mask);\n"
42176"char4 __ovld __cnfn shuffle2(char8 x, char8 y, uchar4 mask);\n"
42177"char4 __ovld __cnfn shuffle2(char16 x, char16 y, uchar4 mask);\n"
42178"\n"
42179"uchar4 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar4 mask);\n"
42180"uchar4 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar4 mask);\n"
42181"uchar4 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar4 mask);\n"
42182"uchar4 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar4 mask);\n"
42183"\n"
42184"short4 __ovld __cnfn shuffle2(short2 x, short2 y, ushort4 mask);\n"
42185"short4 __ovld __cnfn shuffle2(short4 x, short4 y, ushort4 mask);\n"
42186"short4 __ovld __cnfn shuffle2(short8 x, short8 y, ushort4 mask);\n"
42187"short4 __ovld __cnfn shuffle2(short16 x, short16 y, ushort4 mask);\n"
42188"\n"
42189"ushort4 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort4 mask);\n"
42190"ushort4 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort4 mask);\n"
42191"ushort4 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort4 mask);\n"
42192"ushort4 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort4 mask);\n"
42193"\n"
42194"int4 __ovld __cnfn shuffle2(int2 x, int2 y, uint4 mask);\n"
42195"int4 __ovld __cnfn shuffle2(int4 x, int4 y, uint4 mask);\n"
42196"int4 __ovld __cnfn shuffle2(int8 x, int8 y, uint4 mask);\n"
42197"int4 __ovld __cnfn shuffle2(int16 x, int16 y, uint4 mask);\n"
42198"\n"
42199"uint4 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint4 mask);\n"
42200"uint4 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint4 mask);\n"
42201"uint4 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint4 mask);\n"
42202"uint4 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint4 mask);\n"
42203"\n"
42204"long4 __ovld __cnfn shuffle2(long2 x, long2 y, ulong4 mask);\n"
42205"long4 __ovld __cnfn shuffle2(long4 x, long4 y, ulong4 mask);\n"
42206"long4 __ovld __cnfn shuffle2(long8 x, long8 y, ulong4 mask);\n"
42207"long4 __ovld __cnfn shuffle2(long16 x, long16 y, ulong4 mask);\n"
42208"\n"
42209"ulong4 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong4 mask);\n"
42210"ulong4 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong4 mask);\n"
42211"ulong4 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong4 mask);\n"
42212"ulong4 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong4 mask);\n"
42213"\n"
42214"float4 __ovld __cnfn shuffle2(float2 x, float2 y, uint4 mask);\n"
42215"float4 __ovld __cnfn shuffle2(float4 x, float4 y, uint4 mask);\n"
42216"float4 __ovld __cnfn shuffle2(float8 x, float8 y, uint4 mask);\n"
42217"float4 __ovld __cnfn shuffle2(float16 x, float16 y, uint4 mask);\n"
42218"\n"
42219"char8 __ovld __cnfn shuffle2(char2 x, char2 y, uchar8 mask);\n"
42220"char8 __ovld __cnfn shuffle2(char4 x, char4 y, uchar8 mask);\n"
42221"char8 __ovld __cnfn shuffle2(char8 x, char8 y, uchar8 mask);\n"
42222"char8 __ovld __cnfn shuffle2(char16 x, char16 y, uchar8 mask);\n"
42223"\n"
42224"uchar8 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar8 mask);\n"
42225"uchar8 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar8 mask);\n"
42226"uchar8 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar8 mask);\n"
42227"uchar8 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar8 mask);\n"
42228"\n"
42229"short8 __ovld __cnfn shuffle2(short2 x, short2 y, ushort8 mask);\n"
42230"short8 __ovld __cnfn shuffle2(short4 x, short4 y, ushort8 mask);\n"
42231"short8 __ovld __cnfn shuffle2(short8 x, short8 y, ushort8 mask);\n"
42232"short8 __ovld __cnfn shuffle2(short16 x, short16 y, ushort8 mask);\n"
42233"\n"
42234"ushort8 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort8 mask);\n"
42235"ushort8 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort8 mask);\n"
42236"ushort8 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort8 mask);\n"
42237"ushort8 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort8 mask);\n"
42238"\n"
42239"int8 __ovld __cnfn shuffle2(int2 x, int2 y, uint8 mask);\n"
42240"int8 __ovld __cnfn shuffle2(int4 x, int4 y, uint8 mask);\n"
42241"int8 __ovld __cnfn shuffle2(int8 x, int8 y, uint8 mask);\n"
42242"int8 __ovld __cnfn shuffle2(int16 x, int16 y, uint8 mask);\n"
42243"\n"
42244"uint8 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint8 mask);\n"
42245"uint8 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint8 mask);\n"
42246"uint8 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint8 mask);\n"
42247"uint8 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint8 mask);\n"
42248"\n"
42249"long8 __ovld __cnfn shuffle2(long2 x, long2 y, ulong8 mask);\n"
42250"long8 __ovld __cnfn shuffle2(long4 x, long4 y, ulong8 mask);\n"
42251"long8 __ovld __cnfn shuffle2(long8 x, long8 y, ulong8 mask);\n"
42252"long8 __ovld __cnfn shuffle2(long16 x, long16 y, ulong8 mask);\n"
42253"\n"
42254"ulong8 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong8 mask);\n"
42255"ulong8 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong8 mask);\n"
42256"ulong8 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong8 mask);\n"
42257"ulong8 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong8 mask);\n"
42258"\n"
42259"float8 __ovld __cnfn shuffle2(float2 x, float2 y, uint8 mask);\n"
42260"float8 __ovld __cnfn shuffle2(float4 x, float4 y, uint8 mask);\n"
42261"float8 __ovld __cnfn shuffle2(float8 x, float8 y, uint8 mask);\n"
42262"float8 __ovld __cnfn shuffle2(float16 x, float16 y, uint8 mask);\n"
42263"\n"
42264"char16 __ovld __cnfn shuffle2(char2 x, char2 y, uchar16 mask);\n"
42265"char16 __ovld __cnfn shuffle2(char4 x, char4 y, uchar16 mask);\n"
42266"char16 __ovld __cnfn shuffle2(char8 x, char8 y, uchar16 mask);\n"
42267"char16 __ovld __cnfn shuffle2(char16 x, char16 y, uchar16 mask);\n"
42268"\n"
42269"uchar16 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar16 mask);\n"
42270"uchar16 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar16 mask);\n"
42271"uchar16 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar16 mask);\n"
42272"uchar16 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar16 mask);\n"
42273"\n"
42274"short16 __ovld __cnfn shuffle2(short2 x, short2 y, ushort16 mask);\n"
42275"short16 __ovld __cnfn shuffle2(short4 x, short4 y, ushort16 mask);\n"
42276"short16 __ovld __cnfn shuffle2(short8 x, short8 y, ushort16 mask);\n"
42277"short16 __ovld __cnfn shuffle2(short16 x, short16 y, ushort16 mask);\n"
42278"\n"
42279"ushort16 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort16 mask);\n"
42280"ushort16 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort16 mask);\n"
42281"ushort16 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort16 mask);\n"
42282"ushort16 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort16 mask);\n"
42283"\n"
42284"int16 __ovld __cnfn shuffle2(int2 x, int2 y, uint16 mask);\n"
42285"int16 __ovld __cnfn shuffle2(int4 x, int4 y, uint16 mask);\n"
42286"int16 __ovld __cnfn shuffle2(int8 x, int8 y, uint16 mask);\n"
42287"int16 __ovld __cnfn shuffle2(int16 x, int16 y, uint16 mask);\n"
42288"\n"
42289"uint16 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint16 mask);\n"
42290"uint16 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint16 mask);\n"
42291"uint16 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint16 mask);\n"
42292"uint16 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint16 mask);\n"
42293"\n"
42294"long16 __ovld __cnfn shuffle2(long2 x, long2 y, ulong16 mask);\n"
42295"long16 __ovld __cnfn shuffle2(long4 x, long4 y, ulong16 mask);\n"
42296"long16 __ovld __cnfn shuffle2(long8 x, long8 y, ulong16 mask);\n"
42297"long16 __ovld __cnfn shuffle2(long16 x, long16 y, ulong16 mask);\n"
42298"\n"
42299"ulong16 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong16 mask);\n"
42300"ulong16 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong16 mask);\n"
42301"ulong16 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong16 mask);\n"
42302"ulong16 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong16 mask);\n"
42303"\n"
42304"float16 __ovld __cnfn shuffle2(float2 x, float2 y, uint16 mask);\n"
42305"float16 __ovld __cnfn shuffle2(float4 x, float4 y, uint16 mask);\n"
42306"float16 __ovld __cnfn shuffle2(float8 x, float8 y, uint16 mask);\n"
42307"float16 __ovld __cnfn shuffle2(float16 x, float16 y, uint16 mask);\n"
42308"\n"
42309"#ifdef cl_khr_fp64\n"
42310"double2 __ovld __cnfn shuffle2(double2 x, double2 y, ulong2 mask);\n"
42311"double2 __ovld __cnfn shuffle2(double4 x, double4 y, ulong2 mask);\n"
42312"double2 __ovld __cnfn shuffle2(double8 x, double8 y, ulong2 mask);\n"
42313"double2 __ovld __cnfn shuffle2(double16 x, double16 y, ulong2 mask);\n"
42314"\n"
42315"double4 __ovld __cnfn shuffle2(double2 x, double2 y, ulong4 mask);\n"
42316"double4 __ovld __cnfn shuffle2(double4 x, double4 y, ulong4 mask);\n"
42317"double4 __ovld __cnfn shuffle2(double8 x, double8 y, ulong4 mask);\n"
42318"double4 __ovld __cnfn shuffle2(double16 x, double16 y, ulong4 mask);\n"
42319"\n"
42320"double8 __ovld __cnfn shuffle2(double2 x, double2 y, ulong8 mask);\n"
42321"double8 __ovld __cnfn shuffle2(double4 x, double4 y, ulong8 mask);\n"
42322"double8 __ovld __cnfn shuffle2(double8 x, double8 y, ulong8 mask);\n"
42323"double8 __ovld __cnfn shuffle2(double16 x, double16 y, ulong8 mask);\n"
42324"\n"
42325"double16 __ovld __cnfn shuffle2(double2 x, double2 y, ulong16 mask);\n"
42326"double16 __ovld __cnfn shuffle2(double4 x, double4 y, ulong16 mask);\n"
42327"double16 __ovld __cnfn shuffle2(double8 x, double8 y, ulong16 mask);\n"
42328"double16 __ovld __cnfn shuffle2(double16 x, double16 y, ulong16 mask);\n"
42329"#endif //cl_khr_fp64\n"
42330"\n"
42331"#ifdef cl_khr_fp16\n"
42332"half2 __ovld __cnfn shuffle2(half2 x, half2 y, ushort2 mask);\n"
42333"half2 __ovld __cnfn shuffle2(half4 x, half4 y, ushort2 mask);\n"
42334"half2 __ovld __cnfn shuffle2(half8 x, half8 y, ushort2 mask);\n"
42335"half2 __ovld __cnfn shuffle2(half16 x, half16 y, ushort2 mask);\n"
42336"\n"
42337"half4 __ovld __cnfn shuffle2(half2 x, half2 y, ushort4 mask);\n"
42338"half4 __ovld __cnfn shuffle2(half4 x, half4 y, ushort4 mask);\n"
42339"half4 __ovld __cnfn shuffle2(half8 x, half8 y, ushort4 mask);\n"
42340"half4 __ovld __cnfn shuffle2(half16 x, half16 y, ushort4 mask);\n"
42341"\n"
42342"half8 __ovld __cnfn shuffle2(half2 x, half2 y, ushort8 mask);\n"
42343"half8 __ovld __cnfn shuffle2(half4 x, half4 y, ushort8 mask);\n"
42344"half8 __ovld __cnfn shuffle2(half8 x, half8 y, ushort8 mask);\n"
42345"half8 __ovld __cnfn shuffle2(half16 x, half16 y, ushort8 mask);\n"
42346"\n"
42347"half16 __ovld __cnfn shuffle2(half2 x, half2 y, ushort16 mask);\n"
42348"half16 __ovld __cnfn shuffle2(half4 x, half4 y, ushort16 mask);\n"
42349"half16 __ovld __cnfn shuffle2(half8 x, half8 y, ushort16 mask);\n"
42350"half16 __ovld __cnfn shuffle2(half16 x, half16 y, ushort16 mask);\n"
42351"#endif //cl_khr_fp16\n"
42352"\n"
42353"#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42354"// OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf\n"
42355"\n"
42356"int printf(__constant const char* st, ...);\n"
42357"#endif\n"
42358"\n"
42359"// OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions\n"
42360"\n"
42361"// These values need to match the runtime equivalent\n"
42362"//\n"
42363"// Addressing Mode.\n"
42364"//\n"
42365"#define CLK_ADDRESS_NONE 0\n"
42366"#define CLK_ADDRESS_CLAMP_TO_EDGE 2\n"
42367"#define CLK_ADDRESS_CLAMP 4\n"
42368"#define CLK_ADDRESS_REPEAT 6\n"
42369"#define CLK_ADDRESS_MIRRORED_REPEAT 8\n"
42370"\n"
42371"//\n"
42372"// Coordination Normalization\n"
42373"//\n"
42374"#define CLK_NORMALIZED_COORDS_FALSE 0\n"
42375"#define CLK_NORMALIZED_COORDS_TRUE 1\n"
42376"\n"
42377"//\n"
42378"// Filtering Mode.\n"
42379"//\n"
42380"#define CLK_FILTER_NEAREST 0x10\n"
42381"#define CLK_FILTER_LINEAR 0x20\n"
42382"\n"
42383"#ifdef cl_khr_gl_msaa_sharing\n"
42384"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n"
42385"#endif //cl_khr_gl_msaa_sharing\n"
42386"\n"
42387"/**\n"
42388" * Use the coordinate (coord.xy) to do an element lookup in\n"
42389" * the 2D image object specified by image.\n"
42390" *\n"
42391" * Use the coordinate (coord.x, coord.y, coord.z) to do\n"
42392" * an element lookup in the 3D image object specified\n"
42393" * by image. coord.w is ignored.\n"
42394" *\n"
42395" * Use the coordinate (coord.z) to index into the\n"
42396" * 2D image array object specified by image_array\n"
42397" * and (coord.x, coord.y) to do an element lookup in\n"
42398" * the 2D image object specified by image.\n"
42399" *\n"
42400" * Use the coordinate (x) to do an element lookup in\n"
42401" * the 1D image object specified by image.\n"
42402" *\n"
42403" * Use the coordinate (coord.y) to index into the\n"
42404" * 1D image array object specified by image_array\n"
42405" * and (coord.x) to do an element lookup in\n"
42406" * the 1D image object specified by image.\n"
42407" *\n"
42408" * Use the coordinate (cood.xy) and sample to do an\n"
42409" * element lookup in the 2D multi-sample image specified\n"
42410" * by image.\n"
42411" *\n"
42412" * Use coord.xy and sample to do an element\n"
42413" * lookup in the 2D multi-sample image layer\n"
42414" * identified by index coord.z in the 2D multi-sample\n"
42415" * image array specified by image.\n"
42416" *\n"
42417" * For mipmap images, use the mip-level specified by\n"
42418" * the Level-of-Detail (lod) or use gradients for LOD\n"
42419" * computation.\n"
42420" *\n"
42421" * read_imagef returns floating-point values in the\n"
42422" * range [0.0 ... 1.0] for image objects created with\n"
42423" * image_channel_data_type set to one of the predefined\n"
42424" * packed formats or CL_UNORM_INT8, or\n"
42425" * CL_UNORM_INT16.\n"
42426" *\n"
42427" * read_imagef returns floating-point values in the\n"
42428" * range [-1.0 ... 1.0] for image objects created with\n"
42429" * image_channel_data_type set to CL_SNORM_INT8,\n"
42430" * or CL_SNORM_INT16.\n"
42431" *\n"
42432" * read_imagef returns floating-point values for image\n"
42433" * objects created with image_channel_data_type set to\n"
42434" * CL_HALF_FLOAT or CL_FLOAT.\n"
42435" *\n"
42436" * read_imagei and read_imageui return\n"
42437" * unnormalized signed integer and unsigned integer\n"
42438" * values respectively. Each channel will be stored in a\n"
42439" * 32-bit integer.\n"
42440" *\n"
42441" * read_imagei can only be used with image objects\n"
42442" * created with image_channel_data_type set to one of\n"
42443" * the following values:\n"
42444" * CL_SIGNED_INT8,\n"
42445" * CL_SIGNED_INT16 and\n"
42446" * CL_SIGNED_INT32.\n"
42447" * If the image_channel_data_type is not one of the\n"
42448" * above values, the values returned by read_imagei\n"
42449" * are undefined.\n"
42450" *\n"
42451" * read_imageui can only be used with image objects\n"
42452" * created with image_channel_data_type set to one of\n"
42453" * the following values:\n"
42454" * CL_UNSIGNED_INT8,\n"
42455" * CL_UNSIGNED_INT16 and\n"
42456" * CL_UNSIGNED_INT32.\n"
42457" * If the image_channel_data_type is not one of the\n"
42458" * above values, the values returned by read_imageui\n"
42459" * are undefined.\n"
42460" *\n"
42461" * The read_image{i|ui} calls support a nearest filter\n"
42462" * only. The filter_mode specified in sampler\n"
42463" * must be set to CLK_FILTER_NEAREST; otherwise\n"
42464" * the values returned are undefined.\n"
42465"\n"
42466" * The read_image{f|i|ui} calls that take\n"
42467" * integer coordinates must use a sampler with\n"
42468" * normalized coordinates set to\n"
42469" * CLK_NORMALIZED_COORDS_FALSE and\n"
42470" * addressing mode set to\n"
42471" * CLK_ADDRESS_CLAMP_TO_EDGE,\n"
42472" * CLK_ADDRESS_CLAMP or CLK_ADDRESS_NONE;\n"
42473" * otherwise the values returned are undefined.\n"
42474" *\n"
42475" * Values returned by read_imagef for image objects\n"
42476" * with image_channel_data_type values not specified\n"
42477" * in the description above are undefined.\n"
42478" */\n"
42479"\n"
42480"float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, int2 coord);\n"
42481"float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord);\n"
42482"\n"
42483"int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, int2 coord);\n"
42484"int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord);\n"
42485"uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, int2 coord);\n"
42486"uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord);\n"
42487"\n"
42488"float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, int4 coord);\n"
42489"float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord);\n"
42490"\n"
42491"int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, int4 coord);\n"
42492"int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord);\n"
42493"uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, int4 coord);\n"
42494"uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord);\n"
42495"\n"
42496"#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42497"float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);\n"
42498"float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);\n"
42499"\n"
42500"int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);\n"
42501"int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);\n"
42502"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);\n"
42503"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);\n"
42504"#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42505"\n"
42506"float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, int coord);\n"
42507"float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord);\n"
42508"\n"
42509"int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, int coord);\n"
42510"int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord);\n"
42511"uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, int coord);\n"
42512"uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord);\n"
42513"\n"
42514"#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42515"float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);\n"
42516"float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);\n"
42517"\n"
42518"int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);\n"
42519"int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);\n"
42520"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);\n"
42521"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);\n"
42522"#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42523"\n"
42524"#ifdef cl_khr_depth_images\n"
42525"float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord);\n"
42526"float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, int2 coord);\n"
42527"\n"
42528"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord);\n"
42529"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, int4 coord);\n"
42530"#endif //cl_khr_depth_images\n"
42531"\n"
42532"#if defined(cl_khr_gl_msaa_sharing)\n"
42533"float4 __purefn __ovld read_imagef(read_only image2d_msaa_t image, int2 coord, int sample);\n"
42534"int4 __purefn __ovld read_imagei(read_only image2d_msaa_t image, int2 coord, int sample);\n"
42535"uint4 __purefn __ovld read_imageui(read_only image2d_msaa_t image, int2 coord, int sample);\n"
42536"\n"
42537"float __purefn __ovld read_imagef(read_only image2d_msaa_depth_t image, int2 coord, int sample);\n"
42538"\n"
42539"float4 __purefn __ovld read_imagef(read_only image2d_array_msaa_t image, int4 coord, int sample);\n"
42540"int4 __purefn __ovld read_imagei(read_only image2d_array_msaa_t image, int4 coord, int sample);\n"
42541"uint4 __purefn __ovld read_imageui(read_only image2d_array_msaa_t image, int4 coord, int sample);\n"
42542"\n"
42543"float __purefn __ovld read_imagef(read_only image2d_array_msaa_depth_t image, int4 coord, int sample);\n"
42544"#endif //cl_khr_gl_msaa_sharing\n"
42545"\n"
42546"// OpenCL Extension v2.0 s9.18 - Mipmaps\n"
42547"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42548"#ifdef cl_khr_mipmap_image\n"
42549"\n"
42550"float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
42551"int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
42552"uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
42553"\n"
42554"float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42555"int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42556"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42557"\n"
42558"float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42559"int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42560"uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42561"\n"
42562"float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n"
42563"\n"
42564"float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42565"int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42566"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42567"\n"
42568"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n"
42569"\n"
42570"float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42571"int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42572"uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42573"\n"
42574"float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
42575"int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
42576"uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
42577"\n"
42578"float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
42579"int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
42580"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
42581"\n"
42582"float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42583"int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42584"uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42585"\n"
42586"float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42587"\n"
42588"float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42589"int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42590"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42591"\n"
42592"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42593"\n"
42594"float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
42595"int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
42596"uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
42597"\n"
42598"float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
42599"int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
42600"uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
42601"\n"
42602"float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42603"int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42604"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42605"\n"
42606"float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42607"int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42608"uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42609"\n"
42610"float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n"
42611"\n"
42612"float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42613"int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42614"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42615"\n"
42616"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n"
42617"\n"
42618"float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42619"int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42620"uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42621"\n"
42622"#endif //cl_khr_mipmap_image\n"
42623"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42624"\n"
42625"#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42626"\n"
42627"/**\n"
42628"* Sampler-less Image Access\n"
42629"*/\n"
42630"\n"
42631"float4 __purefn __ovld read_imagef(read_only image1d_t image, int coord);\n"
42632"int4 __purefn __ovld read_imagei(read_only image1d_t image, int coord);\n"
42633"uint4 __purefn __ovld read_imageui(read_only image1d_t image, int coord);\n"
42634"\n"
42635"float4 __purefn __ovld read_imagef(read_only image1d_buffer_t image, int coord);\n"
42636"int4 __purefn __ovld read_imagei(read_only image1d_buffer_t image, int coord);\n"
42637"uint4 __purefn __ovld read_imageui(read_only image1d_buffer_t image, int coord);\n"
42638"\n"
42639"float4 __purefn __ovld read_imagef(read_only image1d_array_t image, int2 coord);\n"
42640"int4 __purefn __ovld read_imagei(read_only image1d_array_t image, int2 coord);\n"
42641"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image, int2 coord);\n"
42642"\n"
42643"float4 __purefn __ovld read_imagef(read_only image2d_t image, int2 coord);\n"
42644"int4 __purefn __ovld read_imagei(read_only image2d_t image, int2 coord);\n"
42645"uint4 __purefn __ovld read_imageui(read_only image2d_t image, int2 coord);\n"
42646"\n"
42647"float4 __purefn __ovld read_imagef(read_only image2d_array_t image, int4 coord);\n"
42648"int4 __purefn __ovld read_imagei(read_only image2d_array_t image, int4 coord);\n"
42649"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image, int4 coord);\n"
42650"\n"
42651"#ifdef cl_khr_depth_images\n"
42652"float __purefn __ovld read_imagef(read_only image2d_depth_t image, int2 coord);\n"
42653"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, int4 coord);\n"
42654"#endif //cl_khr_depth_images\n"
42655"\n"
42656"float4 __purefn __ovld read_imagef(read_only image3d_t image, int4 coord);\n"
42657"int4 __purefn __ovld read_imagei(read_only image3d_t image, int4 coord);\n"
42658"uint4 __purefn __ovld read_imageui(read_only image3d_t image, int4 coord);\n"
42659"\n"
42660"#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42661"\n"
42662"// Image read functions returning half4 type\n"
42663"#ifdef cl_khr_fp16\n"
42664"half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, int coord);\n"
42665"half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, float coord);\n"
42666"half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, int2 coord);\n"
42667"half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, float2 coord);\n"
42668"half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, int4 coord);\n"
42669"half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, float4 coord);\n"
42670"#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42671"half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, int2 coord);\n"
42672"half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, float2 coord);\n"
42673"half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, int4 coord);\n"
42674"half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, float4 coord);\n"
42675"/**\n"
42676" * Sampler-less Image Access\n"
42677" */\n"
42678"half4 __purefn __ovld read_imageh(read_only image1d_t image, int coord);\n"
42679"half4 __purefn __ovld read_imageh(read_only image2d_t image, int2 coord);\n"
42680"half4 __purefn __ovld read_imageh(read_only image3d_t image, int4 coord);\n"
42681"half4 __purefn __ovld read_imageh(read_only image1d_array_t image, int2 coord);\n"
42682"half4 __purefn __ovld read_imageh(read_only image2d_array_t image, int4 coord);\n"
42683"half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord);\n"
42684"#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42685"#endif //cl_khr_fp16\n"
42686"\n"
42687"// Image read functions for read_write images\n"
42688"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42689"float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord);\n"
42690"int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord);\n"
42691"uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord);\n"
42692"\n"
42693"float4 __purefn __ovld read_imagef(read_write image1d_buffer_t image, int coord);\n"
42694"int4 __purefn __ovld read_imagei(read_write image1d_buffer_t image, int coord);\n"
42695"uint4 __purefn __ovld read_imageui(read_write image1d_buffer_t image, int coord);\n"
42696"\n"
42697"float4 __purefn __ovld read_imagef(read_write image1d_array_t image, int2 coord);\n"
42698"int4 __purefn __ovld read_imagei(read_write image1d_array_t image, int2 coord);\n"
42699"uint4 __purefn __ovld read_imageui(read_write image1d_array_t image, int2 coord);\n"
42700"\n"
42701"float4 __purefn __ovld read_imagef(read_write image2d_t image, int2 coord);\n"
42702"int4 __purefn __ovld read_imagei(read_write image2d_t image, int2 coord);\n"
42703"uint4 __purefn __ovld read_imageui(read_write image2d_t image, int2 coord);\n"
42704"\n"
42705"float4 __purefn __ovld read_imagef(read_write image2d_array_t image, int4 coord);\n"
42706"int4 __purefn __ovld read_imagei(read_write image2d_array_t image, int4 coord);\n"
42707"uint4 __purefn __ovld read_imageui(read_write image2d_array_t image, int4 coord);\n"
42708"\n"
42709"float4 __purefn __ovld read_imagef(read_write image3d_t image, int4 coord);\n"
42710"int4 __purefn __ovld read_imagei(read_write image3d_t image, int4 coord);\n"
42711"uint4 __purefn __ovld read_imageui(read_write image3d_t image, int4 coord);\n"
42712"\n"
42713"#ifdef cl_khr_depth_images\n"
42714"float __purefn __ovld read_imagef(read_write image2d_depth_t image, int2 coord);\n"
42715"float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, int4 coord);\n"
42716"#endif //cl_khr_depth_images\n"
42717"\n"
42718"#if cl_khr_gl_msaa_sharing\n"
42719"float4 __purefn __ovld read_imagef(read_write image2d_msaa_t image, int2 coord, int sample);\n"
42720"int4 __purefn __ovld read_imagei(read_write image2d_msaa_t image, int2 coord, int sample);\n"
42721"uint4 __purefn __ovld read_imageui(read_write image2d_msaa_t image, int2 coord, int sample);\n"
42722"\n"
42723"float4 __purefn __ovld read_imagef(read_write image2d_array_msaa_t image, int4 coord, int sample);\n"
42724"int4 __purefn __ovld read_imagei(read_write image2d_array_msaa_t image, int4 coord, int sample);\n"
42725"uint4 __purefn __ovld read_imageui(read_write image2d_array_msaa_t image, int4 coord, int sample);\n"
42726"\n"
42727"float __purefn __ovld read_imagef(read_write image2d_msaa_depth_t image, int2 coord, int sample);\n"
42728"float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, int4 coord, int sample);\n"
42729"#endif //cl_khr_gl_msaa_sharing\n"
42730"\n"
42731"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42732"#ifdef cl_khr_mipmap_image\n"
42733"float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
42734"int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
42735"uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
42736"\n"
42737"float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42738"int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42739"uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42740"\n"
42741"float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42742"int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42743"uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42744"\n"
42745"float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n"
42746"\n"
42747"float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42748"int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42749"uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42750"\n"
42751"float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n"
42752"\n"
42753"float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42754"int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42755"uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42756"\n"
42757"float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
42758"int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
42759"uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
42760"\n"
42761"float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
42762"int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
42763"uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
42764"\n"
42765"float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42766"int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42767"uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42768"\n"
42769"float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42770"\n"
42771"float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42772"int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42773"uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42774"\n"
42775"float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42776"\n"
42777"float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
42778"int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
42779"uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
42780"\n"
42781"float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
42782"int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
42783"uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
42784"\n"
42785"float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42786"int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42787"uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42788"\n"
42789"float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42790"int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42791"uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42792"\n"
42793"float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n"
42794"\n"
42795"float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42796"int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42797"uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42798"\n"
42799"float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n"
42800"\n"
42801"float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42802"int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42803"uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42804"#endif //cl_khr_mipmap_image\n"
42805"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42806"\n"
42807"// Image read functions returning half4 type\n"
42808"#ifdef cl_khr_fp16\n"
42809"half4 __purefn __ovld read_imageh(read_write image1d_t image, int coord);\n"
42810"half4 __purefn __ovld read_imageh(read_write image2d_t image, int2 coord);\n"
42811"half4 __purefn __ovld read_imageh(read_write image3d_t image, int4 coord);\n"
42812"half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord);\n"
42813"half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord);\n"
42814"half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord);\n"
42815"#endif //cl_khr_fp16\n"
42816"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42817"\n"
42818"/**\n"
42819" * Write color value to location specified by coordinate\n"
42820" * (coord.x, coord.y) in the 2D image object specified by image.\n"
42821" * (coord.x, coord.y) are considered to be unnormalized coordinates\n"
42822" * and must be in the range 0 ... image width - 1, and 0\n"
42823" * ... image height - 1.\n"
42824"\n"
42825" * Write color value to location specified by coordinate\n"
42826" * (coord.x, coord.y) in the 2D image object specified by index\n"
42827" * (coord.z) of the 2D image array object image_array.\n"
42828" * (coord.x, coord.y) are considered to be unnormalized\n"
42829" * coordinates and must be in the range 0 ... image width\n"
42830" * - 1.\n"
42831" *\n"
42832" * Write color value to location specified by coordinate\n"
42833" * (coord) in the 1D image (buffer) object specified by image.\n"
42834" * coord is considered to be unnormalized coordinates\n"
42835" * and must be in the range 0 ... image width - 1.\n"
42836" *\n"
42837" * Write color value to location specified by coordinate\n"
42838" * (coord.x) in the 1D image object specified by index\n"
42839" * (coord.y) of the 1D image array object image_array.\n"
42840" * x is considered to be unnormalized coordinates\n"
42841" * and must be in the range 0 ... image width - 1.\n"
42842" *\n"
42843" * Write color value to location specified by coordinate\n"
42844" * (coord.x, coord.y, coord.z) in the 3D image object specified by image.\n"
42845" * coord.x & coord.y are considered to be unnormalized coordinates\n"
42846" * and must be in the range 0 ... image width - 1, and 0\n"
42847" * ... image height - 1.\n"
42848" *\n"
42849" * For mipmap images, use mip-level specified by lod.\n"
42850" *\n"
42851" * Appropriate data format conversion to the specified\n"
42852" * image format is done before writing the color value.\n"
42853" *\n"
42854" * write_imagef can only be used with image objects\n"
42855" * created with image_channel_data_type set to one of\n"
42856" * the pre-defined packed formats or set to\n"
42857" * CL_SNORM_INT8, CL_UNORM_INT8,\n"
42858" * CL_SNORM_INT16, CL_UNORM_INT16,\n"
42859" * CL_HALF_FLOAT or CL_FLOAT. Appropriate data\n"
42860" * format conversion will be done to convert channel\n"
42861" * data from a floating-point value to actual data format\n"
42862" * in which the channels are stored.\n"
42863" *\n"
42864" * write_imagei can only be used with image objects\n"
42865" * created with image_channel_data_type set to one of\n"
42866" * the following values:\n"
42867" * CL_SIGNED_INT8,\n"
42868" * CL_SIGNED_INT16 and\n"
42869" * CL_SIGNED_INT32.\n"
42870" *\n"
42871" * write_imageui can only be used with image objects\n"
42872" * created with image_channel_data_type set to one of\n"
42873" * the following values:\n"
42874" * CL_UNSIGNED_INT8,\n"
42875" * CL_UNSIGNED_INT16 and\n"
42876" * CL_UNSIGNED_INT32.\n"
42877" *\n"
42878" * The behavior of write_imagef, write_imagei and\n"
42879" * write_imageui for image objects created with\n"
42880" * image_channel_data_type values not specified in\n"
42881" * the description above or with (x, y) coordinate\n"
42882" * values that are not in the range (0 ... image width -1,\n"
42883" * 0 ... image height - 1), respectively, is undefined.\n"
42884" */\n"
42885"void __ovld write_imagef(write_only image2d_t image, int2 coord, float4 color);\n"
42886"void __ovld write_imagei(write_only image2d_t image, int2 coord, int4 color);\n"
42887"void __ovld write_imageui(write_only image2d_t image, int2 coord, uint4 color);\n"
42888"\n"
42889"void __ovld write_imagef(write_only image2d_array_t image_array, int4 coord, float4 color);\n"
42890"void __ovld write_imagei(write_only image2d_array_t image_array, int4 coord, int4 color);\n"
42891"void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, uint4 color);\n"
42892"\n"
42893"void __ovld write_imagef(write_only image1d_t image, int coord, float4 color);\n"
42894"void __ovld write_imagei(write_only image1d_t image, int coord, int4 color);\n"
42895"void __ovld write_imageui(write_only image1d_t image, int coord, uint4 color);\n"
42896"\n"
42897"void __ovld write_imagef(write_only image1d_buffer_t image, int coord, float4 color);\n"
42898"void __ovld write_imagei(write_only image1d_buffer_t image, int coord, int4 color);\n"
42899"void __ovld write_imageui(write_only image1d_buffer_t image, int coord, uint4 color);\n"
42900"\n"
42901"void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, float4 color);\n"
42902"void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int4 color);\n"
42903"void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, uint4 color);\n"
42904"\n"
42905"#ifdef cl_khr_3d_image_writes\n"
42906"void __ovld write_imagef(write_only image3d_t image, int4 coord, float4 color);\n"
42907"void __ovld write_imagei(write_only image3d_t image, int4 coord, int4 color);\n"
42908"void __ovld write_imageui(write_only image3d_t image, int4 coord, uint4 color);\n"
42909"#endif\n"
42910"\n"
42911"#ifdef cl_khr_depth_images\n"
42912"void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, float color);\n"
42913"void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, float color);\n"
42914"#endif //cl_khr_depth_images\n"
42915"\n"
42916"// OpenCL Extension v2.0 s9.18 - Mipmaps\n"
42917"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42918"#ifdef cl_khr_mipmap_image\n"
42919"void __ovld write_imagef(write_only image1d_t image, int coord, int lod, float4 color);\n"
42920"void __ovld write_imagei(write_only image1d_t image, int coord, int lod, int4 color);\n"
42921"void __ovld write_imageui(write_only image1d_t image, int coord, int lod, uint4 color);\n"
42922"\n"
42923"void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, int lod, float4 color);\n"
42924"void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int lod, int4 color);\n"
42925"void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, int lod, uint4 color);\n"
42926"\n"
42927"void __ovld write_imagef(write_only image2d_t image, int2 coord, int lod, float4 color);\n"
42928"void __ovld write_imagei(write_only image2d_t image, int2 coord, int lod, int4 color);\n"
42929"void __ovld write_imageui(write_only image2d_t image, int2 coord, int lod, uint4 color);\n"
42930"\n"
42931"void __ovld write_imagef(write_only image2d_array_t image_array, int4 coord, int lod, float4 color);\n"
42932"void __ovld write_imagei(write_only image2d_array_t image_array, int4 coord, int lod, int4 color);\n"
42933"void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, int lod, uint4 color);\n"
42934"\n"
42935"void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, int lod, float color);\n"
42936"void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, int lod, float color);\n"
42937"\n"
42938"#ifdef cl_khr_3d_image_writes\n"
42939"void __ovld write_imagef(write_only image3d_t image, int4 coord, int lod, float4 color);\n"
42940"void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 color);\n"
42941"void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color);\n"
42942"#endif\n"
42943"#endif //cl_khr_mipmap_image\n"
42944"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42945"\n"
42946"// Image write functions for half4 type\n"
42947"#ifdef cl_khr_fp16\n"
42948"void __ovld write_imageh(write_only image1d_t image, int coord, half4 color);\n"
42949"void __ovld write_imageh(write_only image2d_t image, int2 coord, half4 color);\n"
42950"#ifdef cl_khr_3d_image_writes\n"
42951"void __ovld write_imageh(write_only image3d_t image, int4 coord, half4 color);\n"
42952"#endif\n"
42953"void __ovld write_imageh(write_only image1d_array_t image, int2 coord, half4 color);\n"
42954"void __ovld write_imageh(write_only image2d_array_t image, int4 coord, half4 color);\n"
42955"void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 color);\n"
42956"#endif //cl_khr_fp16\n"
42957"\n"
42958"// Image write functions for read_write images\n"
42959"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42960"void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color);\n"
42961"void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color);\n"
42962"void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color);\n"
42963"\n"
42964"void __ovld write_imagef(read_write image2d_array_t image_array, int4 coord, float4 color);\n"
42965"void __ovld write_imagei(read_write image2d_array_t image_array, int4 coord, int4 color);\n"
42966"void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, uint4 color);\n"
42967"\n"
42968"void __ovld write_imagef(read_write image1d_t image, int coord, float4 color);\n"
42969"void __ovld write_imagei(read_write image1d_t image, int coord, int4 color);\n"
42970"void __ovld write_imageui(read_write image1d_t image, int coord, uint4 color);\n"
42971"\n"
42972"void __ovld write_imagef(read_write image1d_buffer_t image, int coord, float4 color);\n"
42973"void __ovld write_imagei(read_write image1d_buffer_t image, int coord, int4 color);\n"
42974"void __ovld write_imageui(read_write image1d_buffer_t image, int coord, uint4 color);\n"
42975"\n"
42976"void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, float4 color);\n"
42977"void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int4 color);\n"
42978"void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, uint4 color);\n"
42979"\n"
42980"#ifdef cl_khr_3d_image_writes\n"
42981"void __ovld write_imagef(read_write image3d_t image, int4 coord, float4 color);\n"
42982"void __ovld write_imagei(read_write image3d_t image, int4 coord, int4 color);\n"
42983"void __ovld write_imageui(read_write image3d_t image, int4 coord, uint4 color);\n"
42984"#endif\n"
42985"\n"
42986"#ifdef cl_khr_depth_images\n"
42987"void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, float color);\n"
42988"void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, float color);\n"
42989"#endif //cl_khr_depth_images\n"
42990"\n"
42991"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42992"#ifdef cl_khr_mipmap_image\n"
42993"void __ovld write_imagef(read_write image1d_t image, int coord, int lod, float4 color);\n"
42994"void __ovld write_imagei(read_write image1d_t image, int coord, int lod, int4 color);\n"
42995"void __ovld write_imageui(read_write image1d_t image, int coord, int lod, uint4 color);\n"
42996"\n"
42997"void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, int lod, float4 color);\n"
42998"void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int lod, int4 color);\n"
42999"void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, int lod, uint4 color);\n"
43000"\n"
43001"void __ovld write_imagef(read_write image2d_t image, int2 coord, int lod, float4 color);\n"
43002"void __ovld write_imagei(read_write image2d_t image, int2 coord, int lod, int4 color);\n"
43003"void __ovld write_imageui(read_write image2d_t image, int2 coord, int lod, uint4 color);\n"
43004"\n"
43005"void __ovld write_imagef(read_write image2d_array_t image_array, int4 coord, int lod, float4 color);\n"
43006"void __ovld write_imagei(read_write image2d_array_t image_array, int4 coord, int lod, int4 color);\n"
43007"void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, int lod, uint4 color);\n"
43008"\n"
43009"void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, int lod, float color);\n"
43010"void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, int lod, float color);\n"
43011"\n"
43012"#ifdef cl_khr_3d_image_writes\n"
43013"void __ovld write_imagef(read_write image3d_t image, int4 coord, int lod, float4 color);\n"
43014"void __ovld write_imagei(read_write image3d_t image, int4 coord, int lod, int4 color);\n"
43015"void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color);\n"
43016"#endif\n"
43017"#endif //cl_khr_mipmap_image\n"
43018"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43019"\n"
43020"// Image write functions for half4 type\n"
43021"#ifdef cl_khr_fp16\n"
43022"void __ovld write_imageh(read_write image1d_t image, int coord, half4 color);\n"
43023"void __ovld write_imageh(read_write image2d_t image, int2 coord, half4 color);\n"
43024"#ifdef cl_khr_3d_image_writes\n"
43025"void __ovld write_imageh(read_write image3d_t image, int4 coord, half4 color);\n"
43026"#endif\n"
43027"void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 color);\n"
43028"void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color);\n"
43029"void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color);\n"
43030"#endif //cl_khr_fp16\n"
43031"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43032"\n"
43033"// Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have\n"
43034"// access qualifier, which by default assume read_only access qualifier. Image query builtin\n"
43035"// functions with write_only image argument should also be declared.\n"
43036"\n"
43037"/**\n"
43038" * Return the image width in pixels.\n"
43039" *\n"
43040" */\n"
43041"int __ovld __cnfn get_image_width(read_only image1d_t image);\n"
43042"int __ovld __cnfn get_image_width(read_only image1d_buffer_t image);\n"
43043"int __ovld __cnfn get_image_width(read_only image2d_t image);\n"
43044"#ifdef cl_khr_3d_image_writes\n"
43045"int __ovld __cnfn get_image_width(read_only image3d_t image);\n"
43046"#endif\n"
43047"int __ovld __cnfn get_image_width(read_only image1d_array_t image);\n"
43048"int __ovld __cnfn get_image_width(read_only image2d_array_t image);\n"
43049"#ifdef cl_khr_depth_images\n"
43050"int __ovld __cnfn get_image_width(read_only image2d_depth_t image);\n"
43051"int __ovld __cnfn get_image_width(read_only image2d_array_depth_t image);\n"
43052"#endif //cl_khr_depth_images\n"
43053"#if defined(cl_khr_gl_msaa_sharing)\n"
43054"int __ovld __cnfn get_image_width(read_only image2d_msaa_t image);\n"
43055"int __ovld __cnfn get_image_width(read_only image2d_msaa_depth_t image);\n"
43056"int __ovld __cnfn get_image_width(read_only image2d_array_msaa_t image);\n"
43057"int __ovld __cnfn get_image_width(read_only image2d_array_msaa_depth_t image);\n"
43058"#endif //cl_khr_gl_msaa_sharing\n"
43059"\n"
43060"int __ovld __cnfn get_image_width(write_only image1d_t image);\n"
43061"int __ovld __cnfn get_image_width(write_only image1d_buffer_t image);\n"
43062"int __ovld __cnfn get_image_width(write_only image2d_t image);\n"
43063"#ifdef cl_khr_3d_image_writes\n"
43064"int __ovld __cnfn get_image_width(write_only image3d_t image);\n"
43065"#endif\n"
43066"int __ovld __cnfn get_image_width(write_only image1d_array_t image);\n"
43067"int __ovld __cnfn get_image_width(write_only image2d_array_t image);\n"
43068"#ifdef cl_khr_depth_images\n"
43069"int __ovld __cnfn get_image_width(write_only image2d_depth_t image);\n"
43070"int __ovld __cnfn get_image_width(write_only image2d_array_depth_t image);\n"
43071"#endif //cl_khr_depth_images\n"
43072"#if defined(cl_khr_gl_msaa_sharing)\n"
43073"int __ovld __cnfn get_image_width(write_only image2d_msaa_t image);\n"
43074"int __ovld __cnfn get_image_width(write_only image2d_msaa_depth_t image);\n"
43075"int __ovld __cnfn get_image_width(write_only image2d_array_msaa_t image);\n"
43076"int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image);\n"
43077"#endif //cl_khr_gl_msaa_sharing\n"
43078"\n"
43079"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43080"int __ovld __cnfn get_image_width(read_write image1d_t image);\n"
43081"int __ovld __cnfn get_image_width(read_write image1d_buffer_t image);\n"
43082"int __ovld __cnfn get_image_width(read_write image2d_t image);\n"
43083"int __ovld __cnfn get_image_width(read_write image3d_t image);\n"
43084"int __ovld __cnfn get_image_width(read_write image1d_array_t image);\n"
43085"int __ovld __cnfn get_image_width(read_write image2d_array_t image);\n"
43086"#ifdef cl_khr_depth_images\n"
43087"int __ovld __cnfn get_image_width(read_write image2d_depth_t image);\n"
43088"int __ovld __cnfn get_image_width(read_write image2d_array_depth_t image);\n"
43089"#endif //cl_khr_depth_images\n"
43090"#if defined(cl_khr_gl_msaa_sharing)\n"
43091"int __ovld __cnfn get_image_width(read_write image2d_msaa_t image);\n"
43092"int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image);\n"
43093"int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image);\n"
43094"int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image);\n"
43095"#endif //cl_khr_gl_msaa_sharing\n"
43096"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43097"\n"
43098"/**\n"
43099" * Return the image height in pixels.\n"
43100" */\n"
43101"int __ovld __cnfn get_image_height(read_only image2d_t image);\n"
43102"int __ovld __cnfn get_image_height(read_only image3d_t image);\n"
43103"int __ovld __cnfn get_image_height(read_only image2d_array_t image);\n"
43104"#ifdef cl_khr_depth_images\n"
43105"int __ovld __cnfn get_image_height(read_only image2d_depth_t image);\n"
43106"int __ovld __cnfn get_image_height(read_only image2d_array_depth_t image);\n"
43107"#endif //cl_khr_depth_images\n"
43108"#if defined(cl_khr_gl_msaa_sharing)\n"
43109"int __ovld __cnfn get_image_height(read_only image2d_msaa_t image);\n"
43110"int __ovld __cnfn get_image_height(read_only image2d_msaa_depth_t image);\n"
43111"int __ovld __cnfn get_image_height(read_only image2d_array_msaa_t image);\n"
43112"int __ovld __cnfn get_image_height(read_only image2d_array_msaa_depth_t image);\n"
43113"#endif //cl_khr_gl_msaa_sharing\n"
43114"\n"
43115"int __ovld __cnfn get_image_height(write_only image2d_t image);\n"
43116"#ifdef cl_khr_3d_image_writes\n"
43117"int __ovld __cnfn get_image_height(write_only image3d_t image);\n"
43118"#endif\n"
43119"int __ovld __cnfn get_image_height(write_only image2d_array_t image);\n"
43120"#ifdef cl_khr_depth_images\n"
43121"int __ovld __cnfn get_image_height(write_only image2d_depth_t image);\n"
43122"int __ovld __cnfn get_image_height(write_only image2d_array_depth_t image);\n"
43123"#endif //cl_khr_depth_images\n"
43124"#if defined(cl_khr_gl_msaa_sharing)\n"
43125"int __ovld __cnfn get_image_height(write_only image2d_msaa_t image);\n"
43126"int __ovld __cnfn get_image_height(write_only image2d_msaa_depth_t image);\n"
43127"int __ovld __cnfn get_image_height(write_only image2d_array_msaa_t image);\n"
43128"int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image);\n"
43129"#endif //cl_khr_gl_msaa_sharing\n"
43130"\n"
43131"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43132"int __ovld __cnfn get_image_height(read_write image2d_t image);\n"
43133"int __ovld __cnfn get_image_height(read_write image3d_t image);\n"
43134"int __ovld __cnfn get_image_height(read_write image2d_array_t image);\n"
43135"#ifdef cl_khr_depth_images\n"
43136"int __ovld __cnfn get_image_height(read_write image2d_depth_t image);\n"
43137"int __ovld __cnfn get_image_height(read_write image2d_array_depth_t image);\n"
43138"#endif //cl_khr_depth_images\n"
43139"#if defined(cl_khr_gl_msaa_sharing)\n"
43140"int __ovld __cnfn get_image_height(read_write image2d_msaa_t image);\n"
43141"int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image);\n"
43142"int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image);\n"
43143"int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image);\n"
43144"#endif //cl_khr_gl_msaa_sharing\n"
43145"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43146"\n"
43147"/**\n"
43148" * Return the image depth in pixels.\n"
43149" */\n"
43150"int __ovld __cnfn get_image_depth(read_only image3d_t image);\n"
43151"\n"
43152"#ifdef cl_khr_3d_image_writes\n"
43153"int __ovld __cnfn get_image_depth(write_only image3d_t image);\n"
43154"#endif\n"
43155"\n"
43156"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43157"int __ovld __cnfn get_image_depth(read_write image3d_t image);\n"
43158"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43159"\n"
43160"// OpenCL Extension v2.0 s9.18 - Mipmaps\n"
43161"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43162"#ifdef cl_khr_mipmap_image\n"
43163"/**\n"
43164" * Return the image miplevels.\n"
43165" */\n"
43166"\n"
43167"int __ovld get_image_num_mip_levels(read_only image1d_t image);\n"
43168"int __ovld get_image_num_mip_levels(read_only image2d_t image);\n"
43169"int __ovld get_image_num_mip_levels(read_only image3d_t image);\n"
43170"\n"
43171"int __ovld get_image_num_mip_levels(write_only image1d_t image);\n"
43172"int __ovld get_image_num_mip_levels(write_only image2d_t image);\n"
43173"#ifdef cl_khr_3d_image_writes\n"
43174"int __ovld get_image_num_mip_levels(write_only image3d_t image);\n"
43175"#endif\n"
43176"\n"
43177"int __ovld get_image_num_mip_levels(read_write image1d_t image);\n"
43178"int __ovld get_image_num_mip_levels(read_write image2d_t image);\n"
43179"int __ovld get_image_num_mip_levels(read_write image3d_t image);\n"
43180"\n"
43181"int __ovld get_image_num_mip_levels(read_only image1d_array_t image);\n"
43182"int __ovld get_image_num_mip_levels(read_only image2d_array_t image);\n"
43183"int __ovld get_image_num_mip_levels(read_only image2d_array_depth_t image);\n"
43184"int __ovld get_image_num_mip_levels(read_only image2d_depth_t image);\n"
43185"\n"
43186"int __ovld get_image_num_mip_levels(write_only image1d_array_t image);\n"
43187"int __ovld get_image_num_mip_levels(write_only image2d_array_t image);\n"
43188"int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t image);\n"
43189"int __ovld get_image_num_mip_levels(write_only image2d_depth_t image);\n"
43190"\n"
43191"int __ovld get_image_num_mip_levels(read_write image1d_array_t image);\n"
43192"int __ovld get_image_num_mip_levels(read_write image2d_array_t image);\n"
43193"int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image);\n"
43194"int __ovld get_image_num_mip_levels(read_write image2d_depth_t image);\n"
43195"\n"
43196"#endif //cl_khr_mipmap_image\n"
43197"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43198"\n"
43199"/**\n"
43200" * Return the channel data type. Valid values are:\n"
43201" * CLK_SNORM_INT8\n"
43202" * CLK_SNORM_INT16\n"
43203" * CLK_UNORM_INT8\n"
43204" * CLK_UNORM_INT16\n"
43205" * CLK_UNORM_SHORT_565\n"
43206" * CLK_UNORM_SHORT_555\n"
43207" * CLK_UNORM_SHORT_101010\n"
43208" * CLK_SIGNED_INT8\n"
43209" * CLK_SIGNED_INT16\n"
43210" * CLK_SIGNED_INT32\n"
43211" * CLK_UNSIGNED_INT8\n"
43212" * CLK_UNSIGNED_INT16\n"
43213" * CLK_UNSIGNED_INT32\n"
43214" * CLK_HALF_FLOAT\n"
43215" * CLK_FLOAT\n"
43216" */\n"
43217"\n"
43218"//\n"
43219"// Channel Datatype.\n"
43220"//\n"
43221"#define CLK_SNORM_INT8 0x10D0\n"
43222"#define CLK_SNORM_INT16 0x10D1\n"
43223"#define CLK_UNORM_INT8 0x10D2\n"
43224"#define CLK_UNORM_INT16 0x10D3\n"
43225"#define CLK_UNORM_SHORT_565 0x10D4\n"
43226"#define CLK_UNORM_SHORT_555 0x10D5\n"
43227"#define CLK_UNORM_INT_101010 0x10D6\n"
43228"#define CLK_SIGNED_INT8 0x10D7\n"
43229"#define CLK_SIGNED_INT16 0x10D8\n"
43230"#define CLK_SIGNED_INT32 0x10D9\n"
43231"#define CLK_UNSIGNED_INT8 0x10DA\n"
43232"#define CLK_UNSIGNED_INT16 0x10DB\n"
43233"#define CLK_UNSIGNED_INT32 0x10DC\n"
43234"#define CLK_HALF_FLOAT 0x10DD\n"
43235"#define CLK_FLOAT 0x10DE\n"
43236"#define CLK_UNORM_INT24 0x10DF\n"
43237"\n"
43238"int __ovld __cnfn get_image_channel_data_type(read_only image1d_t image);\n"
43239"int __ovld __cnfn get_image_channel_data_type(read_only image1d_buffer_t image);\n"
43240"int __ovld __cnfn get_image_channel_data_type(read_only image2d_t image);\n"
43241"int __ovld __cnfn get_image_channel_data_type(read_only image3d_t image);\n"
43242"int __ovld __cnfn get_image_channel_data_type(read_only image1d_array_t image);\n"
43243"int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_t image);\n"
43244"#ifdef cl_khr_depth_images\n"
43245"int __ovld __cnfn get_image_channel_data_type(read_only image2d_depth_t image);\n"
43246"int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_depth_t image);\n"
43247"#endif //cl_khr_depth_images\n"
43248"#if defined(cl_khr_gl_msaa_sharing)\n"
43249"int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_t image);\n"
43250"int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_depth_t image);\n"
43251"int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_t image);\n"
43252"int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_depth_t image);\n"
43253"#endif //cl_khr_gl_msaa_sharing\n"
43254"\n"
43255"int __ovld __cnfn get_image_channel_data_type(write_only image1d_t image);\n"
43256"int __ovld __cnfn get_image_channel_data_type(write_only image1d_buffer_t image);\n"
43257"int __ovld __cnfn get_image_channel_data_type(write_only image2d_t image);\n"
43258"#ifdef cl_khr_3d_image_writes\n"
43259"int __ovld __cnfn get_image_channel_data_type(write_only image3d_t image);\n"
43260"#endif\n"
43261"int __ovld __cnfn get_image_channel_data_type(write_only image1d_array_t image);\n"
43262"int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_t image);\n"
43263"#ifdef cl_khr_depth_images\n"
43264"int __ovld __cnfn get_image_channel_data_type(write_only image2d_depth_t image);\n"
43265"int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_depth_t image);\n"
43266"#endif //cl_khr_depth_images\n"
43267"#if defined(cl_khr_gl_msaa_sharing)\n"
43268"int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_t image);\n"
43269"int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_depth_t image);\n"
43270"int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_t image);\n"
43271"int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_depth_t image);\n"
43272"#endif //cl_khr_gl_msaa_sharing\n"
43273"\n"
43274"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43275"int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image);\n"
43276"int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image);\n"
43277"int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image);\n"
43278"int __ovld __cnfn get_image_channel_data_type(read_write image3d_t image);\n"
43279"int __ovld __cnfn get_image_channel_data_type(read_write image1d_array_t image);\n"
43280"int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_t image);\n"
43281"#ifdef cl_khr_depth_images\n"
43282"int __ovld __cnfn get_image_channel_data_type(read_write image2d_depth_t image);\n"
43283"int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_depth_t image);\n"
43284"#endif //cl_khr_depth_images\n"
43285"#if defined(cl_khr_gl_msaa_sharing)\n"
43286"int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_t image);\n"
43287"int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t image);\n"
43288"int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image);\n"
43289"int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image);\n"
43290"#endif //cl_khr_gl_msaa_sharing\n"
43291"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43292"\n"
43293"/**\n"
43294" * Return the image channel order. Valid values are:\n"
43295" * CLK_A\n"
43296" * CLK_R\n"
43297" * CLK_Rx\n"
43298" * CLK_RG\n"
43299" * CLK_RGx\n"
43300" * CLK_RA\n"
43301" * CLK_RGB\n"
43302" * CLK_RGBx\n"
43303" * CLK_RGBA\n"
43304" * CLK_ARGB\n"
43305" * CLK_BGRA\n"
43306" * CLK_INTENSITY\n"
43307" * CLK_LUMINANCE\n"
43308" */\n"
43309"// Channel order, numbering must be aligned with cl_channel_order in cl.h\n"
43310"//\n"
43311"#define CLK_R 0x10B0\n"
43312"#define CLK_A 0x10B1\n"
43313"#define CLK_RG 0x10B2\n"
43314"#define CLK_RA 0x10B3\n"
43315"#define CLK_RGB 0x10B4\n"
43316"#define CLK_RGBA 0x10B5\n"
43317"#define CLK_BGRA 0x10B6\n"
43318"#define CLK_ARGB 0x10B7\n"
43319"#define CLK_INTENSITY 0x10B8\n"
43320"#define CLK_LUMINANCE 0x10B9\n"
43321"#define CLK_Rx 0x10BA\n"
43322"#define CLK_RGx 0x10BB\n"
43323"#define CLK_RGBx 0x10BC\n"
43324"#define CLK_DEPTH 0x10BD\n"
43325"#define CLK_DEPTH_STENCIL 0x10BE\n"
43326"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43327"#define CLK_sRGB 0x10BF\n"
43328"#define CLK_sRGBx 0x10C0\n"
43329"#define CLK_sRGBA 0x10C1\n"
43330"#define CLK_sBGRA 0x10C2\n"
43331"#define CLK_ABGR 0x10C3\n"
43332"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43333"\n"
43334"int __ovld __cnfn get_image_channel_order(read_only image1d_t image);\n"
43335"int __ovld __cnfn get_image_channel_order(read_only image1d_buffer_t image);\n"
43336"int __ovld __cnfn get_image_channel_order(read_only image2d_t image);\n"
43337"int __ovld __cnfn get_image_channel_order(read_only image3d_t image);\n"
43338"int __ovld __cnfn get_image_channel_order(read_only image1d_array_t image);\n"
43339"int __ovld __cnfn get_image_channel_order(read_only image2d_array_t image);\n"
43340"#ifdef cl_khr_depth_images\n"
43341"int __ovld __cnfn get_image_channel_order(read_only image2d_depth_t image);\n"
43342"int __ovld __cnfn get_image_channel_order(read_only image2d_array_depth_t image);\n"
43343"#endif //cl_khr_depth_images\n"
43344"#if defined(cl_khr_gl_msaa_sharing)\n"
43345"int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_t image);\n"
43346"int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_depth_t image);\n"
43347"int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_t image);\n"
43348"int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_depth_t image);\n"
43349"#endif //cl_khr_gl_msaa_sharing\n"
43350"\n"
43351"int __ovld __cnfn get_image_channel_order(write_only image1d_t image);\n"
43352"int __ovld __cnfn get_image_channel_order(write_only image1d_buffer_t image);\n"
43353"int __ovld __cnfn get_image_channel_order(write_only image2d_t image);\n"
43354"#ifdef cl_khr_3d_image_writes\n"
43355"int __ovld __cnfn get_image_channel_order(write_only image3d_t image);\n"
43356"#endif\n"
43357"int __ovld __cnfn get_image_channel_order(write_only image1d_array_t image);\n"
43358"int __ovld __cnfn get_image_channel_order(write_only image2d_array_t image);\n"
43359"#ifdef cl_khr_depth_images\n"
43360"int __ovld __cnfn get_image_channel_order(write_only image2d_depth_t image);\n"
43361"int __ovld __cnfn get_image_channel_order(write_only image2d_array_depth_t image);\n"
43362"#endif //cl_khr_depth_images\n"
43363"#if defined(cl_khr_gl_msaa_sharing)\n"
43364"int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_t image);\n"
43365"int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_depth_t image);\n"
43366"int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_t image);\n"
43367"int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t image);\n"
43368"#endif //cl_khr_gl_msaa_sharing\n"
43369"\n"
43370"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43371"int __ovld __cnfn get_image_channel_order(read_write image1d_t image);\n"
43372"int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image);\n"
43373"int __ovld __cnfn get_image_channel_order(read_write image2d_t image);\n"
43374"int __ovld __cnfn get_image_channel_order(read_write image3d_t image);\n"
43375"int __ovld __cnfn get_image_channel_order(read_write image1d_array_t image);\n"
43376"int __ovld __cnfn get_image_channel_order(read_write image2d_array_t image);\n"
43377"#ifdef cl_khr_depth_images\n"
43378"int __ovld __cnfn get_image_channel_order(read_write image2d_depth_t image);\n"
43379"int __ovld __cnfn get_image_channel_order(read_write image2d_array_depth_t image);\n"
43380"#endif //cl_khr_depth_images\n"
43381"#if defined(cl_khr_gl_msaa_sharing)\n"
43382"int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_t image);\n"
43383"int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image);\n"
43384"int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image);\n"
43385"int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image);\n"
43386"#endif //cl_khr_gl_msaa_sharing\n"
43387"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43388"\n"
43389"/**\n"
43390" * Return the 2D image width and height as an int2\n"
43391" * type. The width is returned in the x component, and\n"
43392" * the height in the y component.\n"
43393" */\n"
43394"int2 __ovld __cnfn get_image_dim(read_only image2d_t image);\n"
43395"int2 __ovld __cnfn get_image_dim(read_only image2d_array_t image);\n"
43396"#ifdef cl_khr_depth_images\n"
43397"int2 __ovld __cnfn get_image_dim(read_only image2d_array_depth_t image);\n"
43398"int2 __ovld __cnfn get_image_dim(read_only image2d_depth_t image);\n"
43399"#endif //cl_khr_depth_images\n"
43400"#if defined(cl_khr_gl_msaa_sharing)\n"
43401"int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_t image);\n"
43402"int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_depth_t image);\n"
43403"int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_t image);\n"
43404"int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_depth_t image);\n"
43405"#endif //cl_khr_gl_msaa_sharing\n"
43406"\n"
43407"int2 __ovld __cnfn get_image_dim(write_only image2d_t image);\n"
43408"int2 __ovld __cnfn get_image_dim(write_only image2d_array_t image);\n"
43409"#ifdef cl_khr_depth_images\n"
43410"int2 __ovld __cnfn get_image_dim(write_only image2d_array_depth_t image);\n"
43411"int2 __ovld __cnfn get_image_dim(write_only image2d_depth_t image);\n"
43412"#endif //cl_khr_depth_images\n"
43413"#if defined(cl_khr_gl_msaa_sharing)\n"
43414"int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_t image);\n"
43415"int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_depth_t image);\n"
43416"int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_t image);\n"
43417"int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image);\n"
43418"#endif //cl_khr_gl_msaa_sharing\n"
43419"\n"
43420"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43421"int2 __ovld __cnfn get_image_dim(read_write image2d_t image);\n"
43422"int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image);\n"
43423"#ifdef cl_khr_depth_images\n"
43424"int2 __ovld __cnfn get_image_dim(read_write image2d_array_depth_t image);\n"
43425"int2 __ovld __cnfn get_image_dim(read_write image2d_depth_t image);\n"
43426"#endif //cl_khr_depth_images\n"
43427"#if defined(cl_khr_gl_msaa_sharing)\n"
43428"int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_t image);\n"
43429"int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image);\n"
43430"int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image);\n"
43431"int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image);\n"
43432"#endif //cl_khr_gl_msaa_sharing\n"
43433"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43434"\n"
43435"/**\n"
43436" * Return the 3D image width, height, and depth as an\n"
43437" * int4 type. The width is returned in the x\n"
43438" * component, height in the y component, depth in the z\n"
43439" * component and the w component is 0.\n"
43440" */\n"
43441"int4 __ovld __cnfn get_image_dim(read_only image3d_t image);\n"
43442"#ifdef cl_khr_3d_image_writes\n"
43443"int4 __ovld __cnfn get_image_dim(write_only image3d_t image);\n"
43444"#endif\n"
43445"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43446"int4 __ovld __cnfn get_image_dim(read_write image3d_t image);\n"
43447"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43448"\n"
43449"/**\n"
43450" * Return the image array size.\n"
43451" */\n"
43452"\n"
43453"size_t __ovld __cnfn get_image_array_size(read_only image1d_array_t image_array);\n"
43454"size_t __ovld __cnfn get_image_array_size(read_only image2d_array_t image_array);\n"
43455"#ifdef cl_khr_depth_images\n"
43456"size_t __ovld __cnfn get_image_array_size(read_only image2d_array_depth_t image_array);\n"
43457"#endif //cl_khr_depth_images\n"
43458"#if defined(cl_khr_gl_msaa_sharing)\n"
43459"size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_t image_array);\n"
43460"size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_depth_t image_array);\n"
43461"#endif //cl_khr_gl_msaa_sharing\n"
43462"\n"
43463"size_t __ovld __cnfn get_image_array_size(write_only image1d_array_t image_array);\n"
43464"size_t __ovld __cnfn get_image_array_size(write_only image2d_array_t image_array);\n"
43465"#ifdef cl_khr_depth_images\n"
43466"size_t __ovld __cnfn get_image_array_size(write_only image2d_array_depth_t image_array);\n"
43467"#endif //cl_khr_depth_images\n"
43468"#if defined(cl_khr_gl_msaa_sharing)\n"
43469"size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_t image_array);\n"
43470"size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t image_array);\n"
43471"#endif //cl_khr_gl_msaa_sharing\n"
43472"\n"
43473"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43474"size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array);\n"
43475"size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array);\n"
43476"#ifdef cl_khr_depth_images\n"
43477"size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image_array);\n"
43478"#endif //cl_khr_depth_images\n"
43479"#if defined(cl_khr_gl_msaa_sharing)\n"
43480"size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array);\n"
43481"size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array);\n"
43482"#endif //cl_khr_gl_msaa_sharing\n"
43483"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43484"\n"
43485"/**\n"
43486"* Return the number of samples associated with image\n"
43487"*/\n"
43488"#if defined(cl_khr_gl_msaa_sharing)\n"
43489"int __ovld get_image_num_samples(read_only image2d_msaa_t image);\n"
43490"int __ovld get_image_num_samples(read_only image2d_msaa_depth_t image);\n"
43491"int __ovld get_image_num_samples(read_only image2d_array_msaa_depth_t image);\n"
43492"int __ovld get_image_num_samples(read_only image2d_array_msaa_t image);\n"
43493"int __ovld get_image_num_samples(read_only image2d_array_msaa_depth_t image);\n"
43494"\n"
43495"int __ovld get_image_num_samples(write_only image2d_msaa_t image);\n"
43496"int __ovld get_image_num_samples(write_only image2d_msaa_depth_t image);\n"
43497"int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);\n"
43498"int __ovld get_image_num_samples(write_only image2d_array_msaa_t image);\n"
43499"int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);\n"
43500"\n"
43501"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43502"int __ovld get_image_num_samples(read_write image2d_msaa_t image);\n"
43503"int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image);\n"
43504"int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);\n"
43505"int __ovld get_image_num_samples(read_write image2d_array_msaa_t image);\n"
43506"int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);\n"
43507"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43508"#endif\n"
43509"\n"
43510"// OpenCL v2.0 s6.13.15 - Work-group Functions\n"
43511"\n"
43512"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43513"int __ovld __conv work_group_all(int predicate);\n"
43514"int __ovld __conv work_group_any(int predicate);\n"
43515"\n"
43516"#ifdef cl_khr_fp16\n"
43517"half __ovld __conv work_group_broadcast(half a, size_t local_id);\n"
43518"half __ovld __conv work_group_broadcast(half a, size_t x, size_t y);\n"
43519"half __ovld __conv work_group_broadcast(half a, size_t x, size_t y, size_t z);\n"
43520"#endif\n"
43521"int __ovld __conv work_group_broadcast(int a, size_t local_id);\n"
43522"int __ovld __conv work_group_broadcast(int a, size_t x, size_t y);\n"
43523"int __ovld __conv work_group_broadcast(int a, size_t x, size_t y, size_t z);\n"
43524"uint __ovld __conv work_group_broadcast(uint a, size_t local_id);\n"
43525"uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y);\n"
43526"uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y, size_t z);\n"
43527"long __ovld __conv work_group_broadcast(long a, size_t local_id);\n"
43528"long __ovld __conv work_group_broadcast(long a, size_t x, size_t y);\n"
43529"long __ovld __conv work_group_broadcast(long a, size_t x, size_t y, size_t z);\n"
43530"ulong __ovld __conv work_group_broadcast(ulong a, size_t local_id);\n"
43531"ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y);\n"
43532"ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y, size_t z);\n"
43533"float __ovld __conv work_group_broadcast(float a, size_t local_id);\n"
43534"float __ovld __conv work_group_broadcast(float a, size_t x, size_t y);\n"
43535"float __ovld __conv work_group_broadcast(float a, size_t x, size_t y, size_t z);\n"
43536"#ifdef cl_khr_fp64\n"
43537"double __ovld __conv work_group_broadcast(double a, size_t local_id);\n"
43538"double __ovld __conv work_group_broadcast(double a, size_t x, size_t y);\n"
43539"double __ovld __conv work_group_broadcast(double a, size_t x, size_t y, size_t z);\n"
43540"#endif //cl_khr_fp64\n"
43541"\n"
43542"#ifdef cl_khr_fp16\n"
43543"half __ovld __conv work_group_reduce_add(half x);\n"
43544"half __ovld __conv work_group_reduce_min(half x);\n"
43545"half __ovld __conv work_group_reduce_max(half x);\n"
43546"half __ovld __conv work_group_scan_exclusive_add(half x);\n"
43547"half __ovld __conv work_group_scan_exclusive_min(half x);\n"
43548"half __ovld __conv work_group_scan_exclusive_max(half x);\n"
43549"half __ovld __conv work_group_scan_inclusive_add(half x);\n"
43550"half __ovld __conv work_group_scan_inclusive_min(half x);\n"
43551"half __ovld __conv work_group_scan_inclusive_max(half x);\n"
43552"#endif\n"
43553"int __ovld __conv work_group_reduce_add(int x);\n"
43554"int __ovld __conv work_group_reduce_min(int x);\n"
43555"int __ovld __conv work_group_reduce_max(int x);\n"
43556"int __ovld __conv work_group_scan_exclusive_add(int x);\n"
43557"int __ovld __conv work_group_scan_exclusive_min(int x);\n"
43558"int __ovld __conv work_group_scan_exclusive_max(int x);\n"
43559"int __ovld __conv work_group_scan_inclusive_add(int x);\n"
43560"int __ovld __conv work_group_scan_inclusive_min(int x);\n"
43561"int __ovld __conv work_group_scan_inclusive_max(int x);\n"
43562"uint __ovld __conv work_group_reduce_add(uint x);\n"
43563"uint __ovld __conv work_group_reduce_min(uint x);\n"
43564"uint __ovld __conv work_group_reduce_max(uint x);\n"
43565"uint __ovld __conv work_group_scan_exclusive_add(uint x);\n"
43566"uint __ovld __conv work_group_scan_exclusive_min(uint x);\n"
43567"uint __ovld __conv work_group_scan_exclusive_max(uint x);\n"
43568"uint __ovld __conv work_group_scan_inclusive_add(uint x);\n"
43569"uint __ovld __conv work_group_scan_inclusive_min(uint x);\n"
43570"uint __ovld __conv work_group_scan_inclusive_max(uint x);\n"
43571"long __ovld __conv work_group_reduce_add(long x);\n"
43572"long __ovld __conv work_group_reduce_min(long x);\n"
43573"long __ovld __conv work_group_reduce_max(long x);\n"
43574"long __ovld __conv work_group_scan_exclusive_add(long x);\n"
43575"long __ovld __conv work_group_scan_exclusive_min(long x);\n"
43576"long __ovld __conv work_group_scan_exclusive_max(long x);\n"
43577"long __ovld __conv work_group_scan_inclusive_add(long x);\n"
43578"long __ovld __conv work_group_scan_inclusive_min(long x);\n"
43579"long __ovld __conv work_group_scan_inclusive_max(long x);\n"
43580"ulong __ovld __conv work_group_reduce_add(ulong x);\n"
43581"ulong __ovld __conv work_group_reduce_min(ulong x);\n"
43582"ulong __ovld __conv work_group_reduce_max(ulong x);\n"
43583"ulong __ovld __conv work_group_scan_exclusive_add(ulong x);\n"
43584"ulong __ovld __conv work_group_scan_exclusive_min(ulong x);\n"
43585"ulong __ovld __conv work_group_scan_exclusive_max(ulong x);\n"
43586"ulong __ovld __conv work_group_scan_inclusive_add(ulong x);\n"
43587"ulong __ovld __conv work_group_scan_inclusive_min(ulong x);\n"
43588"ulong __ovld __conv work_group_scan_inclusive_max(ulong x);\n"
43589"float __ovld __conv work_group_reduce_add(float x);\n"
43590"float __ovld __conv work_group_reduce_min(float x);\n"
43591"float __ovld __conv work_group_reduce_max(float x);\n"
43592"float __ovld __conv work_group_scan_exclusive_add(float x);\n"
43593"float __ovld __conv work_group_scan_exclusive_min(float x);\n"
43594"float __ovld __conv work_group_scan_exclusive_max(float x);\n"
43595"float __ovld __conv work_group_scan_inclusive_add(float x);\n"
43596"float __ovld __conv work_group_scan_inclusive_min(float x);\n"
43597"float __ovld __conv work_group_scan_inclusive_max(float x);\n"
43598"#ifdef cl_khr_fp64\n"
43599"double __ovld __conv work_group_reduce_add(double x);\n"
43600"double __ovld __conv work_group_reduce_min(double x);\n"
43601"double __ovld __conv work_group_reduce_max(double x);\n"
43602"double __ovld __conv work_group_scan_exclusive_add(double x);\n"
43603"double __ovld __conv work_group_scan_exclusive_min(double x);\n"
43604"double __ovld __conv work_group_scan_exclusive_max(double x);\n"
43605"double __ovld __conv work_group_scan_inclusive_add(double x);\n"
43606"double __ovld __conv work_group_scan_inclusive_min(double x);\n"
43607"double __ovld __conv work_group_scan_inclusive_max(double x);\n"
43608"#endif //cl_khr_fp64\n"
43609"\n"
43610"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43611"\n"
43612"// OpenCL v2.0 s6.13.16 - Pipe Functions\n"
43613"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43614"#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t))\n"
43615"bool __ovld is_valid_reserve_id(reserve_id_t reserve_id);\n"
43616"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43617"\n"
43618"\n"
43619"// OpenCL v2.0 s6.13.17 - Enqueue Kernels\n"
43620"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43621"\n"
43622"#define CL_COMPLETE 0x0\n"
43623"#define CL_RUNNING 0x1\n"
43624"#define CL_SUBMITTED 0x2\n"
43625"#define CL_QUEUED 0x3\n"
43626"\n"
43627"#define CLK_SUCCESS 0\n"
43628"#define CLK_ENQUEUE_FAILURE -101\n"
43629"#define CLK_INVALID_QUEUE -102\n"
43630"#define CLK_INVALID_NDRANGE -160\n"
43631"#define CLK_INVALID_EVENT_WAIT_LIST -57\n"
43632"#define CLK_DEVICE_QUEUE_FULL -161\n"
43633"#define CLK_INVALID_ARG_SIZE -51\n"
43634"#define CLK_EVENT_ALLOCATION_FAILURE -100\n"
43635"#define CLK_OUT_OF_RESOURCES -5\n"
43636"\n"
43637"#define CLK_NULL_QUEUE 0\n"
43638"#define CLK_NULL_EVENT (__builtin_astype(((void*)(__SIZE_MAX__)), clk_event_t))\n"
43639"\n"
43640"// execution model related definitions\n"
43641"#define CLK_ENQUEUE_FLAGS_NO_WAIT 0x0\n"
43642"#define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0x1\n"
43643"#define CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP 0x2\n"
43644"\n"
43645"typedef int kernel_enqueue_flags_t;\n"
43646"typedef int clk_profiling_info;\n"
43647"\n"
43648"// Profiling info name (see capture_event_profiling_info)\n"
43649"#define CLK_PROFILING_COMMAND_EXEC_TIME 0x1\n"
43650"\n"
43651"#define MAX_WORK_DIM 3\n"
43652"\n"
43653"typedef struct {\n"
43654" unsigned int workDimension;\n"
43655" size_t globalWorkOffset[MAX_WORK_DIM];\n"
43656" size_t globalWorkSize[MAX_WORK_DIM];\n"
43657" size_t localWorkSize[MAX_WORK_DIM];\n"
43658"} ndrange_t;\n"
43659"\n"
43660"ndrange_t __ovld ndrange_1D(size_t);\n"
43661"ndrange_t __ovld ndrange_1D(size_t, size_t);\n"
43662"ndrange_t __ovld ndrange_1D(size_t, size_t, size_t);\n"
43663"\n"
43664"ndrange_t __ovld ndrange_2D(const size_t[2]);\n"
43665"ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2]);\n"
43666"ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2], const size_t[2]);\n"
43667"\n"
43668"ndrange_t __ovld ndrange_3D(const size_t[3]);\n"
43669"ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3]);\n"
43670"ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3], const size_t[3]);\n"
43671"\n"
43672"int __ovld enqueue_marker(queue_t, uint, const __private clk_event_t*, __private clk_event_t*);\n"
43673"\n"
43674"void __ovld retain_event(clk_event_t);\n"
43675"\n"
43676"void __ovld release_event(clk_event_t);\n"
43677"\n"
43678"clk_event_t __ovld create_user_event(void);\n"
43679"\n"
43680"void __ovld set_user_event_status(clk_event_t e, int state);\n"
43681"\n"
43682"bool __ovld is_valid_event (clk_event_t event);\n"
43683"\n"
43684"void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value);\n"
43685"\n"
43686"queue_t __ovld get_default_queue(void);\n"
43687"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43688"\n"
43689"// OpenCL Extension v2.0 s9.17 - Sub-groups\n"
43690"\n"
43691"#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)\n"
43692"// Shared Sub Group Functions\n"
43693"uint __ovld get_sub_group_size(void);\n"
43694"uint __ovld get_max_sub_group_size(void);\n"
43695"uint __ovld get_num_sub_groups(void);\n"
43696"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43697"uint __ovld get_enqueued_num_sub_groups(void);\n"
43698"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43699"uint __ovld get_sub_group_id(void);\n"
43700"uint __ovld get_sub_group_local_id(void);\n"
43701"\n"
43702"void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags);\n"
43703"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43704"void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope);\n"
43705"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43706"\n"
43707"int __ovld __conv sub_group_all(int predicate);\n"
43708"int __ovld __conv sub_group_any(int predicate);\n"
43709"\n"
43710"int __ovld __conv sub_group_broadcast(int x, uint sub_group_local_id);\n"
43711"uint __ovld __conv sub_group_broadcast(uint x, uint sub_group_local_id);\n"
43712"long __ovld __conv sub_group_broadcast(long x, uint sub_group_local_id);\n"
43713"ulong __ovld __conv sub_group_broadcast(ulong x, uint sub_group_local_id);\n"
43714"float __ovld __conv sub_group_broadcast(float x, uint sub_group_local_id);\n"
43715"\n"
43716"int __ovld __conv sub_group_reduce_add(int x);\n"
43717"uint __ovld __conv sub_group_reduce_add(uint x);\n"
43718"long __ovld __conv sub_group_reduce_add(long x);\n"
43719"ulong __ovld __conv sub_group_reduce_add(ulong x);\n"
43720"float __ovld __conv sub_group_reduce_add(float x);\n"
43721"int __ovld __conv sub_group_reduce_min(int x);\n"
43722"uint __ovld __conv sub_group_reduce_min(uint x);\n"
43723"long __ovld __conv sub_group_reduce_min(long x);\n"
43724"ulong __ovld __conv sub_group_reduce_min(ulong x);\n"
43725"float __ovld __conv sub_group_reduce_min(float x);\n"
43726"int __ovld __conv sub_group_reduce_max(int x);\n"
43727"uint __ovld __conv sub_group_reduce_max(uint x);\n"
43728"long __ovld __conv sub_group_reduce_max(long x);\n"
43729"ulong __ovld __conv sub_group_reduce_max(ulong x);\n"
43730"float __ovld __conv sub_group_reduce_max(float x);\n"
43731"\n"
43732"int __ovld __conv sub_group_scan_exclusive_add(int x);\n"
43733"uint __ovld __conv sub_group_scan_exclusive_add(uint x);\n"
43734"long __ovld __conv sub_group_scan_exclusive_add(long x);\n"
43735"ulong __ovld __conv sub_group_scan_exclusive_add(ulong x);\n"
43736"float __ovld __conv sub_group_scan_exclusive_add(float x);\n"
43737"int __ovld __conv sub_group_scan_exclusive_min(int x);\n"
43738"uint __ovld __conv sub_group_scan_exclusive_min(uint x);\n"
43739"long __ovld __conv sub_group_scan_exclusive_min(long x);\n"
43740"ulong __ovld __conv sub_group_scan_exclusive_min(ulong x);\n"
43741"float __ovld __conv sub_group_scan_exclusive_min(float x);\n"
43742"int __ovld __conv sub_group_scan_exclusive_max(int x);\n"
43743"uint __ovld __conv sub_group_scan_exclusive_max(uint x);\n"
43744"long __ovld __conv sub_group_scan_exclusive_max(long x);\n"
43745"ulong __ovld __conv sub_group_scan_exclusive_max(ulong x);\n"
43746"float __ovld __conv sub_group_scan_exclusive_max(float x);\n"
43747"\n"
43748"int __ovld __conv sub_group_scan_inclusive_add(int x);\n"
43749"uint __ovld __conv sub_group_scan_inclusive_add(uint x);\n"
43750"long __ovld __conv sub_group_scan_inclusive_add(long x);\n"
43751"ulong __ovld __conv sub_group_scan_inclusive_add(ulong x);\n"
43752"float __ovld __conv sub_group_scan_inclusive_add(float x);\n"
43753"int __ovld __conv sub_group_scan_inclusive_min(int x);\n"
43754"uint __ovld __conv sub_group_scan_inclusive_min(uint x);\n"
43755"long __ovld __conv sub_group_scan_inclusive_min(long x);\n"
43756"ulong __ovld __conv sub_group_scan_inclusive_min(ulong x);\n"
43757"float __ovld __conv sub_group_scan_inclusive_min(float x);\n"
43758"int __ovld __conv sub_group_scan_inclusive_max(int x);\n"
43759"uint __ovld __conv sub_group_scan_inclusive_max(uint x);\n"
43760"long __ovld __conv sub_group_scan_inclusive_max(long x);\n"
43761"ulong __ovld __conv sub_group_scan_inclusive_max(ulong x);\n"
43762"float __ovld __conv sub_group_scan_inclusive_max(float x);\n"
43763"\n"
43764"#ifdef cl_khr_fp16\n"
43765"half __ovld __conv sub_group_broadcast(half x, uint sub_group_local_id);\n"
43766"half __ovld __conv sub_group_reduce_add(half x);\n"
43767"half __ovld __conv sub_group_reduce_min(half x);\n"
43768"half __ovld __conv sub_group_reduce_max(half x);\n"
43769"half __ovld __conv sub_group_scan_exclusive_add(half x);\n"
43770"half __ovld __conv sub_group_scan_exclusive_min(half x);\n"
43771"half __ovld __conv sub_group_scan_exclusive_max(half x);\n"
43772"half __ovld __conv sub_group_scan_inclusive_add(half x);\n"
43773"half __ovld __conv sub_group_scan_inclusive_min(half x);\n"
43774"half __ovld __conv sub_group_scan_inclusive_max(half x);\n"
43775"#endif //cl_khr_fp16\n"
43776"\n"
43777"#ifdef cl_khr_fp64\n"
43778"double __ovld __conv sub_group_broadcast(double x, uint sub_group_local_id);\n"
43779"double __ovld __conv sub_group_reduce_add(double x);\n"
43780"double __ovld __conv sub_group_reduce_min(double x);\n"
43781"double __ovld __conv sub_group_reduce_max(double x);\n"
43782"double __ovld __conv sub_group_scan_exclusive_add(double x);\n"
43783"double __ovld __conv sub_group_scan_exclusive_min(double x);\n"
43784"double __ovld __conv sub_group_scan_exclusive_max(double x);\n"
43785"double __ovld __conv sub_group_scan_inclusive_add(double x);\n"
43786"double __ovld __conv sub_group_scan_inclusive_min(double x);\n"
43787"double __ovld __conv sub_group_scan_inclusive_max(double x);\n"
43788"#endif //cl_khr_fp64\n"
43789"\n"
43790"#endif //cl_khr_subgroups cl_intel_subgroups\n"
43791"\n"
43792"#if defined(cl_intel_subgroups)\n"
43793"// Intel-Specific Sub Group Functions\n"
43794"float __ovld __conv intel_sub_group_shuffle( float x, uint c );\n"
43795"float2 __ovld __conv intel_sub_group_shuffle( float2 x, uint c );\n"
43796"float3 __ovld __conv intel_sub_group_shuffle( float3 x, uint c );\n"
43797"float4 __ovld __conv intel_sub_group_shuffle( float4 x, uint c );\n"
43798"float8 __ovld __conv intel_sub_group_shuffle( float8 x, uint c );\n"
43799"float16 __ovld __conv intel_sub_group_shuffle( float16 x, uint c );\n"
43800"\n"
43801"int __ovld __conv intel_sub_group_shuffle( int x, uint c );\n"
43802"int2 __ovld __conv intel_sub_group_shuffle( int2 x, uint c );\n"
43803"int3 __ovld __conv intel_sub_group_shuffle( int3 x, uint c );\n"
43804"int4 __ovld __conv intel_sub_group_shuffle( int4 x, uint c );\n"
43805"int8 __ovld __conv intel_sub_group_shuffle( int8 x, uint c );\n"
43806"int16 __ovld __conv intel_sub_group_shuffle( int16 x, uint c );\n"
43807"\n"
43808"uint __ovld __conv intel_sub_group_shuffle( uint x, uint c );\n"
43809"uint2 __ovld __conv intel_sub_group_shuffle( uint2 x, uint c );\n"
43810"uint3 __ovld __conv intel_sub_group_shuffle( uint3 x, uint c );\n"
43811"uint4 __ovld __conv intel_sub_group_shuffle( uint4 x, uint c );\n"
43812"uint8 __ovld __conv intel_sub_group_shuffle( uint8 x, uint c );\n"
43813"uint16 __ovld __conv intel_sub_group_shuffle( uint16 x, uint c );\n"
43814"\n"
43815"long __ovld __conv intel_sub_group_shuffle( long x, uint c );\n"
43816"ulong __ovld __conv intel_sub_group_shuffle( ulong x, uint c );\n"
43817"\n"
43818"float __ovld __conv intel_sub_group_shuffle_down( float cur, float next, uint c );\n"
43819"float2 __ovld __conv intel_sub_group_shuffle_down( float2 cur, float2 next, uint c );\n"
43820"float3 __ovld __conv intel_sub_group_shuffle_down( float3 cur, float3 next, uint c );\n"
43821"float4 __ovld __conv intel_sub_group_shuffle_down( float4 cur, float4 next, uint c );\n"
43822"float8 __ovld __conv intel_sub_group_shuffle_down( float8 cur, float8 next, uint c );\n"
43823"float16 __ovld __conv intel_sub_group_shuffle_down( float16 cur, float16 next, uint c );\n"
43824"\n"
43825"int __ovld __conv intel_sub_group_shuffle_down( int cur, int next, uint c );\n"
43826"int2 __ovld __conv intel_sub_group_shuffle_down( int2 cur, int2 next, uint c );\n"
43827"int3 __ovld __conv intel_sub_group_shuffle_down( int3 cur, int3 next, uint c );\n"
43828"int4 __ovld __conv intel_sub_group_shuffle_down( int4 cur, int4 next, uint c );\n"
43829"int8 __ovld __conv intel_sub_group_shuffle_down( int8 cur, int8 next, uint c );\n"
43830"int16 __ovld __conv intel_sub_group_shuffle_down( int16 cur, int16 next, uint c );\n"
43831"\n"
43832"uint __ovld __conv intel_sub_group_shuffle_down( uint cur, uint next, uint c );\n"
43833"uint2 __ovld __conv intel_sub_group_shuffle_down( uint2 cur, uint2 next, uint c );\n"
43834"uint3 __ovld __conv intel_sub_group_shuffle_down( uint3 cur, uint3 next, uint c );\n"
43835"uint4 __ovld __conv intel_sub_group_shuffle_down( uint4 cur, uint4 next, uint c );\n"
43836"uint8 __ovld __conv intel_sub_group_shuffle_down( uint8 cur, uint8 next, uint c );\n"
43837"uint16 __ovld __conv intel_sub_group_shuffle_down( uint16 cur, uint16 next, uint c );\n"
43838"\n"
43839"long __ovld __conv intel_sub_group_shuffle_down( long prev, long cur, uint c );\n"
43840"ulong __ovld __conv intel_sub_group_shuffle_down( ulong prev, ulong cur, uint c );\n"
43841"\n"
43842"float __ovld __conv intel_sub_group_shuffle_up( float prev, float cur, uint c );\n"
43843"float2 __ovld __conv intel_sub_group_shuffle_up( float2 prev, float2 cur, uint c );\n"
43844"float3 __ovld __conv intel_sub_group_shuffle_up( float3 prev, float3 cur, uint c );\n"
43845"float4 __ovld __conv intel_sub_group_shuffle_up( float4 prev, float4 cur, uint c );\n"
43846"float8 __ovld __conv intel_sub_group_shuffle_up( float8 prev, float8 cur, uint c );\n"
43847"float16 __ovld __conv intel_sub_group_shuffle_up( float16 prev, float16 cur, uint c );\n"
43848"\n"
43849"int __ovld __conv intel_sub_group_shuffle_up( int prev, int cur, uint c );\n"
43850"int2 __ovld __conv intel_sub_group_shuffle_up( int2 prev, int2 cur, uint c );\n"
43851"int3 __ovld __conv intel_sub_group_shuffle_up( int3 prev, int3 cur, uint c );\n"
43852"int4 __ovld __conv intel_sub_group_shuffle_up( int4 prev, int4 cur, uint c );\n"
43853"int8 __ovld __conv intel_sub_group_shuffle_up( int8 prev, int8 cur, uint c );\n"
43854"int16 __ovld __conv intel_sub_group_shuffle_up( int16 prev, int16 cur, uint c );\n"
43855"\n"
43856"uint __ovld __conv intel_sub_group_shuffle_up( uint prev, uint cur, uint c );\n"
43857"uint2 __ovld __conv intel_sub_group_shuffle_up( uint2 prev, uint2 cur, uint c );\n"
43858"uint3 __ovld __conv intel_sub_group_shuffle_up( uint3 prev, uint3 cur, uint c );\n"
43859"uint4 __ovld __conv intel_sub_group_shuffle_up( uint4 prev, uint4 cur, uint c );\n"
43860"uint8 __ovld __conv intel_sub_group_shuffle_up( uint8 prev, uint8 cur, uint c );\n"
43861"uint16 __ovld __conv intel_sub_group_shuffle_up( uint16 prev, uint16 cur, uint c );\n"
43862"\n"
43863"long __ovld __conv intel_sub_group_shuffle_up( long prev, long cur, uint c );\n"
43864"ulong __ovld __conv intel_sub_group_shuffle_up( ulong prev, ulong cur, uint c );\n"
43865"\n"
43866"float __ovld __conv intel_sub_group_shuffle_xor( float x, uint c );\n"
43867"float2 __ovld __conv intel_sub_group_shuffle_xor( float2 x, uint c );\n"
43868"float3 __ovld __conv intel_sub_group_shuffle_xor( float3 x, uint c );\n"
43869"float4 __ovld __conv intel_sub_group_shuffle_xor( float4 x, uint c );\n"
43870"float8 __ovld __conv intel_sub_group_shuffle_xor( float8 x, uint c );\n"
43871"float16 __ovld __conv intel_sub_group_shuffle_xor( float16 x, uint c );\n"
43872"\n"
43873"int __ovld __conv intel_sub_group_shuffle_xor( int x, uint c );\n"
43874"int2 __ovld __conv intel_sub_group_shuffle_xor( int2 x, uint c );\n"
43875"int3 __ovld __conv intel_sub_group_shuffle_xor( int3 x, uint c );\n"
43876"int4 __ovld __conv intel_sub_group_shuffle_xor( int4 x, uint c );\n"
43877"int8 __ovld __conv intel_sub_group_shuffle_xor( int8 x, uint c );\n"
43878"int16 __ovld __conv intel_sub_group_shuffle_xor( int16 x, uint c );\n"
43879"\n"
43880"uint __ovld __conv intel_sub_group_shuffle_xor( uint x, uint c );\n"
43881"uint2 __ovld __conv intel_sub_group_shuffle_xor( uint2 x, uint c );\n"
43882"uint3 __ovld __conv intel_sub_group_shuffle_xor( uint3 x, uint c );\n"
43883"uint4 __ovld __conv intel_sub_group_shuffle_xor( uint4 x, uint c );\n"
43884"uint8 __ovld __conv intel_sub_group_shuffle_xor( uint8 x, uint c );\n"
43885"uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c );\n"
43886"\n"
43887"long __ovld __conv intel_sub_group_shuffle_xor( long x, uint c );\n"
43888"ulong __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c );\n"
43889"\n"
43890"uint __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord );\n"
43891"uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord );\n"
43892"uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord );\n"
43893"uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord );\n"
43894"\n"
43895"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
43896"uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord);\n"
43897"uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord);\n"
43898"uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord);\n"
43899"uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord);\n"
43900"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
43901"\n"
43902"uint __ovld __conv intel_sub_group_block_read( const __global uint* p );\n"
43903"uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p );\n"
43904"uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p );\n"
43905"uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p );\n"
43906"\n"
43907"void __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data);\n"
43908"void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data);\n"
43909"void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data);\n"
43910"void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data);\n"
43911"\n"
43912"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
43913"void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data);\n"
43914"void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data);\n"
43915"void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data);\n"
43916"void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data);\n"
43917"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
43918"\n"
43919"void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data );\n"
43920"void __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data );\n"
43921"void __ovld __conv intel_sub_group_block_write4( __global uint* p, uint4 data );\n"
43922"void __ovld __conv intel_sub_group_block_write8( __global uint* p, uint8 data );\n"
43923"\n"
43924"#ifdef cl_khr_fp16\n"
43925"half __ovld __conv intel_sub_group_shuffle( half x, uint c );\n"
43926"half __ovld __conv intel_sub_group_shuffle_down( half prev, half cur, uint c );\n"
43927"half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c );\n"
43928"half __ovld __conv intel_sub_group_shuffle_xor( half x, uint c );\n"
43929"#endif\n"
43930"\n"
43931"#if defined(cl_khr_fp64)\n"
43932"double __ovld __conv intel_sub_group_shuffle( double x, uint c );\n"
43933"double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint c );\n"
43934"double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c );\n"
43935"double __ovld __conv intel_sub_group_shuffle_xor( double x, uint c );\n"
43936"#endif\n"
43937"\n"
43938"#endif //cl_intel_subgroups\n"
43939"\n"
43940"#if defined(cl_intel_subgroups_short)\n"
43941"short __ovld __conv intel_sub_group_broadcast( short x, uint sub_group_local_id );\n"
43942"short2 __ovld __conv intel_sub_group_broadcast( short2 x, uint sub_group_local_id );\n"
43943"short3 __ovld __conv intel_sub_group_broadcast( short3 x, uint sub_group_local_id );\n"
43944"short4 __ovld __conv intel_sub_group_broadcast( short4 x, uint sub_group_local_id );\n"
43945"short8 __ovld __conv intel_sub_group_broadcast( short8 x, uint sub_group_local_id );\n"
43946"\n"
43947"ushort __ovld __conv intel_sub_group_broadcast( ushort x, uint sub_group_local_id );\n"
43948"ushort2 __ovld __conv intel_sub_group_broadcast( ushort2 x, uint sub_group_local_id );\n"
43949"ushort3 __ovld __conv intel_sub_group_broadcast( ushort3 x, uint sub_group_local_id );\n"
43950"ushort4 __ovld __conv intel_sub_group_broadcast( ushort4 x, uint sub_group_local_id );\n"
43951"ushort8 __ovld __conv intel_sub_group_broadcast( ushort8 x, uint sub_group_local_id );\n"
43952"\n"
43953"short __ovld __conv intel_sub_group_shuffle( short x, uint c );\n"
43954"short2 __ovld __conv intel_sub_group_shuffle( short2 x, uint c );\n"
43955"short3 __ovld __conv intel_sub_group_shuffle( short3 x, uint c );\n"
43956"short4 __ovld __conv intel_sub_group_shuffle( short4 x, uint c );\n"
43957"short8 __ovld __conv intel_sub_group_shuffle( short8 x, uint c );\n"
43958"short16 __ovld __conv intel_sub_group_shuffle( short16 x, uint c);\n"
43959"\n"
43960"ushort __ovld __conv intel_sub_group_shuffle( ushort x, uint c );\n"
43961"ushort2 __ovld __conv intel_sub_group_shuffle( ushort2 x, uint c );\n"
43962"ushort3 __ovld __conv intel_sub_group_shuffle( ushort3 x, uint c );\n"
43963"ushort4 __ovld __conv intel_sub_group_shuffle( ushort4 x, uint c );\n"
43964"ushort8 __ovld __conv intel_sub_group_shuffle( ushort8 x, uint c );\n"
43965"ushort16 __ovld __conv intel_sub_group_shuffle( ushort16 x, uint c );\n"
43966"\n"
43967"short __ovld __conv intel_sub_group_shuffle_down( short cur, short next, uint c );\n"
43968"short2 __ovld __conv intel_sub_group_shuffle_down( short2 cur, short2 next, uint c );\n"
43969"short3 __ovld __conv intel_sub_group_shuffle_down( short3 cur, short3 next, uint c );\n"
43970"short4 __ovld __conv intel_sub_group_shuffle_down( short4 cur, short4 next, uint c );\n"
43971"short8 __ovld __conv intel_sub_group_shuffle_down( short8 cur, short8 next, uint c );\n"
43972"short16 __ovld __conv intel_sub_group_shuffle_down( short16 cur, short16 next, uint c );\n"
43973"\n"
43974"ushort __ovld __conv intel_sub_group_shuffle_down( ushort cur, ushort next, uint c );\n"
43975"ushort2 __ovld __conv intel_sub_group_shuffle_down( ushort2 cur, ushort2 next, uint c );\n"
43976"ushort3 __ovld __conv intel_sub_group_shuffle_down( ushort3 cur, ushort3 next, uint c );\n"
43977"ushort4 __ovld __conv intel_sub_group_shuffle_down( ushort4 cur, ushort4 next, uint c );\n"
43978"ushort8 __ovld __conv intel_sub_group_shuffle_down( ushort8 cur, ushort8 next, uint c );\n"
43979"ushort16 __ovld __conv intel_sub_group_shuffle_down( ushort16 cur, ushort16 next, uint c );\n"
43980"\n"
43981"short __ovld __conv intel_sub_group_shuffle_up( short cur, short next, uint c );\n"
43982"short2 __ovld __conv intel_sub_group_shuffle_up( short2 cur, short2 next, uint c );\n"
43983"short3 __ovld __conv intel_sub_group_shuffle_up( short3 cur, short3 next, uint c );\n"
43984"short4 __ovld __conv intel_sub_group_shuffle_up( short4 cur, short4 next, uint c );\n"
43985"short8 __ovld __conv intel_sub_group_shuffle_up( short8 cur, short8 next, uint c );\n"
43986"short16 __ovld __conv intel_sub_group_shuffle_up( short16 cur, short16 next, uint c );\n"
43987"\n"
43988"ushort __ovld __conv intel_sub_group_shuffle_up( ushort cur, ushort next, uint c );\n"
43989"ushort2 __ovld __conv intel_sub_group_shuffle_up( ushort2 cur, ushort2 next, uint c );\n"
43990"ushort3 __ovld __conv intel_sub_group_shuffle_up( ushort3 cur, ushort3 next, uint c );\n"
43991"ushort4 __ovld __conv intel_sub_group_shuffle_up( ushort4 cur, ushort4 next, uint c );\n"
43992"ushort8 __ovld __conv intel_sub_group_shuffle_up( ushort8 cur, ushort8 next, uint c );\n"
43993"ushort16 __ovld __conv intel_sub_group_shuffle_up( ushort16 cur, ushort16 next, uint c );\n"
43994"\n"
43995"short __ovld __conv intel_sub_group_shuffle_xor( short x, uint c );\n"
43996"short2 __ovld __conv intel_sub_group_shuffle_xor( short2 x, uint c );\n"
43997"short3 __ovld __conv intel_sub_group_shuffle_xor( short3 x, uint c );\n"
43998"short4 __ovld __conv intel_sub_group_shuffle_xor( short4 x, uint c );\n"
43999"short8 __ovld __conv intel_sub_group_shuffle_xor( short8 x, uint c );\n"
44000"short16 __ovld __conv intel_sub_group_shuffle_xor( short16 x, uint c );\n"
44001"\n"
44002"ushort __ovld __conv intel_sub_group_shuffle_xor( ushort x, uint c );\n"
44003"ushort2 __ovld __conv intel_sub_group_shuffle_xor( ushort2 x, uint c );\n"
44004"ushort3 __ovld __conv intel_sub_group_shuffle_xor( ushort3 x, uint c );\n"
44005"ushort4 __ovld __conv intel_sub_group_shuffle_xor( ushort4 x, uint c );\n"
44006"ushort8 __ovld __conv intel_sub_group_shuffle_xor( ushort8 x, uint c );\n"
44007"ushort16 __ovld __conv intel_sub_group_shuffle_xor( ushort16 x, uint c );\n"
44008"\n"
44009"short __ovld __conv intel_sub_group_reduce_add( short x );\n"
44010"ushort __ovld __conv intel_sub_group_reduce_add( ushort x );\n"
44011"short __ovld __conv intel_sub_group_reduce_min( short x );\n"
44012"ushort __ovld __conv intel_sub_group_reduce_min( ushort x );\n"
44013"short __ovld __conv intel_sub_group_reduce_max( short x );\n"
44014"ushort __ovld __conv intel_sub_group_reduce_max( ushort x );\n"
44015"\n"
44016"short __ovld __conv intel_sub_group_scan_exclusive_add( short x );\n"
44017"ushort __ovld __conv intel_sub_group_scan_exclusive_add( ushort x );\n"
44018"short __ovld __conv intel_sub_group_scan_exclusive_min( short x );\n"
44019"ushort __ovld __conv intel_sub_group_scan_exclusive_min( ushort x );\n"
44020"short __ovld __conv intel_sub_group_scan_exclusive_max( short x );\n"
44021"ushort __ovld __conv intel_sub_group_scan_exclusive_max( ushort x );\n"
44022"\n"
44023"short __ovld __conv intel_sub_group_scan_inclusive_add( short x );\n"
44024"ushort __ovld __conv intel_sub_group_scan_inclusive_add( ushort x );\n"
44025"short __ovld __conv intel_sub_group_scan_inclusive_min( short x );\n"
44026"ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x );\n"
44027"short __ovld __conv intel_sub_group_scan_inclusive_max( short x );\n"
44028"ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x );\n"
44029"\n"
44030"uint __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord );\n"
44031"uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord );\n"
44032"uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord );\n"
44033"uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord );\n"
44034"\n"
44035"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44036"uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord );\n"
44037"uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord );\n"
44038"uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord );\n"
44039"uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord );\n"
44040"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44041"\n"
44042"uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );\n"
44043"uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );\n"
44044"uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );\n"
44045"uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );\n"
44046"\n"
44047"void __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data );\n"
44048"void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data );\n"
44049"void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data );\n"
44050"void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data );\n"
44051"\n"
44052"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44053"void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data );\n"
44054"void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data );\n"
44055"void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data );\n"
44056"void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data );\n"
44057"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44058"\n"
44059"void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );\n"
44060"void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );\n"
44061"void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );\n"
44062"void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );\n"
44063"\n"
44064"ushort __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord );\n"
44065"ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord );\n"
44066"ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord );\n"
44067"ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord );\n"
44068"\n"
44069"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44070"ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord);\n"
44071"ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord);\n"
44072"ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord);\n"
44073"ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord);\n"
44074"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44075"\n"
44076"ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p );\n"
44077"ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p );\n"
44078"ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p );\n"
44079"ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p );\n"
44080"\n"
44081"void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort data);\n"
44082"void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data);\n"
44083"void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data);\n"
44084"void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data);\n"
44085"\n"
44086"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44087"void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data);\n"
44088"void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data);\n"
44089"void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data);\n"
44090"void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data);\n"
44091"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44092"\n"
44093"void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data );\n"
44094"void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data );\n"
44095"void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, ushort4 data );\n"
44096"void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );\n"
44097"#endif // cl_intel_subgroups_short\n"
44098"\n"
44099"#ifdef cl_intel_device_side_avc_motion_estimation\n"
44100"#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin\n"
44101"\n"
44102"#define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0\n"
44103"#define CLK_AVC_ME_MAJOR_16x8_INTEL 0x1\n"
44104"#define CLK_AVC_ME_MAJOR_8x16_INTEL 0x2\n"
44105"#define CLK_AVC_ME_MAJOR_8x8_INTEL 0x3\n"
44106"\n"
44107"#define CLK_AVC_ME_MINOR_8x8_INTEL 0x0\n"
44108"#define CLK_AVC_ME_MINOR_8x4_INTEL 0x1\n"
44109"#define CLK_AVC_ME_MINOR_4x8_INTEL 0x2\n"
44110"#define CLK_AVC_ME_MINOR_4x4_INTEL 0x3\n"
44111"\n"
44112"#define CLK_AVC_ME_MAJOR_FORWARD_INTEL 0x0\n"
44113"#define CLK_AVC_ME_MAJOR_BACKWARD_INTEL 0x1\n"
44114"#define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2\n"
44115"\n"
44116"#define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0\n"
44117"#define CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E\n"
44118"#define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D\n"
44119"#define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B\n"
44120"#define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77\n"
44121"#define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F\n"
44122"#define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F\n"
44123"#define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F\n"
44124"\n"
44125"#define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0\n"
44126"#define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1\n"
44127"#define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2\n"
44128"\n"
44129"#define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0\n"
44130"#define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1\n"
44131"#define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2\n"
44132"#define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3\n"
44133"#define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4\n"
44134"#define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5\n"
44135"#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6\n"
44136"#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7\n"
44137"#define CLK_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8\n"
44138"\n"
44139"#define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0\n"
44140"#define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2\n"
44141"\n"
44142"#define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0\n"
44143"#define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1\n"
44144"#define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3\n"
44145"\n"
44146"#define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0\n"
44147"#define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1\n"
44148"#define CLK_AVC_ME_COST_PRECISION_PEL_INTEL 0x2\n"
44149"#define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3\n"
44150"\n"
44151"#define CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10\n"
44152"#define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15\n"
44153"#define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20\n"
44154"#define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B\n"
44155"#define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30\n"
44156"\n"
44157"#define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0\n"
44158"#define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2\n"
44159"#define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4\n"
44160"#define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8\n"
44161"\n"
44162"#define CLK_AVC_ME_INTRA_16x16_INTEL 0x0\n"
44163"#define CLK_AVC_ME_INTRA_8x8_INTEL 0x1\n"
44164"#define CLK_AVC_ME_INTRA_4x4_INTEL 0x2\n"
44165"\n"
44166"#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0\n"
44167"#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000\n"
44168"\n"
44169"#define CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL (0x1 << 24)\n"
44170"#define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL (0x2 << 24)\n"
44171"#define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL (0x3 << 24)\n"
44172"#define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL (0x55 << 24)\n"
44173"#define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL (0xAA << 24)\n"
44174"#define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL (0xFF << 24)\n"
44175"#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL (0x1 << 24)\n"
44176"#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL (0x2 << 24)\n"
44177"#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL (0x1 << 26)\n"
44178"#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL (0x2 << 26)\n"
44179"#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL (0x1 << 28)\n"
44180"#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL (0x2 << 28)\n"
44181"#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL (0x1 << 30)\n"
44182"#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL (0x2 << 30)\n"
44183"\n"
44184"#define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00\n"
44185"#define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80\n"
44186"\n"
44187"#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL 0x0\n"
44188"#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6\n"
44189"#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5\n"
44190"#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3\n"
44191"\n"
44192"#define CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60\n"
44193"#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10\n"
44194"#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8\n"
44195"#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4\n"
44196"\n"
44197"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0\n"
44198"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1\n"
44199"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2\n"
44200"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3\n"
44201"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4\n"
44202"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4\n"
44203"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5\n"
44204"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6\n"
44205"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7\n"
44206"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8\n"
44207"#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0\n"
44208"#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1\n"
44209"#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2\n"
44210"#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3\n"
44211"\n"
44212"#define CLK_AVC_ME_FRAME_FORWARD_INTEL 0x1\n"
44213"#define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2\n"
44214"#define CLK_AVC_ME_FRAME_DUAL_INTEL 0x3\n"
44215"\n"
44216"#define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0\n"
44217"#define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1\n"
44218"\n"
44219"#define CLK_AVC_ME_INITIALIZE_INTEL 0x0\n"
44220"\n"
44221"#define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL 0x0\n"
44222"#define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL 0x0\n"
44223"#define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL 0x0\n"
44224"\n"
44225"#define CLK_AVC_IME_RESULT_INITIALIZE_INTEL 0x0\n"
44226"#define CLK_AVC_REF_RESULT_INITIALIZE_INTEL 0x0\n"
44227"#define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL 0x0\n"
44228"\n"
44229"#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0\n"
44230"#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0\n"
44231"#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0\n"
44232"#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0\n"
44233"\n"
44234"// MCE built-in functions\n"
44235"uchar __ovld\n"
44236"intel_sub_group_avc_mce_get_default_inter_base_multi_reference_penalty(\n"
44237" uchar slice_type, uchar qp);\n"
44238"ulong __ovld intel_sub_group_avc_mce_get_default_inter_shape_penalty(\n"
44239" uchar slice_type, uchar qp);\n"
44240"uchar __ovld intel_sub_group_avc_mce_get_default_inter_direction_penalty(\n"
44241" uchar slice_type, uchar qp);\n"
44242"uint __ovld intel_sub_group_avc_mce_get_default_intra_luma_shape_penalty(\n"
44243" uchar slice_type, uchar qp);\n"
44244"uint2 __ovld\n"
44245"intel_sub_group_avc_mce_get_default_inter_motion_vector_cost_table(\n"
44246" uchar slice_type, uchar qp);\n"
44247"uchar __ovld intel_sub_group_avc_mce_get_default_intra_luma_mode_penalty(\n"
44248" uchar slice_type, uchar qp);\n"
44249"\n"
44250"uint2 __ovld intel_sub_group_avc_mce_get_default_high_penalty_cost_table();\n"
44251"uint2 __ovld intel_sub_group_avc_mce_get_default_medium_penalty_cost_table();\n"
44252"uint2 __ovld intel_sub_group_avc_mce_get_default_low_penalty_cost_table();\n"
44253"uint __ovld intel_sub_group_avc_mce_get_default_non_dc_luma_intra_penalty();\n"
44254"uchar __ovld\n"
44255"intel_sub_group_avc_mce_get_default_intra_chroma_mode_base_penalty();\n"
44256"\n"
44257"intel_sub_group_avc_mce_payload_t __ovld\n"
44258"intel_sub_group_avc_mce_set_inter_base_multi_reference_penalty(\n"
44259" uchar reference_base_penalty, intel_sub_group_avc_mce_payload_t payload);\n"
44260"intel_sub_group_avc_mce_payload_t __ovld\n"
44261"intel_sub_group_avc_mce_set_inter_shape_penalty(\n"
44262" ulong packed_shape_penalty, intel_sub_group_avc_mce_payload_t payload);\n"
44263"intel_sub_group_avc_mce_payload_t __ovld\n"
44264"intel_sub_group_avc_mce_set_inter_direction_penalty(\n"
44265" uchar direction_cost, intel_sub_group_avc_mce_payload_t payload);\n"
44266"intel_sub_group_avc_mce_payload_t __ovld\n"
44267"intel_sub_group_avc_mce_set_motion_vector_cost_function(\n"
44268" ulong packed_cost_center_delta, uint2 packed_cost_table,\n"
44269" uchar cost_precision, intel_sub_group_avc_mce_payload_t payload);\n"
44270"intel_sub_group_avc_mce_payload_t __ovld\n"
44271"intel_sub_group_avc_mce_set_ac_only_haar(\n"
44272" intel_sub_group_avc_mce_payload_t payload);\n"
44273"intel_sub_group_avc_mce_payload_t __ovld\n"
44274"intel_sub_group_avc_mce_set_source_interlaced_field_polarity(\n"
44275" uchar src_field_polarity, intel_sub_group_avc_mce_payload_t payload);\n"
44276"intel_sub_group_avc_mce_payload_t __ovld\n"
44277"intel_sub_group_avc_mce_set_single_reference_interlaced_field_polarity(\n"
44278" uchar ref_field_polarity, intel_sub_group_avc_mce_payload_t payload);\n"
44279"intel_sub_group_avc_mce_payload_t __ovld\n"
44280"intel_sub_group_avc_mce_set_dual_reference_interlaced_field_polarities(\n"
44281" uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,\n"
44282" intel_sub_group_avc_mce_payload_t payload);\n"
44283"\n"
44284"ulong __ovld intel_sub_group_avc_mce_get_motion_vectors(\n"
44285" intel_sub_group_avc_mce_result_t result);\n"
44286"ushort __ovld intel_sub_group_avc_mce_get_inter_distortions(\n"
44287" intel_sub_group_avc_mce_result_t result);\n"
44288"ushort __ovld intel_sub_group_avc_mce_get_best_inter_distortion(\n"
44289" intel_sub_group_avc_mce_result_t result);\n"
44290"uchar __ovld intel_sub_group_avc_mce_get_inter_major_shape(\n"
44291" intel_sub_group_avc_mce_result_t result);\n"
44292"uchar __ovld intel_sub_group_avc_mce_get_inter_minor_shapes(\n"
44293" intel_sub_group_avc_mce_result_t result);\n"
44294"uchar __ovld intel_sub_group_avc_mce_get_inter_directions(\n"
44295" intel_sub_group_avc_mce_result_t result);\n"
44296"uchar __ovld intel_sub_group_avc_mce_get_inter_motion_vector_count(\n"
44297" intel_sub_group_avc_mce_result_t result);\n"
44298"uint __ovld intel_sub_group_avc_mce_get_inter_reference_ids(\n"
44299" intel_sub_group_avc_mce_result_t result);\n"
44300"uchar __ovld\n"
44301"intel_sub_group_avc_mce_get_inter_reference_interlaced_field_polarities(\n"
44302" uint packed_reference_ids, uint packed_reference_parameter_field_polarities,\n"
44303" intel_sub_group_avc_mce_result_t result);\n"
44304"\n"
44305"// IME built-in functions\n"
44306"intel_sub_group_avc_ime_payload_t __ovld\n"
44307"intel_sub_group_avc_ime_initialize(\n"
44308" ushort2 src_coord, uchar partition_mask, uchar sad_adjustment);\n"
44309"intel_sub_group_avc_ime_payload_t __ovld\n"
44310"intel_sub_group_avc_ime_set_single_reference(\n"
44311" short2 ref_offset, uchar search_window_config,\n"
44312" intel_sub_group_avc_ime_payload_t payload);\n"
44313"intel_sub_group_avc_ime_payload_t __ovld\n"
44314"intel_sub_group_avc_ime_set_dual_reference(\n"
44315" short2 fwd_ref_offset, short2 bwd_ref_offset, uchar search_window_config,\n"
44316" intel_sub_group_avc_ime_payload_t payload);\n"
44317"intel_sub_group_avc_ime_payload_t __ovld\n"
44318"intel_sub_group_avc_ime_set_max_motion_vector_count(\n"
44319" uchar max_motion_vector_count, intel_sub_group_avc_ime_payload_t payload);\n"
44320"intel_sub_group_avc_ime_payload_t __ovld\n"
44321"intel_sub_group_avc_ime_set_unidirectional_mix_disable(\n"
44322" intel_sub_group_avc_ime_payload_t payload);\n"
44323"intel_sub_group_avc_ime_payload_t __ovld\n"
44324"intel_sub_group_avc_ime_set_early_search_termination_threshold(\n"
44325" uchar threshold, intel_sub_group_avc_ime_payload_t payload);\n"
44326"intel_sub_group_avc_ime_payload_t __ovld\n"
44327"intel_sub_group_avc_ime_set_weighted_sad(\n"
44328" uint packed_sad_weights, intel_sub_group_avc_ime_payload_t payload);\n"
44329"\n"
44330"__attribute__((deprecated(\"If you use the latest Intel driver, please use \"\n"
44331" \"intel_sub_group_avc_ime_ref_window_size instead\",\n"
44332" \"intel_sub_group_avc_ime_ref_window_size\")))\n"
44333"ushort2 __ovld\n"
44334"intel_sub_group_ime_ref_window_size(uchar search_window_config, char dual_ref);\n"
44335"ushort2 __ovld intel_sub_group_avc_ime_ref_window_size(\n"
44336" uchar search_window_config, char dual_ref);\n"
44337"short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset(\n"
44338" short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size,\n"
44339" ushort2 image_size);\n"
44340"\n"
44341"intel_sub_group_avc_ime_result_t __ovld\n"
44342"intel_sub_group_avc_ime_evaluate_with_single_reference(\n"
44343" read_only image2d_t src_image, read_only image2d_t ref_image,\n"
44344" sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload);\n"
44345"intel_sub_group_avc_ime_result_t __ovld\n"
44346"intel_sub_group_avc_ime_evaluate_with_dual_reference(\n"
44347" read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n"
44348" read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n"
44349" intel_sub_group_avc_ime_payload_t payload);\n"
44350"intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld\n"
44351"intel_sub_group_avc_ime_evaluate_with_single_reference_streamout(\n"
44352" read_only image2d_t src_image, read_only image2d_t ref_image,\n"
44353" sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload);\n"
44354"intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld\n"
44355"intel_sub_group_avc_ime_evaluate_with_dual_reference_streamout(\n"
44356" read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n"
44357" read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n"
44358" intel_sub_group_avc_ime_payload_t payload);\n"
44359"intel_sub_group_avc_ime_result_t __ovld\n"
44360"intel_sub_group_avc_ime_evaluate_with_single_reference_streamin(\n"
44361" read_only image2d_t src_image, read_only image2d_t ref_image,\n"
44362" sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload,\n"
44363" intel_sub_group_avc_ime_single_reference_streamin_t streamin_components);\n"
44364"intel_sub_group_avc_ime_result_t __ovld\n"
44365"intel_sub_group_avc_ime_evaluate_with_dual_reference_streamin(\n"
44366" read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n"
44367" read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n"
44368" intel_sub_group_avc_ime_payload_t payload,\n"
44369" intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components);\n"
44370"intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld\n"
44371"intel_sub_group_avc_ime_evaluate_with_single_reference_streaminout(\n"
44372" read_only image2d_t src_image, read_only image2d_t ref_image,\n"
44373" sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload,\n"
44374" intel_sub_group_avc_ime_single_reference_streamin_t streamin_components);\n"
44375"intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld\n"
44376"intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout(\n"
44377" read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n"
44378" read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n"
44379" intel_sub_group_avc_ime_payload_t payload,\n"
44380" intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components);\n"
44381"\n"
44382"intel_sub_group_avc_ime_single_reference_streamin_t __ovld\n"
44383"intel_sub_group_avc_ime_get_single_reference_streamin(\n"
44384" intel_sub_group_avc_ime_result_single_reference_streamout_t result);\n"
44385"intel_sub_group_avc_ime_dual_reference_streamin_t __ovld\n"
44386"intel_sub_group_avc_ime_get_dual_reference_streamin(\n"
44387" intel_sub_group_avc_ime_result_dual_reference_streamout_t result);\n"
44388"intel_sub_group_avc_ime_result_t __ovld\n"
44389"intel_sub_group_avc_ime_strip_single_reference_streamout(\n"
44390" intel_sub_group_avc_ime_result_single_reference_streamout_t result);\n"
44391"intel_sub_group_avc_ime_result_t __ovld\n"
44392"intel_sub_group_avc_ime_strip_dual_reference_streamout(\n"
44393" intel_sub_group_avc_ime_result_dual_reference_streamout_t result);\n"
44394"\n"
44395"uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors(\n"
44396" intel_sub_group_avc_ime_result_single_reference_streamout_t result,\n"
44397" uchar major_shape);\n"
44398"ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions(\n"
44399" intel_sub_group_avc_ime_result_single_reference_streamout_t result,\n"
44400" uchar major_shape);\n"
44401"uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids(\n"
44402" intel_sub_group_avc_ime_result_single_reference_streamout_t result,\n"
44403" uchar major_shape);\n"
44404"uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors(\n"
44405" intel_sub_group_avc_ime_result_dual_reference_streamout_t result,\n"
44406" uchar major_shape, uchar direction);\n"
44407"ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions(\n"
44408" intel_sub_group_avc_ime_result_dual_reference_streamout_t result,\n"
44409" uchar major_shape, uchar direction);\n"
44410"uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids(\n"
44411" intel_sub_group_avc_ime_result_dual_reference_streamout_t result,\n"
44412" uchar major_shape, uchar direction);\n"
44413"\n"
44414"uchar __ovld intel_sub_group_avc_ime_get_border_reached(\n"
44415" uchar image_select, intel_sub_group_avc_ime_result_t result);\n"
44416"uchar __ovld intel_sub_group_avc_ime_get_truncated_search_indication(\n"
44417" intel_sub_group_avc_ime_result_t result);\n"
44418"uchar __ovld\n"
44419"intel_sub_group_avc_ime_get_unidirectional_early_search_termination(\n"
44420" intel_sub_group_avc_ime_result_t result);\n"
44421"uint __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_motion_vector(\n"
44422" intel_sub_group_avc_ime_result_t result);\n"
44423"ushort __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_distortion(\n"
44424" intel_sub_group_avc_ime_result_t result);\n"
44425"\n"
44426"// REF built-in functions\n"
44427"intel_sub_group_avc_ref_payload_t __ovld\n"
44428"intel_sub_group_avc_fme_initialize(\n"
44429" ushort2 src_coord, ulong motion_vectors, uchar major_shapes,\n"
44430" uchar minor_shapes, uchar directions, uchar pixel_resolution,\n"
44431" uchar sad_adjustment);\n"
44432"intel_sub_group_avc_ref_payload_t __ovld\n"
44433"intel_sub_group_avc_bme_initialize(\n"
44434" ushort2 src_coord, ulong motion_vectors, uchar major_shapes,\n"
44435" uchar minor_shapes, uchar directions, uchar pixel_resolution,\n"
44436" uchar bidirectional_weight, uchar sad_adjustment);\n"
44437"\n"
44438"intel_sub_group_avc_ref_payload_t __ovld\n"
44439"intel_sub_group_avc_ref_set_bidirectional_mix_disable(\n"
44440" intel_sub_group_avc_ref_payload_t payload);\n"
44441"intel_sub_group_avc_ref_payload_t __ovld\n"
44442"intel_sub_group_avc_ref_set_bilinear_filter_enable(\n"
44443" intel_sub_group_avc_ref_payload_t payload);\n"
44444"\n"
44445"intel_sub_group_avc_ref_result_t __ovld\n"
44446"intel_sub_group_avc_ref_evaluate_with_single_reference(\n"
44447" read_only image2d_t src_image, read_only image2d_t ref_image,\n"
44448" sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload);\n"
44449"intel_sub_group_avc_ref_result_t __ovld\n"
44450"intel_sub_group_avc_ref_evaluate_with_dual_reference(\n"
44451" read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n"
44452" read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n"
44453" intel_sub_group_avc_ref_payload_t payload);\n"
44454"intel_sub_group_avc_ref_result_t __ovld\n"
44455"intel_sub_group_avc_ref_evaluate_with_multi_reference(\n"
44456" read_only image2d_t src_image, uint packed_reference_ids,\n"
44457" sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload);\n"
44458"intel_sub_group_avc_ref_result_t __ovld\n"
44459"intel_sub_group_avc_ref_evaluate_with_multi_reference(\n"
44460" read_only image2d_t src_image, uint packed_reference_ids,\n"
44461" uchar packed_reference_field_polarities, sampler_t vme_media_sampler,\n"
44462" intel_sub_group_avc_ref_payload_t payload);\n"
44463"\n"
44464"// SIC built-in functions\n"
44465"intel_sub_group_avc_sic_payload_t __ovld\n"
44466"intel_sub_group_avc_sic_initialize(\n"
44467" ushort2 src_coord);\n"
44468"intel_sub_group_avc_sic_payload_t __ovld\n"
44469"intel_sub_group_avc_sic_configure_skc(\n"
44470" uint skip_block_partition_type, uint skip_motion_vector_mask,\n"
44471" ulong motion_vectors, uchar bidirectional_weight, uchar skip_sad_adjustment,\n"
44472" intel_sub_group_avc_sic_payload_t payload);\n"
44473"intel_sub_group_avc_sic_payload_t __ovld\n"
44474"intel_sub_group_avc_sic_configure_ipe(\n"
44475" uchar luma_intra_partition_mask, uchar intra_neighbour_availabilty,\n"
44476" uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel,\n"
44477" uchar upper_edge_luma_pixels, uchar upper_right_edge_luma_pixels,\n"
44478" uchar intra_sad_adjustment, intel_sub_group_avc_sic_payload_t payload);\n"
44479"intel_sub_group_avc_sic_payload_t __ovld\n"
44480"intel_sub_group_avc_sic_configure_ipe(\n"
44481" uchar luma_intra_partition_mask, uchar intra_neighbour_availabilty,\n"
44482" uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel,\n"
44483" uchar upper_edge_luma_pixels, uchar upper_right_edge_luma_pixels,\n"
44484" ushort left_edge_chroma_pixels, ushort upper_left_corner_chroma_pixel,\n"
44485" ushort upper_edge_chroma_pixels, uchar intra_sad_adjustment,\n"
44486" intel_sub_group_avc_sic_payload_t payload);\n"
44487"uint __ovld\n"
44488"intel_sub_group_avc_sic_get_motion_vector_mask(\n"
44489" uint skip_block_partition_type, uchar direction);\n"
44490"\n"
44491"intel_sub_group_avc_sic_payload_t __ovld\n"
44492"intel_sub_group_avc_sic_set_intra_luma_shape_penalty(\n"
44493" uint packed_shape_cost, intel_sub_group_avc_sic_payload_t payload);\n"
44494"intel_sub_group_avc_sic_payload_t __ovld\n"
44495"intel_sub_group_avc_sic_set_intra_luma_mode_cost_function(\n"
44496" uchar luma_mode_penalty, uint luma_packed_neighbor_modes,\n"
44497" uint luma_packed_non_dc_penalty, intel_sub_group_avc_sic_payload_t payload);\n"
44498"intel_sub_group_avc_sic_payload_t __ovld\n"
44499"intel_sub_group_avc_sic_set_intra_chroma_mode_cost_function(\n"
44500" uchar chroma_mode_penalty, intel_sub_group_avc_sic_payload_t payload);\n"
44501"\n"
44502"intel_sub_group_avc_sic_payload_t __ovld\n"
44503"intel_sub_group_avc_sic_set_skc_bilinear_filter_enable(\n"
44504" intel_sub_group_avc_sic_payload_t payload);\n"
44505"intel_sub_group_avc_sic_payload_t __ovld\n"
44506"intel_sub_group_avc_sic_set_skc_forward_transform_enable(\n"
44507" ulong packed_sad_coefficients, intel_sub_group_avc_sic_payload_t payload);\n"
44508"intel_sub_group_avc_sic_payload_t __ovld\n"
44509"intel_sub_group_avc_sic_set_block_based_raw_skip_sad(\n"
44510" uchar block_based_skip_type,\n"
44511" intel_sub_group_avc_sic_payload_t payload);\n"
44512"\n"
44513"intel_sub_group_avc_sic_result_t __ovld\n"
44514"intel_sub_group_avc_sic_evaluate_ipe(\n"
44515" read_only image2d_t src_image, sampler_t vme_media_sampler,\n"
44516" intel_sub_group_avc_sic_payload_t payload);\n"
44517"intel_sub_group_avc_sic_result_t __ovld\n"
44518"intel_sub_group_avc_sic_evaluate_with_single_reference(\n"
44519" read_only image2d_t src_image, read_only image2d_t ref_image,\n"
44520" sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload);\n"
44521"intel_sub_group_avc_sic_result_t __ovld\n"
44522"intel_sub_group_avc_sic_evaluate_with_dual_reference(\n"
44523" read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n"
44524" read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n"
44525" intel_sub_group_avc_sic_payload_t payload);\n"
44526"intel_sub_group_avc_sic_result_t __ovld\n"
44527"intel_sub_group_avc_sic_evaluate_with_multi_reference(\n"
44528" read_only image2d_t src_image, uint packed_reference_ids,\n"
44529" sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload);\n"
44530"intel_sub_group_avc_sic_result_t __ovld\n"
44531"intel_sub_group_avc_sic_evaluate_with_multi_reference(\n"
44532" read_only image2d_t src_image, uint packed_reference_ids,\n"
44533" uchar packed_reference_field_polarities, sampler_t vme_media_sampler,\n"
44534" intel_sub_group_avc_sic_payload_t payload);\n"
44535"\n"
44536"uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape(\n"
44537" intel_sub_group_avc_sic_result_t result);\n"
44538"ushort __ovld intel_sub_group_avc_sic_get_best_ipe_luma_distortion(\n"
44539" intel_sub_group_avc_sic_result_t result);\n"
44540"ushort __ovld intel_sub_group_avc_sic_get_best_ipe_chroma_distortion(\n"
44541" intel_sub_group_avc_sic_result_t result);\n"
44542"ulong __ovld intel_sub_group_avc_sic_get_packed_ipe_luma_modes(\n"
44543" intel_sub_group_avc_sic_result_t result);\n"
44544"uchar __ovld intel_sub_group_avc_sic_get_ipe_chroma_mode(\n"
44545" intel_sub_group_avc_sic_result_t result);\n"
44546"uint __ovld intel_sub_group_avc_sic_get_packed_skc_luma_count_threshold(\n"
44547" intel_sub_group_avc_sic_result_t result);\n"
44548"ulong __ovld intel_sub_group_avc_sic_get_packed_skc_luma_sum_threshold(\n"
44549" intel_sub_group_avc_sic_result_t result);\n"
44550"ushort __ovld intel_sub_group_avc_sic_get_inter_raw_sads(\n"
44551" intel_sub_group_avc_sic_result_t result);\n"
44552"\n"
44553"// Wrappers\n"
44554"intel_sub_group_avc_ime_payload_t __ovld\n"
44555"intel_sub_group_avc_ime_set_inter_base_multi_reference_penalty(\n"
44556" uchar reference_base_penalty, intel_sub_group_avc_ime_payload_t payload);\n"
44557"intel_sub_group_avc_ref_payload_t __ovld\n"
44558"intel_sub_group_avc_ref_set_inter_base_multi_reference_penalty(\n"
44559" uchar reference_base_penalty, intel_sub_group_avc_ref_payload_t payload);\n"
44560"intel_sub_group_avc_sic_payload_t __ovld\n"
44561"intel_sub_group_avc_sic_set_inter_base_multi_reference_penalty(\n"
44562" uchar reference_base_penalty, intel_sub_group_avc_sic_payload_t payload);\n"
44563"\n"
44564"intel_sub_group_avc_ime_payload_t __ovld\n"
44565"intel_sub_group_avc_ime_set_inter_shape_penalty(\n"
44566" ulong packed_shape_cost, intel_sub_group_avc_ime_payload_t payload);\n"
44567"intel_sub_group_avc_ref_payload_t __ovld\n"
44568"intel_sub_group_avc_ref_set_inter_shape_penalty(\n"
44569" ulong packed_shape_cost, intel_sub_group_avc_ref_payload_t payload);\n"
44570"intel_sub_group_avc_sic_payload_t __ovld\n"
44571"intel_sub_group_avc_sic_set_inter_shape_penalty(\n"
44572" ulong packed_shape_cost, intel_sub_group_avc_sic_payload_t payload);\n"
44573"\n"
44574"intel_sub_group_avc_ime_payload_t __ovld\n"
44575"intel_sub_group_avc_ime_set_inter_direction_penalty(\n"
44576" uchar direction_cost, intel_sub_group_avc_ime_payload_t payload);\n"
44577"intel_sub_group_avc_ref_payload_t __ovld\n"
44578"intel_sub_group_avc_ref_set_inter_direction_penalty(\n"
44579" uchar direction_cost, intel_sub_group_avc_ref_payload_t payload);\n"
44580"intel_sub_group_avc_sic_payload_t __ovld\n"
44581"intel_sub_group_avc_sic_set_inter_direction_penalty(\n"
44582" uchar direction_cost, intel_sub_group_avc_sic_payload_t payload);\n"
44583"\n"
44584"intel_sub_group_avc_ime_payload_t __ovld\n"
44585"intel_sub_group_avc_ime_set_motion_vector_cost_function(\n"
44586" ulong packed_cost_center_delta, uint2 packed_cost_table,\n"
44587" uchar cost_precision, intel_sub_group_avc_ime_payload_t payload);\n"
44588"intel_sub_group_avc_ref_payload_t __ovld\n"
44589"intel_sub_group_avc_ref_set_motion_vector_cost_function(\n"
44590" ulong packed_cost_center_delta, uint2 packed_cost_table,\n"
44591" uchar cost_precision, intel_sub_group_avc_ref_payload_t payload);\n"
44592"intel_sub_group_avc_sic_payload_t __ovld\n"
44593"intel_sub_group_avc_sic_set_motion_vector_cost_function(\n"
44594" ulong packed_cost_center_delta, uint2 packed_cost_table,\n"
44595" uchar cost_precision, intel_sub_group_avc_sic_payload_t payload);\n"
44596"\n"
44597"intel_sub_group_avc_ime_payload_t __ovld\n"
44598"intel_sub_group_avc_ime_set_source_interlaced_field_polarity(\n"
44599" uchar src_field_polarity, intel_sub_group_avc_ime_payload_t payload);\n"
44600"intel_sub_group_avc_ref_payload_t __ovld\n"
44601"intel_sub_group_avc_ref_set_source_interlaced_field_polarity(\n"
44602" uchar src_field_polarity, intel_sub_group_avc_ref_payload_t payload);\n"
44603"intel_sub_group_avc_sic_payload_t __ovld\n"
44604"intel_sub_group_avc_sic_set_source_interlaced_field_polarity(\n"
44605" uchar src_field_polarity, intel_sub_group_avc_sic_payload_t payload);\n"
44606"\n"
44607"intel_sub_group_avc_ime_payload_t __ovld\n"
44608"intel_sub_group_avc_ime_set_single_reference_interlaced_field_polarity(\n"
44609" uchar ref_field_polarity, intel_sub_group_avc_ime_payload_t payload);\n"
44610"intel_sub_group_avc_ref_payload_t __ovld\n"
44611"intel_sub_group_avc_ref_set_single_reference_interlaced_field_polarity(\n"
44612" uchar ref_field_polarity, intel_sub_group_avc_ref_payload_t payload);\n"
44613"intel_sub_group_avc_sic_payload_t __ovld\n"
44614"intel_sub_group_avc_sic_set_single_reference_interlaced_field_polarity(\n"
44615" uchar ref_field_polarity, intel_sub_group_avc_sic_payload_t payload);\n"
44616"intel_sub_group_avc_ime_payload_t __ovld\n"
44617"intel_sub_group_avc_ime_set_dual_reference_interlaced_field_polarities(\n"
44618" uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,\n"
44619" intel_sub_group_avc_ime_payload_t payload);\n"
44620"intel_sub_group_avc_ref_payload_t __ovld\n"
44621"intel_sub_group_avc_ref_set_dual_reference_interlaced_field_polarities(\n"
44622" uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,\n"
44623" intel_sub_group_avc_ref_payload_t payload);\n"
44624"intel_sub_group_avc_sic_payload_t __ovld\n"
44625"intel_sub_group_avc_sic_set_dual_reference_interlaced_field_polarities(\n"
44626" uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,\n"
44627" intel_sub_group_avc_sic_payload_t payload);\n"
44628"\n"
44629"intel_sub_group_avc_ime_payload_t __ovld\n"
44630"intel_sub_group_avc_ime_set_ac_only_haar(\n"
44631" intel_sub_group_avc_ime_payload_t payload);\n"
44632"intel_sub_group_avc_ref_payload_t __ovld\n"
44633"intel_sub_group_avc_ref_set_ac_only_haar(\n"
44634" intel_sub_group_avc_ref_payload_t payload);\n"
44635"intel_sub_group_avc_sic_payload_t __ovld\n"
44636"intel_sub_group_avc_sic_set_ac_only_haar(\n"
44637" intel_sub_group_avc_sic_payload_t payload);\n"
44638"\n"
44639"ulong __ovld intel_sub_group_avc_ime_get_motion_vectors(\n"
44640" intel_sub_group_avc_ime_result_t result);\n"
44641"ulong __ovld intel_sub_group_avc_ref_get_motion_vectors(\n"
44642" intel_sub_group_avc_ref_result_t result);\n"
44643"\n"
44644"ushort __ovld intel_sub_group_avc_ime_get_inter_distortions(\n"
44645" intel_sub_group_avc_ime_result_t result);\n"
44646"ushort __ovld intel_sub_group_avc_ref_get_inter_distortions(\n"
44647" intel_sub_group_avc_ref_result_t result);\n"
44648"ushort __ovld intel_sub_group_avc_sic_get_inter_distortions(\n"
44649" intel_sub_group_avc_sic_result_t result);\n"
44650"\n"
44651"ushort __ovld intel_sub_group_avc_ime_get_best_inter_distortion(\n"
44652" intel_sub_group_avc_ime_result_t result);\n"
44653"ushort __ovld intel_sub_group_avc_ref_get_best_inter_distortion(\n"
44654" intel_sub_group_avc_ref_result_t result);\n"
44655"\n"
44656"uchar __ovld intel_sub_group_avc_ime_get_inter_major_shape(\n"
44657" intel_sub_group_avc_ime_result_t result);\n"
44658"uchar __ovld intel_sub_group_avc_ref_get_inter_major_shape(\n"
44659" intel_sub_group_avc_ref_result_t result);\n"
44660"uchar __ovld intel_sub_group_avc_ime_get_inter_minor_shapes(\n"
44661" intel_sub_group_avc_ime_result_t result);\n"
44662"uchar __ovld intel_sub_group_avc_ref_get_inter_minor_shapes(\n"
44663" intel_sub_group_avc_ref_result_t result);\n"
44664"\n"
44665"uchar __ovld intel_sub_group_avc_ime_get_inter_directions(\n"
44666" intel_sub_group_avc_ime_result_t result);\n"
44667"uchar __ovld intel_sub_group_avc_ref_get_inter_directions(\n"
44668" intel_sub_group_avc_ref_result_t result);\n"
44669"\n"
44670"uchar __ovld intel_sub_group_avc_ime_get_inter_motion_vector_count(\n"
44671" intel_sub_group_avc_ime_result_t result);\n"
44672"uchar __ovld intel_sub_group_avc_ref_get_inter_motion_vector_count(\n"
44673" intel_sub_group_avc_ref_result_t result);\n"
44674"\n"
44675"uint __ovld intel_sub_group_avc_ime_get_inter_reference_ids(\n"
44676" intel_sub_group_avc_ime_result_t result);\n"
44677"uint __ovld intel_sub_group_avc_ref_get_inter_reference_ids(\n"
44678" intel_sub_group_avc_ref_result_t result);\n"
44679"\n"
44680"uchar __ovld\n"
44681"intel_sub_group_avc_ime_get_inter_reference_interlaced_field_polarities(\n"
44682" uint packed_reference_ids, uint packed_reference_parameter_field_polarities,\n"
44683" intel_sub_group_avc_ime_result_t result);\n"
44684"uchar __ovld\n"
44685"intel_sub_group_avc_ref_get_inter_reference_interlaced_field_polarities(\n"
44686" uint packed_reference_ids, uint packed_reference_parameter_field_polarities,\n"
44687" intel_sub_group_avc_ref_result_t result);\n"
44688"\n"
44689"// Type conversion functions\n"
44690"intel_sub_group_avc_mce_payload_t __ovld\n"
44691"intel_sub_group_avc_ime_convert_to_mce_payload(\n"
44692" intel_sub_group_avc_ime_payload_t payload);\n"
44693"intel_sub_group_avc_ime_payload_t __ovld\n"
44694"intel_sub_group_avc_mce_convert_to_ime_payload(\n"
44695" intel_sub_group_avc_mce_payload_t payload);\n"
44696"intel_sub_group_avc_mce_payload_t __ovld\n"
44697"intel_sub_group_avc_ref_convert_to_mce_payload(\n"
44698" intel_sub_group_avc_ref_payload_t payload);\n"
44699"intel_sub_group_avc_ref_payload_t __ovld\n"
44700"intel_sub_group_avc_mce_convert_to_ref_payload(\n"
44701" intel_sub_group_avc_mce_payload_t payload);\n"
44702"intel_sub_group_avc_mce_payload_t __ovld\n"
44703"intel_sub_group_avc_sic_convert_to_mce_payload(\n"
44704" intel_sub_group_avc_sic_payload_t payload);\n"
44705"intel_sub_group_avc_sic_payload_t __ovld\n"
44706"intel_sub_group_avc_mce_convert_to_sic_payload(\n"
44707" intel_sub_group_avc_mce_payload_t payload);\n"
44708"\n"
44709"intel_sub_group_avc_mce_result_t __ovld\n"
44710"intel_sub_group_avc_ime_convert_to_mce_result(\n"
44711" intel_sub_group_avc_ime_result_t result);\n"
44712"intel_sub_group_avc_ime_result_t __ovld\n"
44713"intel_sub_group_avc_mce_convert_to_ime_result(\n"
44714" intel_sub_group_avc_mce_result_t result);\n"
44715"intel_sub_group_avc_mce_result_t __ovld\n"
44716"intel_sub_group_avc_ref_convert_to_mce_result(\n"
44717" intel_sub_group_avc_ref_result_t result);\n"
44718"intel_sub_group_avc_ref_result_t __ovld\n"
44719"intel_sub_group_avc_mce_convert_to_ref_result(\n"
44720" intel_sub_group_avc_mce_result_t result);\n"
44721"intel_sub_group_avc_mce_result_t __ovld\n"
44722"intel_sub_group_avc_sic_convert_to_mce_result(\n"
44723" intel_sub_group_avc_sic_result_t result);\n"
44724"intel_sub_group_avc_sic_result_t __ovld\n"
44725"intel_sub_group_avc_mce_convert_to_sic_result(\n"
44726" intel_sub_group_avc_mce_result_t result);\n"
44727"#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : end\n"
44728"#endif // cl_intel_device_side_avc_motion_estimation\n"
44729"\n"
44730"#ifdef cl_amd_media_ops\n"
44731"uint __ovld amd_bitalign(uint a, uint b, uint c);\n"
44732"uint2 __ovld amd_bitalign(uint2 a, uint2 b, uint2 c);\n"
44733"uint3 __ovld amd_bitalign(uint3 a, uint3 b, uint3 c);\n"
44734"uint4 __ovld amd_bitalign(uint4 a, uint4 b, uint4 c);\n"
44735"uint8 __ovld amd_bitalign(uint8 a, uint8 b, uint8 c);\n"
44736"uint16 __ovld amd_bitalign(uint16 a, uint16 b, uint16 c);\n"
44737"\n"
44738"uint __ovld amd_bytealign(uint a, uint b, uint c);\n"
44739"uint2 __ovld amd_bytealign(uint2 a, uint2 b, uint2 c);\n"
44740"uint3 __ovld amd_bytealign(uint3 a, uint3 b, uint3 c);\n"
44741"uint4 __ovld amd_bytealign(uint4 a, uint4 b, uint4 c);\n"
44742"uint8 __ovld amd_bytealign(uint8 a, uint8 b, uint8 c);\n"
44743"uint16 __ovld amd_bytealign(uint16 a, uint16 b, uint16 c);\n"
44744"\n"
44745"uint __ovld amd_lerp(uint a, uint b, uint c);\n"
44746"uint2 __ovld amd_lerp(uint2 a, uint2 b, uint2 c);\n"
44747"uint3 __ovld amd_lerp(uint3 a, uint3 b, uint3 c);\n"
44748"uint4 __ovld amd_lerp(uint4 a, uint4 b, uint4 c);\n"
44749"uint8 __ovld amd_lerp(uint8 a, uint8 b, uint8 c);\n"
44750"uint16 __ovld amd_lerp(uint16 a, uint16 b, uint16 c);\n"
44751"\n"
44752"uint __ovld amd_pack(float4 v);\n"
44753"\n"
44754"uint __ovld amd_sad4(uint4 x, uint4 y, uint z);\n"
44755"\n"
44756"uint __ovld amd_sadhi(uint a, uint b, uint c);\n"
44757"uint2 __ovld amd_sadhi(uint2 a, uint2 b, uint2 c);\n"
44758"uint3 __ovld amd_sadhi(uint3 a, uint3 b, uint3 c);\n"
44759"uint4 __ovld amd_sadhi(uint4 a, uint4 b, uint4 c);\n"
44760"uint8 __ovld amd_sadhi(uint8 a, uint8 b, uint8 c);\n"
44761"uint16 __ovld amd_sadhi(uint16 a, uint16 b, uint16 c);\n"
44762"\n"
44763"uint __ovld amd_sad(uint a, uint b, uint c);\n"
44764"uint2 __ovld amd_sad(uint2 a, uint2 b, uint2 c);\n"
44765"uint3 __ovld amd_sad(uint3 a, uint3 b, uint3 c);\n"
44766"uint4 __ovld amd_sad(uint4 a, uint4 b, uint4 c);\n"
44767"uint8 __ovld amd_sad(uint8 a, uint8 b, uint8 c);\n"
44768"uint16 __ovld amd_sad(uint16 a, uint16 b, uint16 c);\n"
44769"\n"
44770"float __ovld amd_unpack0(uint a);\n"
44771"float2 __ovld amd_unpack0(uint2 a);\n"
44772"float3 __ovld amd_unpack0(uint3 a);\n"
44773"float4 __ovld amd_unpack0(uint4 a);\n"
44774"float8 __ovld amd_unpack0(uint8 a);\n"
44775"float16 __ovld amd_unpack0(uint16 a);\n"
44776"\n"
44777"float __ovld amd_unpack1(uint a);\n"
44778"float2 __ovld amd_unpack1(uint2 a);\n"
44779"float3 __ovld amd_unpack1(uint3 a);\n"
44780"float4 __ovld amd_unpack1(uint4 a);\n"
44781"float8 __ovld amd_unpack1(uint8 a);\n"
44782"float16 __ovld amd_unpack1(uint16 a);\n"
44783"\n"
44784"float __ovld amd_unpack2(uint a);\n"
44785"float2 __ovld amd_unpack2(uint2 a);\n"
44786"float3 __ovld amd_unpack2(uint3 a);\n"
44787"float4 __ovld amd_unpack2(uint4 a);\n"
44788"float8 __ovld amd_unpack2(uint8 a);\n"
44789"float16 __ovld amd_unpack2(uint16 a);\n"
44790"\n"
44791"float __ovld amd_unpack3(uint a);\n"
44792"float2 __ovld amd_unpack3(uint2 a);\n"
44793"float3 __ovld amd_unpack3(uint3 a);\n"
44794"float4 __ovld amd_unpack3(uint4 a);\n"
44795"float8 __ovld amd_unpack3(uint8 a);\n"
44796"float16 __ovld amd_unpack3(uint16 a);\n"
44797"#endif // cl_amd_media_ops\n"
44798"\n"
44799"#ifdef cl_amd_media_ops2\n"
44800"int __ovld amd_bfe(int src0, uint src1, uint src2);\n"
44801"int2 __ovld amd_bfe(int2 src0, uint2 src1, uint2 src2);\n"
44802"int3 __ovld amd_bfe(int3 src0, uint3 src1, uint3 src2);\n"
44803"int4 __ovld amd_bfe(int4 src0, uint4 src1, uint4 src2);\n"
44804"int8 __ovld amd_bfe(int8 src0, uint8 src1, uint8 src2);\n"
44805"int16 __ovld amd_bfe(int16 src0, uint16 src1, uint16 src2);\n"
44806"\n"
44807"uint __ovld amd_bfe(uint src0, uint src1, uint src2);\n"
44808"uint2 __ovld amd_bfe(uint2 src0, uint2 src1, uint2 src2);\n"
44809"uint3 __ovld amd_bfe(uint3 src0, uint3 src1, uint3 src2);\n"
44810"uint4 __ovld amd_bfe(uint4 src0, uint4 src1, uint4 src2);\n"
44811"uint8 __ovld amd_bfe(uint8 src0, uint8 src1, uint8 src2);\n"
44812"uint16 __ovld amd_bfe(uint16 src0, uint16 src1, uint16 src2);\n"
44813"\n"
44814"uint __ovld amd_bfm(uint src0, uint src1);\n"
44815"uint2 __ovld amd_bfm(uint2 src0, uint2 src1);\n"
44816"uint3 __ovld amd_bfm(uint3 src0, uint3 src1);\n"
44817"uint4 __ovld amd_bfm(uint4 src0, uint4 src1);\n"
44818"uint8 __ovld amd_bfm(uint8 src0, uint8 src1);\n"
44819"uint16 __ovld amd_bfm(uint16 src0, uint16 src1);\n"
44820"\n"
44821"float __ovld amd_max3(float src0, float src1, float src2);\n"
44822"float2 __ovld amd_max3(float2 src0, float2 src1, float2 src2);\n"
44823"float3 __ovld amd_max3(float3 src0, float3 src1, float3 src2);\n"
44824"float4 __ovld amd_max3(float4 src0, float4 src1, float4 src2);\n"
44825"float8 __ovld amd_max3(float8 src0, float8 src1, float8 src2);\n"
44826"float16 __ovld amd_max3(float16 src0, float16 src1, float16 src2);\n"
44827"\n"
44828"int __ovld amd_max3(int src0, int src1, int src2);\n"
44829"int2 __ovld amd_max3(int2 src0, int2 src1, int2 src2);\n"
44830"int3 __ovld amd_max3(int3 src0, int3 src1, int3 src2);\n"
44831"int4 __ovld amd_max3(int4 src0, int4 src1, int4 src2);\n"
44832"int8 __ovld amd_max3(int8 src0, int8 src1, int8 src2);\n"
44833"int16 __ovld amd_max3(int16 src0, int16 src1, int16 src2);\n"
44834"\n"
44835"uint __ovld amd_max3(uint src0, uint src1, uint src2);\n"
44836"uint2 __ovld amd_max3(uint2 src0, uint2 src1, uint2 src2);\n"
44837"uint3 __ovld amd_max3(uint3 src0, uint3 src1, uint3 src2);\n"
44838"uint4 __ovld amd_max3(uint4 src0, uint4 src1, uint4 src2);\n"
44839"uint8 __ovld amd_max3(uint8 src0, uint8 src1, uint8 src2);\n"
44840"uint16 __ovld amd_max3(uint16 src0, uint16 src1, uint16 src2);\n"
44841"\n"
44842"float __ovld amd_median3(float src0, float src1, float src2);\n"
44843"float2 __ovld amd_median3(float2 src0, float2 src1, float2 src2);\n"
44844"float3 __ovld amd_median3(float3 src0, float3 src1, float3 src2);\n"
44845"float4 __ovld amd_median3(float4 src0, float4 src1, float4 src2);\n"
44846"float8 __ovld amd_median3(float8 src0, float8 src1, float8 src2);\n"
44847"float16 __ovld amd_median3(float16 src0, float16 src1, float16 src2);\n"
44848"\n"
44849"int __ovld amd_median3(int src0, int src1, int src2);\n"
44850"int2 __ovld amd_median3(int2 src0, int2 src1, int2 src2);\n"
44851"int3 __ovld amd_median3(int3 src0, int3 src1, int3 src2);\n"
44852"int4 __ovld amd_median3(int4 src0, int4 src1, int4 src2);\n"
44853"int8 __ovld amd_median3(int8 src0, int8 src1, int8 src2);\n"
44854"int16 __ovld amd_median3(int16 src0, int16 src1, int16 src2);\n"
44855"\n"
44856"uint __ovld amd_median3(uint src0, uint src1, uint src2);\n"
44857"uint2 __ovld amd_median3(uint2 src0, uint2 src1, uint2 src2);\n"
44858"uint3 __ovld amd_median3(uint3 src0, uint3 src1, uint3 src2);\n"
44859"uint4 __ovld amd_median3(uint4 src0, uint4 src1, uint4 src2);\n"
44860"uint8 __ovld amd_median3(uint8 src0, uint8 src1, uint8 src2);\n"
44861"uint16 __ovld amd_median3(uint16 src0, uint16 src1, uint16 src2);\n"
44862"\n"
44863"float __ovld amd_min3(float src0, float src1, float src);\n"
44864"float2 __ovld amd_min3(float2 src0, float2 src1, float2 src);\n"
44865"float3 __ovld amd_min3(float3 src0, float3 src1, float3 src);\n"
44866"float4 __ovld amd_min3(float4 src0, float4 src1, float4 src);\n"
44867"float8 __ovld amd_min3(float8 src0, float8 src1, float8 src);\n"
44868"float16 __ovld amd_min3(float16 src0, float16 src1, float16 src);\n"
44869"\n"
44870"int __ovld amd_min3(int src0, int src1, int src2);\n"
44871"int2 __ovld amd_min3(int2 src0, int2 src1, int2 src2);\n"
44872"int3 __ovld amd_min3(int3 src0, int3 src1, int3 src2);\n"
44873"int4 __ovld amd_min3(int4 src0, int4 src1, int4 src2);\n"
44874"int8 __ovld amd_min3(int8 src0, int8 src1, int8 src2);\n"
44875"int16 __ovld amd_min3(int16 src0, int16 src1, int16 src2);\n"
44876"\n"
44877"uint __ovld amd_min3(uint src0, uint src1, uint src2);\n"
44878"uint2 __ovld amd_min3(uint2 src0, uint2 src1, uint2 src2);\n"
44879"uint3 __ovld amd_min3(uint3 src0, uint3 src1, uint3 src2);\n"
44880"uint4 __ovld amd_min3(uint4 src0, uint4 src1, uint4 src2);\n"
44881"uint8 __ovld amd_min3(uint8 src0, uint8 src1, uint8 src2);\n"
44882"uint16 __ovld amd_min3(uint16 src0, uint16 src1, uint16 src2);\n"
44883"\n"
44884"ulong __ovld amd_mqsad(ulong src0, uint src1, ulong src2);\n"
44885"ulong2 __ovld amd_mqsad(ulong2 src0, uint2 src1, ulong2 src2);\n"
44886"ulong3 __ovld amd_mqsad(ulong3 src0, uint3 src1, ulong3 src2);\n"
44887"ulong4 __ovld amd_mqsad(ulong4 src0, uint4 src1, ulong4 src2);\n"
44888"ulong8 __ovld amd_mqsad(ulong8 src0, uint8 src1, ulong8 src2);\n"
44889"ulong16 __ovld amd_mqsad(ulong16 src0, uint16 src1, ulong16 src2);\n"
44890"\n"
44891"ulong __ovld amd_qsad(ulong src0, uint src1, ulong src2);\n"
44892"ulong2 __ovld amd_qsad(ulong2 src0, uint2 src1, ulong2 src2);\n"
44893"ulong3 __ovld amd_qsad(ulong3 src0, uint3 src1, ulong3 src2);\n"
44894"ulong4 __ovld amd_qsad(ulong4 src0, uint4 src1, ulong4 src2);\n"
44895"ulong8 __ovld amd_qsad(ulong8 src0, uint8 src1, ulong8 src2);\n"
44896"ulong16 __ovld amd_qsad(ulong16 src0, uint16 src1, ulong16 src2);\n"
44897"\n"
44898"uint __ovld amd_msad(uint src0, uint src1, uint src2);\n"
44899"uint2 __ovld amd_msad(uint2 src0, uint2 src1, uint2 src2);\n"
44900"uint3 __ovld amd_msad(uint3 src0, uint3 src1, uint3 src2);\n"
44901"uint4 __ovld amd_msad(uint4 src0, uint4 src1, uint4 src2);\n"
44902"uint8 __ovld amd_msad(uint8 src0, uint8 src1, uint8 src2);\n"
44903"uint16 __ovld amd_msad(uint16 src0, uint16 src1, uint16 src2);\n"
44904"\n"
44905"uint __ovld amd_sadd(uint src0, uint src1, uint src2);\n"
44906"uint2 __ovld amd_sadd(uint2 src0, uint2 src1, uint2 src2);\n"
44907"uint3 __ovld amd_sadd(uint3 src0, uint3 src1, uint3 src2);\n"
44908"uint4 __ovld amd_sadd(uint4 src0, uint4 src1, uint4 src2);\n"
44909"uint8 __ovld amd_sadd(uint8 src0, uint8 src1, uint8 src2);\n"
44910"uint16 __ovld amd_sadd(uint16 src0, uint16 src1, uint16 src2);\n"
44911"\n"
44912"uint __ovld amd_sadw(uint src0, uint src1, uint src2);\n"
44913"uint2 __ovld amd_sadw(uint2 src0, uint2 src1, uint2 src2);\n"
44914"uint3 __ovld amd_sadw(uint3 src0, uint3 src1, uint3 src2);\n"
44915"uint4 __ovld amd_sadw(uint4 src0, uint4 src1, uint4 src2);\n"
44916"uint8 __ovld amd_sadw(uint8 src0, uint8 src1, uint8 src2);\n"
44917"uint16 __ovld amd_sadw(uint16 src0, uint16 src1, uint16 src2);\n"
44918"#endif // cl_amd_media_ops2\n"
44919"\n"
44920"// Disable any extensions we may have enabled previously.\n"
44921"#pragma OPENCL EXTENSION all : disable\n"
44922"\n"
44923"#undef __cnfn\n"
44924"#undef __ovld\n"
44925"#endif //_OPENCL_H_\n"
44926"" } ,
44927 { "/builtins/pconfigintrin.h" , "/*===---- pconfigintrin.h - X86 platform configuration ---------------------===\n"
44928" *\n"
44929" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
44930" * of this software and associated documentation files (the \"Software\"), to deal\n"
44931" * in the Software without restriction, including without limitation the rights\n"
44932" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
44933" * copies of the Software, and to permit persons to whom the Software is\n"
44934" * furnished to do so, subject to the following conditions:\n"
44935" *\n"
44936" * The above copyright notice and this permission notice shall be included in\n"
44937" * all copies or substantial portions of the Software.\n"
44938" *\n"
44939" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
44940" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
44941" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
44942" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
44943" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
44944" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
44945" * THE SOFTWARE.\n"
44946" *\n"
44947" *===-----------------------------------------------------------------------===\n"
44948" */\n"
44949"\n"
44950"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
44951"#error \"Never use <pconfigintrin.h> directly; include <x86intrin.h> instead.\"\n"
44952"#endif\n"
44953"\n"
44954"#ifndef __PCONFIGINTRIN_H\n"
44955"#define __PCONFIGINTRIN_H\n"
44956"\n"
44957"#define __PCONFIG_KEY_PROGRAM 0x00000001\n"
44958"\n"
44959"/* Define the default attributes for the functions in this file. */\n"
44960"#define __DEFAULT_FN_ATTRS \\\n"
44961" __attribute__((__always_inline__, __nodebug__, __target__(\"pconfig\")))\n"
44962"\n"
44963"static __inline unsigned int __DEFAULT_FN_ATTRS\n"
44964"_pconfig_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n"
44965"{\n"
44966" unsigned int __result;\n"
44967" __asm__ (\"pconfig\"\n"
44968" : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n"
44969" : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n"
44970" : \"cc\");\n"
44971" return __result;\n"
44972"}\n"
44973"\n"
44974"#undef __DEFAULT_FN_ATTRS\n"
44975"\n"
44976"#endif\n"
44977"" } ,
44978 { "/builtins/pkuintrin.h" , "/*===---- pkuintrin.h - PKU intrinsics -------------------------------------===\n"
44979" *\n"
44980" *\n"
44981" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
44982" * of this software and associated documentation files (the \"Software\"), to deal\n"
44983" * in the Software without restriction, including without limitation the rights\n"
44984" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
44985" * copies of the Software, and to permit persons to whom the Software is\n"
44986" * furnished to do so, subject to the following conditions:\n"
44987" *\n"
44988" * The above copyright notice and this permission notice shall be included in\n"
44989" * all copies or substantial portions of the Software.\n"
44990" *\n"
44991" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
44992" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
44993" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
44994" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
44995" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
44996" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
44997" * THE SOFTWARE.\n"
44998" *\n"
44999" *===-----------------------------------------------------------------------===\n"
45000" */\n"
45001"#ifndef __IMMINTRIN_H\n"
45002"#error \"Never use <pkuintrin.h> directly; include <immintrin.h> instead.\"\n"
45003"#endif\n"
45004"\n"
45005"#ifndef __PKUINTRIN_H\n"
45006"#define __PKUINTRIN_H\n"
45007"\n"
45008"/* Define the default attributes for the functions in this file. */\n"
45009"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"pku\")))\n"
45010"\n"
45011"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
45012"_rdpkru_u32(void)\n"
45013"{\n"
45014" return __builtin_ia32_rdpkru();\n"
45015"}\n"
45016"\n"
45017"static __inline__ void __DEFAULT_FN_ATTRS\n"
45018"_wrpkru(unsigned int __val)\n"
45019"{\n"
45020" __builtin_ia32_wrpkru(__val);\n"
45021"}\n"
45022"\n"
45023"#undef __DEFAULT_FN_ATTRS\n"
45024"\n"
45025"#endif\n"
45026"" } ,
45027 { "/builtins/pmmintrin.h" , "/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===\n"
45028" *\n"
45029" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45030" * of this software and associated documentation files (the \"Software\"), to deal\n"
45031" * in the Software without restriction, including without limitation the rights\n"
45032" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45033" * copies of the Software, and to permit persons to whom the Software is\n"
45034" * furnished to do so, subject to the following conditions:\n"
45035" *\n"
45036" * The above copyright notice and this permission notice shall be included in\n"
45037" * all copies or substantial portions of the Software.\n"
45038" *\n"
45039" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45040" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45041" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45042" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45043" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45044" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45045" * THE SOFTWARE.\n"
45046" *\n"
45047" *===-----------------------------------------------------------------------===\n"
45048" */\n"
45049"\n"
45050"#ifndef __PMMINTRIN_H\n"
45051"#define __PMMINTRIN_H\n"
45052"\n"
45053"#include <emmintrin.h>\n"
45054"\n"
45055"/* Define the default attributes for the functions in this file. */\n"
45056"#define __DEFAULT_FN_ATTRS \\\n"
45057" __attribute__((__always_inline__, __nodebug__, __target__(\"sse3\"), __min_vector_width__(128)))\n"
45058"\n"
45059"/// Loads data from an unaligned memory location to elements in a 128-bit\n"
45060"/// vector.\n"
45061"///\n"
45062"/// If the address of the data is not 16-byte aligned, the instruction may\n"
45063"/// read two adjacent aligned blocks of memory to retrieve the requested\n"
45064"/// data.\n"
45065"///\n"
45066"/// \\headerfile <x86intrin.h>\n"
45067"///\n"
45068"/// This intrinsic corresponds to the <c> VLDDQU </c> instruction.\n"
45069"///\n"
45070"/// \\param __p\n"
45071"/// A pointer to a 128-bit integer vector containing integer values.\n"
45072"/// \\returns A 128-bit vector containing the moved values.\n"
45073"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45074"_mm_lddqu_si128(__m128i const *__p)\n"
45075"{\n"
45076" return (__m128i)__builtin_ia32_lddqu((char const *)__p);\n"
45077"}\n"
45078"\n"
45079"/// Adds the even-indexed values and subtracts the odd-indexed values of\n"
45080"/// two 128-bit vectors of [4 x float].\n"
45081"///\n"
45082"/// \\headerfile <x86intrin.h>\n"
45083"///\n"
45084"/// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.\n"
45085"///\n"
45086"/// \\param __a\n"
45087"/// A 128-bit vector of [4 x float] containing the left source operand.\n"
45088"/// \\param __b\n"
45089"/// A 128-bit vector of [4 x float] containing the right source operand.\n"
45090"/// \\returns A 128-bit vector of [4 x float] containing the alternating sums and\n"
45091"/// differences of both operands.\n"
45092"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
45093"_mm_addsub_ps(__m128 __a, __m128 __b)\n"
45094"{\n"
45095" return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);\n"
45096"}\n"
45097"\n"
45098"/// Horizontally adds the adjacent pairs of values contained in two\n"
45099"/// 128-bit vectors of [4 x float].\n"
45100"///\n"
45101"/// \\headerfile <x86intrin.h>\n"
45102"///\n"
45103"/// This intrinsic corresponds to the <c> VHADDPS </c> instruction.\n"
45104"///\n"
45105"/// \\param __a\n"
45106"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
45107"/// The horizontal sums of the values are stored in the lower bits of the\n"
45108"/// destination.\n"
45109"/// \\param __b\n"
45110"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
45111"/// The horizontal sums of the values are stored in the upper bits of the\n"
45112"/// destination.\n"
45113"/// \\returns A 128-bit vector of [4 x float] containing the horizontal sums of\n"
45114"/// both operands.\n"
45115"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
45116"_mm_hadd_ps(__m128 __a, __m128 __b)\n"
45117"{\n"
45118" return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);\n"
45119"}\n"
45120"\n"
45121"/// Horizontally subtracts the adjacent pairs of values contained in two\n"
45122"/// 128-bit vectors of [4 x float].\n"
45123"///\n"
45124"/// \\headerfile <x86intrin.h>\n"
45125"///\n"
45126"/// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.\n"
45127"///\n"
45128"/// \\param __a\n"
45129"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
45130"/// The horizontal differences between the values are stored in the lower\n"
45131"/// bits of the destination.\n"
45132"/// \\param __b\n"
45133"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
45134"/// The horizontal differences between the values are stored in the upper\n"
45135"/// bits of the destination.\n"
45136"/// \\returns A 128-bit vector of [4 x float] containing the horizontal\n"
45137"/// differences of both operands.\n"
45138"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
45139"_mm_hsub_ps(__m128 __a, __m128 __b)\n"
45140"{\n"
45141" return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);\n"
45142"}\n"
45143"\n"
45144"/// Moves and duplicates odd-indexed values from a 128-bit vector\n"
45145"/// of [4 x float] to float values stored in a 128-bit vector of\n"
45146"/// [4 x float].\n"
45147"///\n"
45148"/// \\headerfile <x86intrin.h>\n"
45149"///\n"
45150"/// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.\n"
45151"///\n"
45152"/// \\param __a\n"
45153"/// A 128-bit vector of [4 x float]. \\n\n"
45154"/// Bits [127:96] of the source are written to bits [127:96] and [95:64] of\n"
45155"/// the destination. \\n\n"
45156"/// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the\n"
45157"/// destination.\n"
45158"/// \\returns A 128-bit vector of [4 x float] containing the moved and duplicated\n"
45159"/// values.\n"
45160"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
45161"_mm_movehdup_ps(__m128 __a)\n"
45162"{\n"
45163" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);\n"
45164"}\n"
45165"\n"
45166"/// Duplicates even-indexed values from a 128-bit vector of\n"
45167"/// [4 x float] to float values stored in a 128-bit vector of [4 x float].\n"
45168"///\n"
45169"/// \\headerfile <x86intrin.h>\n"
45170"///\n"
45171"/// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.\n"
45172"///\n"
45173"/// \\param __a\n"
45174"/// A 128-bit vector of [4 x float] \\n\n"
45175"/// Bits [95:64] of the source are written to bits [127:96] and [95:64] of\n"
45176"/// the destination. \\n\n"
45177"/// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the\n"
45178"/// destination.\n"
45179"/// \\returns A 128-bit vector of [4 x float] containing the moved and duplicated\n"
45180"/// values.\n"
45181"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
45182"_mm_moveldup_ps(__m128 __a)\n"
45183"{\n"
45184" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);\n"
45185"}\n"
45186"\n"
45187"/// Adds the even-indexed values and subtracts the odd-indexed values of\n"
45188"/// two 128-bit vectors of [2 x double].\n"
45189"///\n"
45190"/// \\headerfile <x86intrin.h>\n"
45191"///\n"
45192"/// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.\n"
45193"///\n"
45194"/// \\param __a\n"
45195"/// A 128-bit vector of [2 x double] containing the left source operand.\n"
45196"/// \\param __b\n"
45197"/// A 128-bit vector of [2 x double] containing the right source operand.\n"
45198"/// \\returns A 128-bit vector of [2 x double] containing the alternating sums\n"
45199"/// and differences of both operands.\n"
45200"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
45201"_mm_addsub_pd(__m128d __a, __m128d __b)\n"
45202"{\n"
45203" return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);\n"
45204"}\n"
45205"\n"
45206"/// Horizontally adds the pairs of values contained in two 128-bit\n"
45207"/// vectors of [2 x double].\n"
45208"///\n"
45209"/// \\headerfile <x86intrin.h>\n"
45210"///\n"
45211"/// This intrinsic corresponds to the <c> VHADDPD </c> instruction.\n"
45212"///\n"
45213"/// \\param __a\n"
45214"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
45215"/// The horizontal sum of the values is stored in the lower bits of the\n"
45216"/// destination.\n"
45217"/// \\param __b\n"
45218"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
45219"/// The horizontal sum of the values is stored in the upper bits of the\n"
45220"/// destination.\n"
45221"/// \\returns A 128-bit vector of [2 x double] containing the horizontal sums of\n"
45222"/// both operands.\n"
45223"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
45224"_mm_hadd_pd(__m128d __a, __m128d __b)\n"
45225"{\n"
45226" return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);\n"
45227"}\n"
45228"\n"
45229"/// Horizontally subtracts the pairs of values contained in two 128-bit\n"
45230"/// vectors of [2 x double].\n"
45231"///\n"
45232"/// \\headerfile <x86intrin.h>\n"
45233"///\n"
45234"/// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.\n"
45235"///\n"
45236"/// \\param __a\n"
45237"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
45238"/// The horizontal difference of the values is stored in the lower bits of\n"
45239"/// the destination.\n"
45240"/// \\param __b\n"
45241"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
45242"/// The horizontal difference of the values is stored in the upper bits of\n"
45243"/// the destination.\n"
45244"/// \\returns A 128-bit vector of [2 x double] containing the horizontal\n"
45245"/// differences of both operands.\n"
45246"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
45247"_mm_hsub_pd(__m128d __a, __m128d __b)\n"
45248"{\n"
45249" return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);\n"
45250"}\n"
45251"\n"
45252"/// Moves and duplicates one double-precision value to double-precision\n"
45253"/// values stored in a 128-bit vector of [2 x double].\n"
45254"///\n"
45255"/// \\headerfile <x86intrin.h>\n"
45256"///\n"
45257"/// \\code\n"
45258"/// __m128d _mm_loaddup_pd(double const *dp);\n"
45259"/// \\endcode\n"
45260"///\n"
45261"/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.\n"
45262"///\n"
45263"/// \\param dp\n"
45264"/// A pointer to a double-precision value to be moved and duplicated.\n"
45265"/// \\returns A 128-bit vector of [2 x double] containing the moved and\n"
45266"/// duplicated values.\n"
45267"#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)\n"
45268"\n"
45269"/// Moves and duplicates the double-precision value in the lower bits of\n"
45270"/// a 128-bit vector of [2 x double] to double-precision values stored in a\n"
45271"/// 128-bit vector of [2 x double].\n"
45272"///\n"
45273"/// \\headerfile <x86intrin.h>\n"
45274"///\n"
45275"/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.\n"
45276"///\n"
45277"/// \\param __a\n"
45278"/// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits\n"
45279"/// [127:64] and [63:0] of the destination.\n"
45280"/// \\returns A 128-bit vector of [2 x double] containing the moved and\n"
45281"/// duplicated values.\n"
45282"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
45283"_mm_movedup_pd(__m128d __a)\n"
45284"{\n"
45285" return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);\n"
45286"}\n"
45287"\n"
45288"/// Establishes a linear address memory range to be monitored and puts\n"
45289"/// the processor in the monitor event pending state. Data stored in the\n"
45290"/// monitored address range causes the processor to exit the pending state.\n"
45291"///\n"
45292"/// \\headerfile <x86intrin.h>\n"
45293"///\n"
45294"/// This intrinsic corresponds to the <c> MONITOR </c> instruction.\n"
45295"///\n"
45296"/// \\param __p\n"
45297"/// The memory range to be monitored. The size of the range is determined by\n"
45298"/// CPUID function 0000_0005h.\n"
45299"/// \\param __extensions\n"
45300"/// Optional extensions for the monitoring state.\n"
45301"/// \\param __hints\n"
45302"/// Optional hints for the monitoring state.\n"
45303"static __inline__ void __DEFAULT_FN_ATTRS\n"
45304"_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)\n"
45305"{\n"
45306" __builtin_ia32_monitor((void *)__p, __extensions, __hints);\n"
45307"}\n"
45308"\n"
45309"/// Used with the MONITOR instruction to wait while the processor is in\n"
45310"/// the monitor event pending state. Data stored in the monitored address\n"
45311"/// range causes the processor to exit the pending state.\n"
45312"///\n"
45313"/// \\headerfile <x86intrin.h>\n"
45314"///\n"
45315"/// This intrinsic corresponds to the <c> MWAIT </c> instruction.\n"
45316"///\n"
45317"/// \\param __extensions\n"
45318"/// Optional extensions for the monitoring state, which may vary by\n"
45319"/// processor.\n"
45320"/// \\param __hints\n"
45321"/// Optional hints for the monitoring state, which may vary by processor.\n"
45322"static __inline__ void __DEFAULT_FN_ATTRS\n"
45323"_mm_mwait(unsigned __extensions, unsigned __hints)\n"
45324"{\n"
45325" __builtin_ia32_mwait(__extensions, __hints);\n"
45326"}\n"
45327"\n"
45328"#undef __DEFAULT_FN_ATTRS\n"
45329"\n"
45330"#endif /* __PMMINTRIN_H */\n"
45331"" } ,
45332 { "/builtins/popcntintrin.h" , "/*===---- popcntintrin.h - POPCNT intrinsics -------------------------------===\n"
45333" *\n"
45334" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45335" * of this software and associated documentation files (the \"Software\"), to deal\n"
45336" * in the Software without restriction, including without limitation the rights\n"
45337" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45338" * copies of the Software, and to permit persons to whom the Software is\n"
45339" * furnished to do so, subject to the following conditions:\n"
45340" *\n"
45341" * The above copyright notice and this permission notice shall be included in\n"
45342" * all copies or substantial portions of the Software.\n"
45343" *\n"
45344" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45345" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45346" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45347" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45348" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45349" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45350" * THE SOFTWARE.\n"
45351" *\n"
45352" *===-----------------------------------------------------------------------===\n"
45353" */\n"
45354"\n"
45355"#ifndef __POPCNTINTRIN_H\n"
45356"#define __POPCNTINTRIN_H\n"
45357"\n"
45358"/* Define the default attributes for the functions in this file. */\n"
45359"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"popcnt\")))\n"
45360"\n"
45361"/// Counts the number of bits in the source operand having a value of 1.\n"
45362"///\n"
45363"/// \\headerfile <x86intrin.h>\n"
45364"///\n"
45365"/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n"
45366"///\n"
45367"/// \\param __A\n"
45368"/// An unsigned 32-bit integer operand.\n"
45369"/// \\returns A 32-bit integer containing the number of bits with value 1 in the\n"
45370"/// source operand.\n"
45371"static __inline__ int __DEFAULT_FN_ATTRS\n"
45372"_mm_popcnt_u32(unsigned int __A)\n"
45373"{\n"
45374" return __builtin_popcount(__A);\n"
45375"}\n"
45376"\n"
45377"/// Counts the number of bits in the source operand having a value of 1.\n"
45378"///\n"
45379"/// \\headerfile <x86intrin.h>\n"
45380"///\n"
45381"/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n"
45382"///\n"
45383"/// \\param __A\n"
45384"/// A signed 32-bit integer operand.\n"
45385"/// \\returns A 32-bit integer containing the number of bits with value 1 in the\n"
45386"/// source operand.\n"
45387"static __inline__ int __DEFAULT_FN_ATTRS\n"
45388"_popcnt32(int __A)\n"
45389"{\n"
45390" return __builtin_popcount(__A);\n"
45391"}\n"
45392"\n"
45393"#ifdef __x86_64__\n"
45394"/// Counts the number of bits in the source operand having a value of 1.\n"
45395"///\n"
45396"/// \\headerfile <x86intrin.h>\n"
45397"///\n"
45398"/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n"
45399"///\n"
45400"/// \\param __A\n"
45401"/// An unsigned 64-bit integer operand.\n"
45402"/// \\returns A 64-bit integer containing the number of bits with value 1 in the\n"
45403"/// source operand.\n"
45404"static __inline__ long long __DEFAULT_FN_ATTRS\n"
45405"_mm_popcnt_u64(unsigned long long __A)\n"
45406"{\n"
45407" return __builtin_popcountll(__A);\n"
45408"}\n"
45409"\n"
45410"/// Counts the number of bits in the source operand having a value of 1.\n"
45411"///\n"
45412"/// \\headerfile <x86intrin.h>\n"
45413"///\n"
45414"/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n"
45415"///\n"
45416"/// \\param __A\n"
45417"/// A signed 64-bit integer operand.\n"
45418"/// \\returns A 64-bit integer containing the number of bits with value 1 in the\n"
45419"/// source operand.\n"
45420"static __inline__ long long __DEFAULT_FN_ATTRS\n"
45421"_popcnt64(long long __A)\n"
45422"{\n"
45423" return __builtin_popcountll(__A);\n"
45424"}\n"
45425"#endif /* __x86_64__ */\n"
45426"\n"
45427"#undef __DEFAULT_FN_ATTRS\n"
45428"\n"
45429"#endif /* __POPCNTINTRIN_H */\n"
45430"" } ,
45431 { "/builtins/prfchwintrin.h" , "/*===---- prfchwintrin.h - PREFETCHW intrinsic -----------------------------===\n"
45432" *\n"
45433" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45434" * of this software and associated documentation files (the \"Software\"), to deal\n"
45435" * in the Software without restriction, including without limitation the rights\n"
45436" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45437" * copies of the Software, and to permit persons to whom the Software is\n"
45438" * furnished to do so, subject to the following conditions:\n"
45439" *\n"
45440" * The above copyright notice and this permission notice shall be included in\n"
45441" * all copies or substantial portions of the Software.\n"
45442" *\n"
45443" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45444" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45445" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45446" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45447" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45448" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45449" * THE SOFTWARE.\n"
45450" *\n"
45451" *===-----------------------------------------------------------------------===\n"
45452" */\n"
45453"\n"
45454"#if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED)\n"
45455"#error \"Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead.\"\n"
45456"#endif\n"
45457"\n"
45458"#ifndef __PRFCHWINTRIN_H\n"
45459"#define __PRFCHWINTRIN_H\n"
45460"\n"
45461"/// Loads a memory sequence containing the specified memory address into\n"
45462"/// all data cache levels. The cache-coherency state is set to exclusive.\n"
45463"/// Data can be read from and written to the cache line without additional\n"
45464"/// delay.\n"
45465"///\n"
45466"/// \\headerfile <x86intrin.h>\n"
45467"///\n"
45468"/// This intrinsic corresponds to the \\c PREFETCHT0 instruction.\n"
45469"///\n"
45470"/// \\param __P\n"
45471"/// A pointer specifying the memory address to be prefetched.\n"
45472"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
45473"_m_prefetch(void *__P)\n"
45474"{\n"
45475" __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);\n"
45476"}\n"
45477"\n"
45478"/// Loads a memory sequence containing the specified memory address into\n"
45479"/// the L1 data cache and sets the cache-coherency to modified. This\n"
45480"/// provides a hint to the processor that the cache line will be modified.\n"
45481"/// It is intended for use when the cache line will be written to shortly\n"
45482"/// after the prefetch is performed.\n"
45483"///\n"
45484"/// Note that the effect of this intrinsic is dependent on the processor\n"
45485"/// implementation.\n"
45486"///\n"
45487"/// \\headerfile <x86intrin.h>\n"
45488"///\n"
45489"/// This intrinsic corresponds to the \\c PREFETCHW instruction.\n"
45490"///\n"
45491"/// \\param __P\n"
45492"/// A pointer specifying the memory address to be prefetched.\n"
45493"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
45494"_m_prefetchw(void *__P)\n"
45495"{\n"
45496" __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);\n"
45497"}\n"
45498"\n"
45499"#endif /* __PRFCHWINTRIN_H */\n"
45500"" } ,
45501 { "/builtins/ptwriteintrin.h" , "/*===------------ ptwriteintrin.h - PTWRITE intrinsic --------------------===\n"
45502" *\n"
45503" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45504" * of this software and associated documentation files (the \"Software\"), to deal\n"
45505" * in the Software without restriction, including without limitation the rights\n"
45506" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45507" * copies of the Software, and to permit persons to whom the Software is\n"
45508" * furnished to do so, subject to the following conditions:\n"
45509" *\n"
45510" * The above copyright notice and this permission notice shall be included in\n"
45511" * all copies or substantial portions of the Software.\n"
45512" *\n"
45513" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45514" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45515" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45516" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45517" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45518" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45519" * THE SOFTWARE.\n"
45520" *\n"
45521" *===-----------------------------------------------------------------------===\n"
45522" */\n"
45523"\n"
45524"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
45525"#error \"Never use <ptwriteintrin.h> directly; include <x86intrin.h> instead.\"\n"
45526"#endif\n"
45527"\n"
45528"#ifndef __PTWRITEINTRIN_H\n"
45529"#define __PTWRITEINTRIN_H\n"
45530"\n"
45531"/* Define the default attributes for the functions in this file. */\n"
45532"#define __DEFAULT_FN_ATTRS \\\n"
45533" __attribute__((__always_inline__, __nodebug__, __target__(\"ptwrite\")))\n"
45534"\n"
45535"static __inline__ void __DEFAULT_FN_ATTRS\n"
45536"_ptwrite32(unsigned int __value) {\n"
45537" __builtin_ia32_ptwrite32(__value);\n"
45538"}\n"
45539"\n"
45540"#ifdef __x86_64__\n"
45541"\n"
45542"static __inline__ void __DEFAULT_FN_ATTRS\n"
45543"_ptwrite64(unsigned long long __value) {\n"
45544" __builtin_ia32_ptwrite64(__value);\n"
45545"}\n"
45546"\n"
45547"#endif /* __x86_64__ */\n"
45548"\n"
45549"#undef __DEFAULT_FN_ATTRS\n"
45550"\n"
45551"#endif /* __PTWRITEINTRIN_H */\n"
45552"" } ,
45553 { "/builtins/rdseedintrin.h" , "/*===---- rdseedintrin.h - RDSEED intrinsics -------------------------------===\n"
45554" *\n"
45555" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45556" * of this software and associated documentation files (the \"Software\"), to deal\n"
45557" * in the Software without restriction, including without limitation the rights\n"
45558" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45559" * copies of the Software, and to permit persons to whom the Software is\n"
45560" * furnished to do so, subject to the following conditions:\n"
45561" *\n"
45562" * The above copyright notice and this permission notice shall be included in\n"
45563" * all copies or substantial portions of the Software.\n"
45564" *\n"
45565" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45566" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45567" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45568" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45569" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45570" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45571" * THE SOFTWARE.\n"
45572" *\n"
45573" *===-----------------------------------------------------------------------===\n"
45574" */\n"
45575"\n"
45576"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
45577"#error \"Never use <rdseedintrin.h> directly; include <x86intrin.h> instead.\"\n"
45578"#endif\n"
45579"\n"
45580"#ifndef __RDSEEDINTRIN_H\n"
45581"#define __RDSEEDINTRIN_H\n"
45582"\n"
45583"/* Define the default attributes for the functions in this file. */\n"
45584"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"rdseed\")))\n"
45585"\n"
45586"static __inline__ int __DEFAULT_FN_ATTRS\n"
45587"_rdseed16_step(unsigned short *__p)\n"
45588"{\n"
45589" return __builtin_ia32_rdseed16_step(__p);\n"
45590"}\n"
45591"\n"
45592"static __inline__ int __DEFAULT_FN_ATTRS\n"
45593"_rdseed32_step(unsigned int *__p)\n"
45594"{\n"
45595" return __builtin_ia32_rdseed32_step(__p);\n"
45596"}\n"
45597"\n"
45598"#ifdef __x86_64__\n"
45599"static __inline__ int __DEFAULT_FN_ATTRS\n"
45600"_rdseed64_step(unsigned long long *__p)\n"
45601"{\n"
45602" return __builtin_ia32_rdseed64_step(__p);\n"
45603"}\n"
45604"#endif\n"
45605"\n"
45606"#undef __DEFAULT_FN_ATTRS\n"
45607"\n"
45608"#endif /* __RDSEEDINTRIN_H */\n"
45609"" } ,
45610 { "/builtins/rtmintrin.h" , "/*===---- rtmintrin.h - RTM intrinsics -------------------------------------===\n"
45611" *\n"
45612" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45613" * of this software and associated documentation files (the \"Software\"), to deal\n"
45614" * in the Software without restriction, including without limitation the rights\n"
45615" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45616" * copies of the Software, and to permit persons to whom the Software is\n"
45617" * furnished to do so, subject to the following conditions:\n"
45618" *\n"
45619" * The above copyright notice and this permission notice shall be included in\n"
45620" * all copies or substantial portions of the Software.\n"
45621" *\n"
45622" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45623" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45624" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45625" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45626" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45627" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45628" * THE SOFTWARE.\n"
45629" *\n"
45630" *===-----------------------------------------------------------------------===\n"
45631" */\n"
45632"\n"
45633"#ifndef __IMMINTRIN_H\n"
45634"#error \"Never use <rtmintrin.h> directly; include <immintrin.h> instead.\"\n"
45635"#endif\n"
45636"\n"
45637"#ifndef __RTMINTRIN_H\n"
45638"#define __RTMINTRIN_H\n"
45639"\n"
45640"#define _XBEGIN_STARTED (~0u)\n"
45641"#define _XABORT_EXPLICIT (1 << 0)\n"
45642"#define _XABORT_RETRY (1 << 1)\n"
45643"#define _XABORT_CONFLICT (1 << 2)\n"
45644"#define _XABORT_CAPACITY (1 << 3)\n"
45645"#define _XABORT_DEBUG (1 << 4)\n"
45646"#define _XABORT_NESTED (1 << 5)\n"
45647"#define _XABORT_CODE(x) (((x) >> 24) & 0xFF)\n"
45648"\n"
45649"/* Define the default attributes for the functions in this file. */\n"
45650"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"rtm\")))\n"
45651"\n"
45652"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
45653"_xbegin(void)\n"
45654"{\n"
45655" return __builtin_ia32_xbegin();\n"
45656"}\n"
45657"\n"
45658"static __inline__ void __DEFAULT_FN_ATTRS\n"
45659"_xend(void)\n"
45660"{\n"
45661" __builtin_ia32_xend();\n"
45662"}\n"
45663"\n"
45664"#define _xabort(imm) __builtin_ia32_xabort((imm))\n"
45665"\n"
45666"#undef __DEFAULT_FN_ATTRS\n"
45667"\n"
45668"#endif /* __RTMINTRIN_H */\n"
45669"" } ,
45670 { "/builtins/s390intrin.h" , "/*===---- s390intrin.h - SystemZ intrinsics --------------------------------===\n"
45671" *\n"
45672" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45673" * of this software and associated documentation files (the \"Software\"), to deal\n"
45674" * in the Software without restriction, including without limitation the rights\n"
45675" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45676" * copies of the Software, and to permit persons to whom the Software is\n"
45677" * furnished to do so, subject to the following conditions:\n"
45678" *\n"
45679" * The above copyright notice and this permission notice shall be included in\n"
45680" * all copies or substantial portions of the Software.\n"
45681" *\n"
45682" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45683" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45684" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45685" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45686" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45687" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45688" * THE SOFTWARE.\n"
45689" *\n"
45690" *===-----------------------------------------------------------------------===\n"
45691" */\n"
45692"\n"
45693"#ifndef __S390INTRIN_H\n"
45694"#define __S390INTRIN_H\n"
45695"\n"
45696"#ifndef __s390__\n"
45697"#error \"<s390intrin.h> is for s390 only\"\n"
45698"#endif\n"
45699"\n"
45700"#ifdef __HTM__\n"
45701"#include <htmintrin.h>\n"
45702"#endif\n"
45703"\n"
45704"#ifdef __VEC__\n"
45705"#include <vecintrin.h>\n"
45706"#endif\n"
45707"\n"
45708"#endif /* __S390INTRIN_H*/\n"
45709"" } ,
45710 { "/builtins/sgxintrin.h" , "/*===---- sgxintrin.h - X86 SGX intrinsics configuration -------------------===\n"
45711" *\n"
45712" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45713" * of this software and associated documentation files (the \"Software\"), to deal\n"
45714" * in the Software without restriction, including without limitation the rights\n"
45715" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45716" * copies of the Software, and to permit persons to whom the Software is\n"
45717" * furnished to do so, subject to the following conditions:\n"
45718" *\n"
45719" * The above copyright notice and this permission notice shall be included in\n"
45720" * all copies or substantial portions of the Software.\n"
45721" *\n"
45722" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45723" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45724" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45725" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45726" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45727" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45728" * THE SOFTWARE.\n"
45729" *\n"
45730" *===-----------------------------------------------------------------------===\n"
45731" */\n"
45732"\n"
45733"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
45734"#error \"Never use <sgxintrin.h> directly; include <x86intrin.h> instead.\"\n"
45735"#endif\n"
45736"\n"
45737"#ifndef __SGXINTRIN_H\n"
45738"#define __SGXINTRIN_H\n"
45739"\n"
45740"/* Define the default attributes for the functions in this file. */\n"
45741"#define __DEFAULT_FN_ATTRS \\\n"
45742" __attribute__((__always_inline__, __nodebug__, __target__(\"sgx\")))\n"
45743"\n"
45744"static __inline unsigned int __DEFAULT_FN_ATTRS\n"
45745"_enclu_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n"
45746"{\n"
45747" unsigned int __result;\n"
45748" __asm__ (\"enclu\"\n"
45749" : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n"
45750" : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n"
45751" : \"cc\");\n"
45752" return __result;\n"
45753"}\n"
45754"\n"
45755"static __inline unsigned int __DEFAULT_FN_ATTRS\n"
45756"_encls_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n"
45757"{\n"
45758" unsigned int __result;\n"
45759" __asm__ (\"encls\"\n"
45760" : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n"
45761" : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n"
45762" : \"cc\");\n"
45763" return __result;\n"
45764"}\n"
45765"\n"
45766"static __inline unsigned int __DEFAULT_FN_ATTRS\n"
45767"_enclv_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n"
45768"{\n"
45769" unsigned int __result;\n"
45770" __asm__ (\"enclv\"\n"
45771" : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n"
45772" : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n"
45773" : \"cc\");\n"
45774" return __result;\n"
45775"}\n"
45776"\n"
45777"#undef __DEFAULT_FN_ATTRS\n"
45778"\n"
45779"#endif\n"
45780"" } ,
45781 { "/builtins/shaintrin.h" , "/*===---- shaintrin.h - SHA intrinsics -------------------------------------===\n"
45782" *\n"
45783" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45784" * of this software and associated documentation files (the \"Software\"), to deal\n"
45785" * in the Software without restriction, including without limitation the rights\n"
45786" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45787" * copies of the Software, and to permit persons to whom the Software is\n"
45788" * furnished to do so, subject to the following conditions:\n"
45789" *\n"
45790" * The above copyright notice and this permission notice shall be included in\n"
45791" * all copies or substantial portions of the Software.\n"
45792" *\n"
45793" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45794" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45795" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45796" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45797" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45798" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45799" * THE SOFTWARE.\n"
45800" *\n"
45801" *===-----------------------------------------------------------------------===\n"
45802" */\n"
45803"\n"
45804"#ifndef __IMMINTRIN_H\n"
45805"#error \"Never use <shaintrin.h> directly; include <immintrin.h> instead.\"\n"
45806"#endif\n"
45807"\n"
45808"#ifndef __SHAINTRIN_H\n"
45809"#define __SHAINTRIN_H\n"
45810"\n"
45811"/* Define the default attributes for the functions in this file. */\n"
45812"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sha\"), __min_vector_width__(128)))\n"
45813"\n"
45814"#define _mm_sha1rnds4_epu32(V1, V2, M) \\\n"
45815" __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M))\n"
45816"\n"
45817"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45818"_mm_sha1nexte_epu32(__m128i __X, __m128i __Y)\n"
45819"{\n"
45820" return (__m128i)__builtin_ia32_sha1nexte((__v4si)__X, (__v4si)__Y);\n"
45821"}\n"
45822"\n"
45823"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45824"_mm_sha1msg1_epu32(__m128i __X, __m128i __Y)\n"
45825"{\n"
45826" return (__m128i)__builtin_ia32_sha1msg1((__v4si)__X, (__v4si)__Y);\n"
45827"}\n"
45828"\n"
45829"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45830"_mm_sha1msg2_epu32(__m128i __X, __m128i __Y)\n"
45831"{\n"
45832" return (__m128i)__builtin_ia32_sha1msg2((__v4si)__X, (__v4si)__Y);\n"
45833"}\n"
45834"\n"
45835"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45836"_mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z)\n"
45837"{\n"
45838" return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__X, (__v4si)__Y, (__v4si)__Z);\n"
45839"}\n"
45840"\n"
45841"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45842"_mm_sha256msg1_epu32(__m128i __X, __m128i __Y)\n"
45843"{\n"
45844" return (__m128i)__builtin_ia32_sha256msg1((__v4si)__X, (__v4si)__Y);\n"
45845"}\n"
45846"\n"
45847"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45848"_mm_sha256msg2_epu32(__m128i __X, __m128i __Y)\n"
45849"{\n"
45850" return (__m128i)__builtin_ia32_sha256msg2((__v4si)__X, (__v4si)__Y);\n"
45851"}\n"
45852"\n"
45853"#undef __DEFAULT_FN_ATTRS\n"
45854"\n"
45855"#endif /* __SHAINTRIN_H */\n"
45856"" } ,
45857 { "/builtins/smmintrin.h" , "/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------===\n"
45858" *\n"
45859" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45860" * of this software and associated documentation files (the \"Software\"), to deal\n"
45861" * in the Software without restriction, including without limitation the rights\n"
45862" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45863" * copies of the Software, and to permit persons to whom the Software is\n"
45864" * furnished to do so, subject to the following conditions:\n"
45865" *\n"
45866" * The above copyright notice and this permission notice shall be included in\n"
45867" * all copies or substantial portions of the Software.\n"
45868" *\n"
45869" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45870" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45871" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45872" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45873" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45874" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45875" * THE SOFTWARE.\n"
45876" *\n"
45877" *===-----------------------------------------------------------------------===\n"
45878" */\n"
45879"\n"
45880"#ifndef __SMMINTRIN_H\n"
45881"#define __SMMINTRIN_H\n"
45882"\n"
45883"#include <tmmintrin.h>\n"
45884"\n"
45885"/* Define the default attributes for the functions in this file. */\n"
45886"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse4.1\"), __min_vector_width__(128)))\n"
45887"\n"
45888"/* SSE4 Rounding macros. */\n"
45889"#define _MM_FROUND_TO_NEAREST_INT 0x00\n"
45890"#define _MM_FROUND_TO_NEG_INF 0x01\n"
45891"#define _MM_FROUND_TO_POS_INF 0x02\n"
45892"#define _MM_FROUND_TO_ZERO 0x03\n"
45893"#define _MM_FROUND_CUR_DIRECTION 0x04\n"
45894"\n"
45895"#define _MM_FROUND_RAISE_EXC 0x00\n"
45896"#define _MM_FROUND_NO_EXC 0x08\n"
45897"\n"
45898"#define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT)\n"
45899"#define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF)\n"
45900"#define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF)\n"
45901"#define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO)\n"
45902"#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION)\n"
45903"#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION)\n"
45904"\n"
45905"/// Rounds up each element of the 128-bit vector of [4 x float] to an\n"
45906"/// integer and returns the rounded values in a 128-bit vector of\n"
45907"/// [4 x float].\n"
45908"///\n"
45909"/// \\headerfile <x86intrin.h>\n"
45910"///\n"
45911"/// \\code\n"
45912"/// __m128 _mm_ceil_ps(__m128 X);\n"
45913"/// \\endcode\n"
45914"///\n"
45915"/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.\n"
45916"///\n"
45917"/// \\param X\n"
45918"/// A 128-bit vector of [4 x float] values to be rounded up.\n"
45919"/// \\returns A 128-bit vector of [4 x float] containing the rounded values.\n"
45920"#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL)\n"
45921"\n"
45922"/// Rounds up each element of the 128-bit vector of [2 x double] to an\n"
45923"/// integer and returns the rounded values in a 128-bit vector of\n"
45924"/// [2 x double].\n"
45925"///\n"
45926"/// \\headerfile <x86intrin.h>\n"
45927"///\n"
45928"/// \\code\n"
45929"/// __m128d _mm_ceil_pd(__m128d X);\n"
45930"/// \\endcode\n"
45931"///\n"
45932"/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.\n"
45933"///\n"
45934"/// \\param X\n"
45935"/// A 128-bit vector of [2 x double] values to be rounded up.\n"
45936"/// \\returns A 128-bit vector of [2 x double] containing the rounded values.\n"
45937"#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL)\n"
45938"\n"
45939"/// Copies three upper elements of the first 128-bit vector operand to\n"
45940"/// the corresponding three upper elements of the 128-bit result vector of\n"
45941"/// [4 x float]. Rounds up the lowest element of the second 128-bit vector\n"
45942"/// operand to an integer and copies it to the lowest element of the 128-bit\n"
45943"/// result vector of [4 x float].\n"
45944"///\n"
45945"/// \\headerfile <x86intrin.h>\n"
45946"///\n"
45947"/// \\code\n"
45948"/// __m128 _mm_ceil_ss(__m128 X, __m128 Y);\n"
45949"/// \\endcode\n"
45950"///\n"
45951"/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.\n"
45952"///\n"
45953"/// \\param X\n"
45954"/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are\n"
45955"/// copied to the corresponding bits of the result.\n"
45956"/// \\param Y\n"
45957"/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is\n"
45958"/// rounded up to the nearest integer and copied to the corresponding bits\n"
45959"/// of the result.\n"
45960"/// \\returns A 128-bit vector of [4 x float] containing the copied and rounded\n"
45961"/// values.\n"
45962"#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL)\n"
45963"\n"
45964"/// Copies the upper element of the first 128-bit vector operand to the\n"
45965"/// corresponding upper element of the 128-bit result vector of [2 x double].\n"
45966"/// Rounds up the lower element of the second 128-bit vector operand to an\n"
45967"/// integer and copies it to the lower element of the 128-bit result vector\n"
45968"/// of [2 x double].\n"
45969"///\n"
45970"/// \\headerfile <x86intrin.h>\n"
45971"///\n"
45972"/// \\code\n"
45973"/// __m128d _mm_ceil_sd(__m128d X, __m128d Y);\n"
45974"/// \\endcode\n"
45975"///\n"
45976"/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.\n"
45977"///\n"
45978"/// \\param X\n"
45979"/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is\n"
45980"/// copied to the corresponding bits of the result.\n"
45981"/// \\param Y\n"
45982"/// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is\n"
45983"/// rounded up to the nearest integer and copied to the corresponding bits\n"
45984"/// of the result.\n"
45985"/// \\returns A 128-bit vector of [2 x double] containing the copied and rounded\n"
45986"/// values.\n"
45987"#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL)\n"
45988"\n"
45989"/// Rounds down each element of the 128-bit vector of [4 x float] to an\n"
45990"/// an integer and returns the rounded values in a 128-bit vector of\n"
45991"/// [4 x float].\n"
45992"///\n"
45993"/// \\headerfile <x86intrin.h>\n"
45994"///\n"
45995"/// \\code\n"
45996"/// __m128 _mm_floor_ps(__m128 X);\n"
45997"/// \\endcode\n"
45998"///\n"
45999"/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.\n"
46000"///\n"
46001"/// \\param X\n"
46002"/// A 128-bit vector of [4 x float] values to be rounded down.\n"
46003"/// \\returns A 128-bit vector of [4 x float] containing the rounded values.\n"
46004"#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR)\n"
46005"\n"
46006"/// Rounds down each element of the 128-bit vector of [2 x double] to an\n"
46007"/// integer and returns the rounded values in a 128-bit vector of\n"
46008"/// [2 x double].\n"
46009"///\n"
46010"/// \\headerfile <x86intrin.h>\n"
46011"///\n"
46012"/// \\code\n"
46013"/// __m128d _mm_floor_pd(__m128d X);\n"
46014"/// \\endcode\n"
46015"///\n"
46016"/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.\n"
46017"///\n"
46018"/// \\param X\n"
46019"/// A 128-bit vector of [2 x double].\n"
46020"/// \\returns A 128-bit vector of [2 x double] containing the rounded values.\n"
46021"#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR)\n"
46022"\n"
46023"/// Copies three upper elements of the first 128-bit vector operand to\n"
46024"/// the corresponding three upper elements of the 128-bit result vector of\n"
46025"/// [4 x float]. Rounds down the lowest element of the second 128-bit vector\n"
46026"/// operand to an integer and copies it to the lowest element of the 128-bit\n"
46027"/// result vector of [4 x float].\n"
46028"///\n"
46029"/// \\headerfile <x86intrin.h>\n"
46030"///\n"
46031"/// \\code\n"
46032"/// __m128 _mm_floor_ss(__m128 X, __m128 Y);\n"
46033"/// \\endcode\n"
46034"///\n"
46035"/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.\n"
46036"///\n"
46037"/// \\param X\n"
46038"/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are\n"
46039"/// copied to the corresponding bits of the result.\n"
46040"/// \\param Y\n"
46041"/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is\n"
46042"/// rounded down to the nearest integer and copied to the corresponding bits\n"
46043"/// of the result.\n"
46044"/// \\returns A 128-bit vector of [4 x float] containing the copied and rounded\n"
46045"/// values.\n"
46046"#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)\n"
46047"\n"
46048"/// Copies the upper element of the first 128-bit vector operand to the\n"
46049"/// corresponding upper element of the 128-bit result vector of [2 x double].\n"
46050"/// Rounds down the lower element of the second 128-bit vector operand to an\n"
46051"/// integer and copies it to the lower element of the 128-bit result vector\n"
46052"/// of [2 x double].\n"
46053"///\n"
46054"/// \\headerfile <x86intrin.h>\n"
46055"///\n"
46056"/// \\code\n"
46057"/// __m128d _mm_floor_sd(__m128d X, __m128d Y);\n"
46058"/// \\endcode\n"
46059"///\n"
46060"/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.\n"
46061"///\n"
46062"/// \\param X\n"
46063"/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is\n"
46064"/// copied to the corresponding bits of the result.\n"
46065"/// \\param Y\n"
46066"/// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is\n"
46067"/// rounded down to the nearest integer and copied to the corresponding bits\n"
46068"/// of the result.\n"
46069"/// \\returns A 128-bit vector of [2 x double] containing the copied and rounded\n"
46070"/// values.\n"
46071"#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)\n"
46072"\n"
46073"/// Rounds each element of the 128-bit vector of [4 x float] to an\n"
46074"/// integer value according to the rounding control specified by the second\n"
46075"/// argument and returns the rounded values in a 128-bit vector of\n"
46076"/// [4 x float].\n"
46077"///\n"
46078"/// \\headerfile <x86intrin.h>\n"
46079"///\n"
46080"/// \\code\n"
46081"/// __m128 _mm_round_ps(__m128 X, const int M);\n"
46082"/// \\endcode\n"
46083"///\n"
46084"/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.\n"
46085"///\n"
46086"/// \\param X\n"
46087"/// A 128-bit vector of [4 x float].\n"
46088"/// \\param M\n"
46089"/// An integer value that specifies the rounding operation. \\n\n"
46090"/// Bits [7:4] are reserved. \\n\n"
46091"/// Bit [3] is a precision exception value: \\n\n"
46092"/// 0: A normal PE exception is used \\n\n"
46093"/// 1: The PE field is not updated \\n\n"
46094"/// Bit [2] is the rounding control source: \\n\n"
46095"/// 0: Use bits [1:0] of \\a M \\n\n"
46096"/// 1: Use the current MXCSR setting \\n\n"
46097"/// Bits [1:0] contain the rounding control definition: \\n\n"
46098"/// 00: Nearest \\n\n"
46099"/// 01: Downward (toward negative infinity) \\n\n"
46100"/// 10: Upward (toward positive infinity) \\n\n"
46101"/// 11: Truncated\n"
46102"/// \\returns A 128-bit vector of [4 x float] containing the rounded values.\n"
46103"#define _mm_round_ps(X, M) \\\n"
46104" (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))\n"
46105"\n"
46106"/// Copies three upper elements of the first 128-bit vector operand to\n"
46107"/// the corresponding three upper elements of the 128-bit result vector of\n"
46108"/// [4 x float]. Rounds the lowest element of the second 128-bit vector\n"
46109"/// operand to an integer value according to the rounding control specified\n"
46110"/// by the third argument and copies it to the lowest element of the 128-bit\n"
46111"/// result vector of [4 x float].\n"
46112"///\n"
46113"/// \\headerfile <x86intrin.h>\n"
46114"///\n"
46115"/// \\code\n"
46116"/// __m128 _mm_round_ss(__m128 X, __m128 Y, const int M);\n"
46117"/// \\endcode\n"
46118"///\n"
46119"/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.\n"
46120"///\n"
46121"/// \\param X\n"
46122"/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are\n"
46123"/// copied to the corresponding bits of the result.\n"
46124"/// \\param Y\n"
46125"/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is\n"
46126"/// rounded to the nearest integer using the specified rounding control and\n"
46127"/// copied to the corresponding bits of the result.\n"
46128"/// \\param M\n"
46129"/// An integer value that specifies the rounding operation. \\n\n"
46130"/// Bits [7:4] are reserved. \\n\n"
46131"/// Bit [3] is a precision exception value: \\n\n"
46132"/// 0: A normal PE exception is used \\n\n"
46133"/// 1: The PE field is not updated \\n\n"
46134"/// Bit [2] is the rounding control source: \\n\n"
46135"/// 0: Use bits [1:0] of \\a M \\n\n"
46136"/// 1: Use the current MXCSR setting \\n\n"
46137"/// Bits [1:0] contain the rounding control definition: \\n\n"
46138"/// 00: Nearest \\n\n"
46139"/// 01: Downward (toward negative infinity) \\n\n"
46140"/// 10: Upward (toward positive infinity) \\n\n"
46141"/// 11: Truncated\n"
46142"/// \\returns A 128-bit vector of [4 x float] containing the copied and rounded\n"
46143"/// values.\n"
46144"#define _mm_round_ss(X, Y, M) \\\n"
46145" (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \\\n"
46146" (__v4sf)(__m128)(Y), (M))\n"
46147"\n"
46148"/// Rounds each element of the 128-bit vector of [2 x double] to an\n"
46149"/// integer value according to the rounding control specified by the second\n"
46150"/// argument and returns the rounded values in a 128-bit vector of\n"
46151"/// [2 x double].\n"
46152"///\n"
46153"/// \\headerfile <x86intrin.h>\n"
46154"///\n"
46155"/// \\code\n"
46156"/// __m128d _mm_round_pd(__m128d X, const int M);\n"
46157"/// \\endcode\n"
46158"///\n"
46159"/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.\n"
46160"///\n"
46161"/// \\param X\n"
46162"/// A 128-bit vector of [2 x double].\n"
46163"/// \\param M\n"
46164"/// An integer value that specifies the rounding operation. \\n\n"
46165"/// Bits [7:4] are reserved. \\n\n"
46166"/// Bit [3] is a precision exception value: \\n\n"
46167"/// 0: A normal PE exception is used \\n\n"
46168"/// 1: The PE field is not updated \\n\n"
46169"/// Bit [2] is the rounding control source: \\n\n"
46170"/// 0: Use bits [1:0] of \\a M \\n\n"
46171"/// 1: Use the current MXCSR setting \\n\n"
46172"/// Bits [1:0] contain the rounding control definition: \\n\n"
46173"/// 00: Nearest \\n\n"
46174"/// 01: Downward (toward negative infinity) \\n\n"
46175"/// 10: Upward (toward positive infinity) \\n\n"
46176"/// 11: Truncated\n"
46177"/// \\returns A 128-bit vector of [2 x double] containing the rounded values.\n"
46178"#define _mm_round_pd(X, M) \\\n"
46179" (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M))\n"
46180"\n"
46181"/// Copies the upper element of the first 128-bit vector operand to the\n"
46182"/// corresponding upper element of the 128-bit result vector of [2 x double].\n"
46183"/// Rounds the lower element of the second 128-bit vector operand to an\n"
46184"/// integer value according to the rounding control specified by the third\n"
46185"/// argument and copies it to the lower element of the 128-bit result vector\n"
46186"/// of [2 x double].\n"
46187"///\n"
46188"/// \\headerfile <x86intrin.h>\n"
46189"///\n"
46190"/// \\code\n"
46191"/// __m128d _mm_round_sd(__m128d X, __m128d Y, const int M);\n"
46192"/// \\endcode\n"
46193"///\n"
46194"/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.\n"
46195"///\n"
46196"/// \\param X\n"
46197"/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is\n"
46198"/// copied to the corresponding bits of the result.\n"
46199"/// \\param Y\n"
46200"/// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is\n"
46201"/// rounded to the nearest integer using the specified rounding control and\n"
46202"/// copied to the corresponding bits of the result.\n"
46203"/// \\param M\n"
46204"/// An integer value that specifies the rounding operation. \\n\n"
46205"/// Bits [7:4] are reserved. \\n\n"
46206"/// Bit [3] is a precision exception value: \\n\n"
46207"/// 0: A normal PE exception is used \\n\n"
46208"/// 1: The PE field is not updated \\n\n"
46209"/// Bit [2] is the rounding control source: \\n\n"
46210"/// 0: Use bits [1:0] of \\a M \\n\n"
46211"/// 1: Use the current MXCSR setting \\n\n"
46212"/// Bits [1:0] contain the rounding control definition: \\n\n"
46213"/// 00: Nearest \\n\n"
46214"/// 01: Downward (toward negative infinity) \\n\n"
46215"/// 10: Upward (toward positive infinity) \\n\n"
46216"/// 11: Truncated\n"
46217"/// \\returns A 128-bit vector of [2 x double] containing the copied and rounded\n"
46218"/// values.\n"
46219"#define _mm_round_sd(X, Y, M) \\\n"
46220" (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \\\n"
46221" (__v2df)(__m128d)(Y), (M))\n"
46222"\n"
46223"/* SSE4 Packed Blending Intrinsics. */\n"
46224"/// Returns a 128-bit vector of [2 x double] where the values are\n"
46225"/// selected from either the first or second operand as specified by the\n"
46226"/// third operand, the control mask.\n"
46227"///\n"
46228"/// \\headerfile <x86intrin.h>\n"
46229"///\n"
46230"/// \\code\n"
46231"/// __m128d _mm_blend_pd(__m128d V1, __m128d V2, const int M);\n"
46232"/// \\endcode\n"
46233"///\n"
46234"/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.\n"
46235"///\n"
46236"/// \\param V1\n"
46237"/// A 128-bit vector of [2 x double].\n"
46238"/// \\param V2\n"
46239"/// A 128-bit vector of [2 x double].\n"
46240"/// \\param M\n"
46241"/// An immediate integer operand, with mask bits [1:0] specifying how the\n"
46242"/// values are to be copied. The position of the mask bit corresponds to the\n"
46243"/// index of a copied value. When a mask bit is 0, the corresponding 64-bit\n"
46244"/// element in operand \\a V1 is copied to the same position in the result.\n"
46245"/// When a mask bit is 1, the corresponding 64-bit element in operand \\a V2\n"
46246"/// is copied to the same position in the result.\n"
46247"/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n"
46248"#define _mm_blend_pd(V1, V2, M) \\\n"
46249" (__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(V1), \\\n"
46250" (__v2df)(__m128d)(V2), (int)(M))\n"
46251"\n"
46252"/// Returns a 128-bit vector of [4 x float] where the values are selected\n"
46253"/// from either the first or second operand as specified by the third\n"
46254"/// operand, the control mask.\n"
46255"///\n"
46256"/// \\headerfile <x86intrin.h>\n"
46257"///\n"
46258"/// \\code\n"
46259"/// __m128 _mm_blend_ps(__m128 V1, __m128 V2, const int M);\n"
46260"/// \\endcode\n"
46261"///\n"
46262"/// This intrinsic corresponds to the <c> VBLENDPS / BLENDPS </c> instruction.\n"
46263"///\n"
46264"/// \\param V1\n"
46265"/// A 128-bit vector of [4 x float].\n"
46266"/// \\param V2\n"
46267"/// A 128-bit vector of [4 x float].\n"
46268"/// \\param M\n"
46269"/// An immediate integer operand, with mask bits [3:0] specifying how the\n"
46270"/// values are to be copied. The position of the mask bit corresponds to the\n"
46271"/// index of a copied value. When a mask bit is 0, the corresponding 32-bit\n"
46272"/// element in operand \\a V1 is copied to the same position in the result.\n"
46273"/// When a mask bit is 1, the corresponding 32-bit element in operand \\a V2\n"
46274"/// is copied to the same position in the result.\n"
46275"/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n"
46276"#define _mm_blend_ps(V1, V2, M) \\\n"
46277" (__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(V1), \\\n"
46278" (__v4sf)(__m128)(V2), (int)(M))\n"
46279"\n"
46280"/// Returns a 128-bit vector of [2 x double] where the values are\n"
46281"/// selected from either the first or second operand as specified by the\n"
46282"/// third operand, the control mask.\n"
46283"///\n"
46284"/// \\headerfile <x86intrin.h>\n"
46285"///\n"
46286"/// This intrinsic corresponds to the <c> VBLENDVPD / BLENDVPD </c> instruction.\n"
46287"///\n"
46288"/// \\param __V1\n"
46289"/// A 128-bit vector of [2 x double].\n"
46290"/// \\param __V2\n"
46291"/// A 128-bit vector of [2 x double].\n"
46292"/// \\param __M\n"
46293"/// A 128-bit vector operand, with mask bits 127 and 63 specifying how the\n"
46294"/// values are to be copied. The position of the mask bit corresponds to the\n"
46295"/// most significant bit of a copied value. When a mask bit is 0, the\n"
46296"/// corresponding 64-bit element in operand \\a __V1 is copied to the same\n"
46297"/// position in the result. When a mask bit is 1, the corresponding 64-bit\n"
46298"/// element in operand \\a __V2 is copied to the same position in the result.\n"
46299"/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n"
46300"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
46301"_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M)\n"
46302"{\n"
46303" return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2,\n"
46304" (__v2df)__M);\n"
46305"}\n"
46306"\n"
46307"/// Returns a 128-bit vector of [4 x float] where the values are\n"
46308"/// selected from either the first or second operand as specified by the\n"
46309"/// third operand, the control mask.\n"
46310"///\n"
46311"/// \\headerfile <x86intrin.h>\n"
46312"///\n"
46313"/// This intrinsic corresponds to the <c> VBLENDVPS / BLENDVPS </c> instruction.\n"
46314"///\n"
46315"/// \\param __V1\n"
46316"/// A 128-bit vector of [4 x float].\n"
46317"/// \\param __V2\n"
46318"/// A 128-bit vector of [4 x float].\n"
46319"/// \\param __M\n"
46320"/// A 128-bit vector operand, with mask bits 127, 95, 63, and 31 specifying\n"
46321"/// how the values are to be copied. The position of the mask bit corresponds\n"
46322"/// to the most significant bit of a copied value. When a mask bit is 0, the\n"
46323"/// corresponding 32-bit element in operand \\a __V1 is copied to the same\n"
46324"/// position in the result. When a mask bit is 1, the corresponding 32-bit\n"
46325"/// element in operand \\a __V2 is copied to the same position in the result.\n"
46326"/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n"
46327"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
46328"_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)\n"
46329"{\n"
46330" return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2,\n"
46331" (__v4sf)__M);\n"
46332"}\n"
46333"\n"
46334"/// Returns a 128-bit vector of [16 x i8] where the values are selected\n"
46335"/// from either of the first or second operand as specified by the third\n"
46336"/// operand, the control mask.\n"
46337"///\n"
46338"/// \\headerfile <x86intrin.h>\n"
46339"///\n"
46340"/// This intrinsic corresponds to the <c> VPBLENDVB / PBLENDVB </c> instruction.\n"
46341"///\n"
46342"/// \\param __V1\n"
46343"/// A 128-bit vector of [16 x i8].\n"
46344"/// \\param __V2\n"
46345"/// A 128-bit vector of [16 x i8].\n"
46346"/// \\param __M\n"
46347"/// A 128-bit vector operand, with mask bits 127, 119, 111...7 specifying\n"
46348"/// how the values are to be copied. The position of the mask bit corresponds\n"
46349"/// to the most significant bit of a copied value. When a mask bit is 0, the\n"
46350"/// corresponding 8-bit element in operand \\a __V1 is copied to the same\n"
46351"/// position in the result. When a mask bit is 1, the corresponding 8-bit\n"
46352"/// element in operand \\a __V2 is copied to the same position in the result.\n"
46353"/// \\returns A 128-bit vector of [16 x i8] containing the copied values.\n"
46354"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46355"_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)\n"
46356"{\n"
46357" return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2,\n"
46358" (__v16qi)__M);\n"
46359"}\n"
46360"\n"
46361"/// Returns a 128-bit vector of [8 x i16] where the values are selected\n"
46362"/// from either of the first or second operand as specified by the third\n"
46363"/// operand, the control mask.\n"
46364"///\n"
46365"/// \\headerfile <x86intrin.h>\n"
46366"///\n"
46367"/// \\code\n"
46368"/// __m128i _mm_blend_epi16(__m128i V1, __m128i V2, const int M);\n"
46369"/// \\endcode\n"
46370"///\n"
46371"/// This intrinsic corresponds to the <c> VPBLENDW / PBLENDW </c> instruction.\n"
46372"///\n"
46373"/// \\param V1\n"
46374"/// A 128-bit vector of [8 x i16].\n"
46375"/// \\param V2\n"
46376"/// A 128-bit vector of [8 x i16].\n"
46377"/// \\param M\n"
46378"/// An immediate integer operand, with mask bits [7:0] specifying how the\n"
46379"/// values are to be copied. The position of the mask bit corresponds to the\n"
46380"/// index of a copied value. When a mask bit is 0, the corresponding 16-bit\n"
46381"/// element in operand \\a V1 is copied to the same position in the result.\n"
46382"/// When a mask bit is 1, the corresponding 16-bit element in operand \\a V2\n"
46383"/// is copied to the same position in the result.\n"
46384"/// \\returns A 128-bit vector of [8 x i16] containing the copied values.\n"
46385"#define _mm_blend_epi16(V1, V2, M) \\\n"
46386" (__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(V1), \\\n"
46387" (__v8hi)(__m128i)(V2), (int)(M))\n"
46388"\n"
46389"/* SSE4 Dword Multiply Instructions. */\n"
46390"/// Multiples corresponding elements of two 128-bit vectors of [4 x i32]\n"
46391"/// and returns the lower 32 bits of the each product in a 128-bit vector of\n"
46392"/// [4 x i32].\n"
46393"///\n"
46394"/// \\headerfile <x86intrin.h>\n"
46395"///\n"
46396"/// This intrinsic corresponds to the <c> VPMULLD / PMULLD </c> instruction.\n"
46397"///\n"
46398"/// \\param __V1\n"
46399"/// A 128-bit integer vector.\n"
46400"/// \\param __V2\n"
46401"/// A 128-bit integer vector.\n"
46402"/// \\returns A 128-bit integer vector containing the products of both operands.\n"
46403"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46404"_mm_mullo_epi32 (__m128i __V1, __m128i __V2)\n"
46405"{\n"
46406" return (__m128i) ((__v4su)__V1 * (__v4su)__V2);\n"
46407"}\n"
46408"\n"
46409"/// Multiplies corresponding even-indexed elements of two 128-bit\n"
46410"/// vectors of [4 x i32] and returns a 128-bit vector of [2 x i64]\n"
46411"/// containing the products.\n"
46412"///\n"
46413"/// \\headerfile <x86intrin.h>\n"
46414"///\n"
46415"/// This intrinsic corresponds to the <c> VPMULDQ / PMULDQ </c> instruction.\n"
46416"///\n"
46417"/// \\param __V1\n"
46418"/// A 128-bit vector of [4 x i32].\n"
46419"/// \\param __V2\n"
46420"/// A 128-bit vector of [4 x i32].\n"
46421"/// \\returns A 128-bit vector of [2 x i64] containing the products of both\n"
46422"/// operands.\n"
46423"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46424"_mm_mul_epi32 (__m128i __V1, __m128i __V2)\n"
46425"{\n"
46426" return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2);\n"
46427"}\n"
46428"\n"
46429"/* SSE4 Floating Point Dot Product Instructions. */\n"
46430"/// Computes the dot product of the two 128-bit vectors of [4 x float]\n"
46431"/// and returns it in the elements of the 128-bit result vector of\n"
46432"/// [4 x float].\n"
46433"///\n"
46434"/// The immediate integer operand controls which input elements\n"
46435"/// will contribute to the dot product, and where the final results are\n"
46436"/// returned.\n"
46437"///\n"
46438"/// \\headerfile <x86intrin.h>\n"
46439"///\n"
46440"/// \\code\n"
46441"/// __m128 _mm_dp_ps(__m128 X, __m128 Y, const int M);\n"
46442"/// \\endcode\n"
46443"///\n"
46444"/// This intrinsic corresponds to the <c> VDPPS / DPPS </c> instruction.\n"
46445"///\n"
46446"/// \\param X\n"
46447"/// A 128-bit vector of [4 x float].\n"
46448"/// \\param Y\n"
46449"/// A 128-bit vector of [4 x float].\n"
46450"/// \\param M\n"
46451"/// An immediate integer operand. Mask bits [7:4] determine which elements\n"
46452"/// of the input vectors are used, with bit [4] corresponding to the lowest\n"
46453"/// element and bit [7] corresponding to the highest element of each [4 x\n"
46454"/// float] vector. If a bit is set, the corresponding elements from the two\n"
46455"/// input vectors are used as an input for dot product; otherwise that input\n"
46456"/// is treated as zero. Bits [3:0] determine which elements of the result\n"
46457"/// will receive a copy of the final dot product, with bit [0] corresponding\n"
46458"/// to the lowest element and bit [3] corresponding to the highest element of\n"
46459"/// each [4 x float] subvector. If a bit is set, the dot product is returned\n"
46460"/// in the corresponding element; otherwise that element is set to zero.\n"
46461"/// \\returns A 128-bit vector of [4 x float] containing the dot product.\n"
46462"#define _mm_dp_ps(X, Y, M) \\\n"
46463" (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \\\n"
46464" (__v4sf)(__m128)(Y), (M))\n"
46465"\n"
46466"/// Computes the dot product of the two 128-bit vectors of [2 x double]\n"
46467"/// and returns it in the elements of the 128-bit result vector of\n"
46468"/// [2 x double].\n"
46469"///\n"
46470"/// The immediate integer operand controls which input\n"
46471"/// elements will contribute to the dot product, and where the final results\n"
46472"/// are returned.\n"
46473"///\n"
46474"/// \\headerfile <x86intrin.h>\n"
46475"///\n"
46476"/// \\code\n"
46477"/// __m128d _mm_dp_pd(__m128d X, __m128d Y, const int M);\n"
46478"/// \\endcode\n"
46479"///\n"
46480"/// This intrinsic corresponds to the <c> VDPPD / DPPD </c> instruction.\n"
46481"///\n"
46482"/// \\param X\n"
46483"/// A 128-bit vector of [2 x double].\n"
46484"/// \\param Y\n"
46485"/// A 128-bit vector of [2 x double].\n"
46486"/// \\param M\n"
46487"/// An immediate integer operand. Mask bits [5:4] determine which elements\n"
46488"/// of the input vectors are used, with bit [4] corresponding to the lowest\n"
46489"/// element and bit [5] corresponding to the highest element of each of [2 x\n"
46490"/// double] vector. If a bit is set, the corresponding elements from the two\n"
46491"/// input vectors are used as an input for dot product; otherwise that input\n"
46492"/// is treated as zero. Bits [1:0] determine which elements of the result\n"
46493"/// will receive a copy of the final dot product, with bit [0] corresponding\n"
46494"/// to the lowest element and bit [1] corresponding to the highest element of\n"
46495"/// each [2 x double] vector. If a bit is set, the dot product is returned in\n"
46496"/// the corresponding element; otherwise that element is set to zero.\n"
46497"#define _mm_dp_pd(X, Y, M) \\\n"
46498" (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \\\n"
46499" (__v2df)(__m128d)(Y), (M))\n"
46500"\n"
46501"/* SSE4 Streaming Load Hint Instruction. */\n"
46502"/// Loads integer values from a 128-bit aligned memory location to a\n"
46503"/// 128-bit integer vector.\n"
46504"///\n"
46505"/// \\headerfile <x86intrin.h>\n"
46506"///\n"
46507"/// This intrinsic corresponds to the <c> VMOVNTDQA / MOVNTDQA </c> instruction.\n"
46508"///\n"
46509"/// \\param __V\n"
46510"/// A pointer to a 128-bit aligned memory location that contains the integer\n"
46511"/// values.\n"
46512"/// \\returns A 128-bit integer vector containing the data stored at the\n"
46513"/// specified memory location.\n"
46514"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46515"_mm_stream_load_si128 (__m128i const *__V)\n"
46516"{\n"
46517" return (__m128i) __builtin_nontemporal_load ((const __v2di *) __V);\n"
46518"}\n"
46519"\n"
46520"/* SSE4 Packed Integer Min/Max Instructions. */\n"
46521"/// Compares the corresponding elements of two 128-bit vectors of\n"
46522"/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the lesser\n"
46523"/// of the two values.\n"
46524"///\n"
46525"/// \\headerfile <x86intrin.h>\n"
46526"///\n"
46527"/// This intrinsic corresponds to the <c> VPMINSB / PMINSB </c> instruction.\n"
46528"///\n"
46529"/// \\param __V1\n"
46530"/// A 128-bit vector of [16 x i8].\n"
46531"/// \\param __V2\n"
46532"/// A 128-bit vector of [16 x i8]\n"
46533"/// \\returns A 128-bit vector of [16 x i8] containing the lesser values.\n"
46534"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46535"_mm_min_epi8 (__m128i __V1, __m128i __V2)\n"
46536"{\n"
46537" return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);\n"
46538"}\n"
46539"\n"
46540"/// Compares the corresponding elements of two 128-bit vectors of\n"
46541"/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the\n"
46542"/// greater value of the two.\n"
46543"///\n"
46544"/// \\headerfile <x86intrin.h>\n"
46545"///\n"
46546"/// This intrinsic corresponds to the <c> VPMAXSB / PMAXSB </c> instruction.\n"
46547"///\n"
46548"/// \\param __V1\n"
46549"/// A 128-bit vector of [16 x i8].\n"
46550"/// \\param __V2\n"
46551"/// A 128-bit vector of [16 x i8].\n"
46552"/// \\returns A 128-bit vector of [16 x i8] containing the greater values.\n"
46553"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46554"_mm_max_epi8 (__m128i __V1, __m128i __V2)\n"
46555"{\n"
46556" return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);\n"
46557"}\n"
46558"\n"
46559"/// Compares the corresponding elements of two 128-bit vectors of\n"
46560"/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the lesser\n"
46561"/// value of the two.\n"
46562"///\n"
46563"/// \\headerfile <x86intrin.h>\n"
46564"///\n"
46565"/// This intrinsic corresponds to the <c> VPMINUW / PMINUW </c> instruction.\n"
46566"///\n"
46567"/// \\param __V1\n"
46568"/// A 128-bit vector of [8 x u16].\n"
46569"/// \\param __V2\n"
46570"/// A 128-bit vector of [8 x u16].\n"
46571"/// \\returns A 128-bit vector of [8 x u16] containing the lesser values.\n"
46572"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46573"_mm_min_epu16 (__m128i __V1, __m128i __V2)\n"
46574"{\n"
46575" return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);\n"
46576"}\n"
46577"\n"
46578"/// Compares the corresponding elements of two 128-bit vectors of\n"
46579"/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the\n"
46580"/// greater value of the two.\n"
46581"///\n"
46582"/// \\headerfile <x86intrin.h>\n"
46583"///\n"
46584"/// This intrinsic corresponds to the <c> VPMAXUW / PMAXUW </c> instruction.\n"
46585"///\n"
46586"/// \\param __V1\n"
46587"/// A 128-bit vector of [8 x u16].\n"
46588"/// \\param __V2\n"
46589"/// A 128-bit vector of [8 x u16].\n"
46590"/// \\returns A 128-bit vector of [8 x u16] containing the greater values.\n"
46591"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46592"_mm_max_epu16 (__m128i __V1, __m128i __V2)\n"
46593"{\n"
46594" return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);\n"
46595"}\n"
46596"\n"
46597"/// Compares the corresponding elements of two 128-bit vectors of\n"
46598"/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the lesser\n"
46599"/// value of the two.\n"
46600"///\n"
46601"/// \\headerfile <x86intrin.h>\n"
46602"///\n"
46603"/// This intrinsic corresponds to the <c> VPMINSD / PMINSD </c> instruction.\n"
46604"///\n"
46605"/// \\param __V1\n"
46606"/// A 128-bit vector of [4 x i32].\n"
46607"/// \\param __V2\n"
46608"/// A 128-bit vector of [4 x i32].\n"
46609"/// \\returns A 128-bit vector of [4 x i32] containing the lesser values.\n"
46610"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46611"_mm_min_epi32 (__m128i __V1, __m128i __V2)\n"
46612"{\n"
46613" return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);\n"
46614"}\n"
46615"\n"
46616"/// Compares the corresponding elements of two 128-bit vectors of\n"
46617"/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the\n"
46618"/// greater value of the two.\n"
46619"///\n"
46620"/// \\headerfile <x86intrin.h>\n"
46621"///\n"
46622"/// This intrinsic corresponds to the <c> VPMAXSD / PMAXSD </c> instruction.\n"
46623"///\n"
46624"/// \\param __V1\n"
46625"/// A 128-bit vector of [4 x i32].\n"
46626"/// \\param __V2\n"
46627"/// A 128-bit vector of [4 x i32].\n"
46628"/// \\returns A 128-bit vector of [4 x i32] containing the greater values.\n"
46629"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46630"_mm_max_epi32 (__m128i __V1, __m128i __V2)\n"
46631"{\n"
46632" return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);\n"
46633"}\n"
46634"\n"
46635"/// Compares the corresponding elements of two 128-bit vectors of\n"
46636"/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the lesser\n"
46637"/// value of the two.\n"
46638"///\n"
46639"/// \\headerfile <x86intrin.h>\n"
46640"///\n"
46641"/// This intrinsic corresponds to the <c> VPMINUD / PMINUD </c> instruction.\n"
46642"///\n"
46643"/// \\param __V1\n"
46644"/// A 128-bit vector of [4 x u32].\n"
46645"/// \\param __V2\n"
46646"/// A 128-bit vector of [4 x u32].\n"
46647"/// \\returns A 128-bit vector of [4 x u32] containing the lesser values.\n"
46648"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46649"_mm_min_epu32 (__m128i __V1, __m128i __V2)\n"
46650"{\n"
46651" return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);\n"
46652"}\n"
46653"\n"
46654"/// Compares the corresponding elements of two 128-bit vectors of\n"
46655"/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the\n"
46656"/// greater value of the two.\n"
46657"///\n"
46658"/// \\headerfile <x86intrin.h>\n"
46659"///\n"
46660"/// This intrinsic corresponds to the <c> VPMAXUD / PMAXUD </c> instruction.\n"
46661"///\n"
46662"/// \\param __V1\n"
46663"/// A 128-bit vector of [4 x u32].\n"
46664"/// \\param __V2\n"
46665"/// A 128-bit vector of [4 x u32].\n"
46666"/// \\returns A 128-bit vector of [4 x u32] containing the greater values.\n"
46667"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46668"_mm_max_epu32 (__m128i __V1, __m128i __V2)\n"
46669"{\n"
46670" return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2);\n"
46671"}\n"
46672"\n"
46673"/* SSE4 Insertion and Extraction from XMM Register Instructions. */\n"
46674"/// Takes the first argument \\a X and inserts an element from the second\n"
46675"/// argument \\a Y as selected by the third argument \\a N. That result then\n"
46676"/// has elements zeroed out also as selected by the third argument \\a N. The\n"
46677"/// resulting 128-bit vector of [4 x float] is then returned.\n"
46678"///\n"
46679"/// \\headerfile <x86intrin.h>\n"
46680"///\n"
46681"/// \\code\n"
46682"/// __m128 _mm_insert_ps(__m128 X, __m128 Y, const int N);\n"
46683"/// \\endcode\n"
46684"///\n"
46685"/// This intrinsic corresponds to the <c> VINSERTPS </c> instruction.\n"
46686"///\n"
46687"/// \\param X\n"
46688"/// A 128-bit vector source operand of [4 x float]. With the exception of\n"
46689"/// those bits in the result copied from parameter \\a Y and zeroed by bits\n"
46690"/// [3:0] of \\a N, all bits from this parameter are copied to the result.\n"
46691"/// \\param Y\n"
46692"/// A 128-bit vector source operand of [4 x float]. One single-precision\n"
46693"/// floating-point element from this source, as determined by the immediate\n"
46694"/// parameter, is copied to the result.\n"
46695"/// \\param N\n"
46696"/// Specifies which bits from operand \\a Y will be copied, which bits in the\n"
46697"/// result they will be be copied to, and which bits in the result will be\n"
46698"/// cleared. The following assignments are made: \\n\n"
46699"/// Bits [7:6] specify the bits to copy from operand \\a Y: \\n\n"
46700"/// 00: Selects bits [31:0] from operand \\a Y. \\n\n"
46701"/// 01: Selects bits [63:32] from operand \\a Y. \\n\n"
46702"/// 10: Selects bits [95:64] from operand \\a Y. \\n\n"
46703"/// 11: Selects bits [127:96] from operand \\a Y. \\n\n"
46704"/// Bits [5:4] specify the bits in the result to which the selected bits\n"
46705"/// from operand \\a Y are copied: \\n\n"
46706"/// 00: Copies the selected bits from \\a Y to result bits [31:0]. \\n\n"
46707"/// 01: Copies the selected bits from \\a Y to result bits [63:32]. \\n\n"
46708"/// 10: Copies the selected bits from \\a Y to result bits [95:64]. \\n\n"
46709"/// 11: Copies the selected bits from \\a Y to result bits [127:96]. \\n\n"
46710"/// Bits[3:0]: If any of these bits are set, the corresponding result\n"
46711"/// element is cleared.\n"
46712"/// \\returns A 128-bit vector of [4 x float] containing the copied\n"
46713"/// single-precision floating point elements from the operands.\n"
46714"#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))\n"
46715"\n"
46716"/// Extracts a 32-bit integer from a 128-bit vector of [4 x float] and\n"
46717"/// returns it, using the immediate value parameter \\a N as a selector.\n"
46718"///\n"
46719"/// \\headerfile <x86intrin.h>\n"
46720"///\n"
46721"/// \\code\n"
46722"/// int _mm_extract_ps(__m128 X, const int N);\n"
46723"/// \\endcode\n"
46724"///\n"
46725"/// This intrinsic corresponds to the <c> VEXTRACTPS / EXTRACTPS </c>\n"
46726"/// instruction.\n"
46727"///\n"
46728"/// \\param X\n"
46729"/// A 128-bit vector of [4 x float].\n"
46730"/// \\param N\n"
46731"/// An immediate value. Bits [1:0] determines which bits from the argument\n"
46732"/// \\a X are extracted and returned: \\n\n"
46733"/// 00: Bits [31:0] of parameter \\a X are returned. \\n\n"
46734"/// 01: Bits [63:32] of parameter \\a X are returned. \\n\n"
46735"/// 10: Bits [95:64] of parameter \\a X are returned. \\n\n"
46736"/// 11: Bits [127:96] of parameter \\a X are returned.\n"
46737"/// \\returns A 32-bit integer containing the extracted 32 bits of float data.\n"
46738"#define _mm_extract_ps(X, N) (__extension__ \\\n"
46739" ({ union { int __i; float __f; } __t; \\\n"
46740" __t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \\\n"
46741" __t.__i;}))\n"
46742"\n"
46743"/* Miscellaneous insert and extract macros. */\n"
46744"/* Extract a single-precision float from X at index N into D. */\n"
46745"#define _MM_EXTRACT_FLOAT(D, X, N) \\\n"
46746" { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); }\n"
46747"\n"
46748"/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create\n"
46749" an index suitable for _mm_insert_ps. */\n"
46750"#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z))\n"
46751"\n"
46752"/* Extract a float from X at index N into the first index of the return. */\n"
46753"#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X), \\\n"
46754" _MM_MK_INSERTPS_NDX((N), 0, 0x0e))\n"
46755"\n"
46756"/* Insert int into packed integer array at index. */\n"
46757"/// Constructs a 128-bit vector of [16 x i8] by first making a copy of\n"
46758"/// the 128-bit integer vector parameter, and then inserting the lower 8 bits\n"
46759"/// of an integer parameter \\a I into an offset specified by the immediate\n"
46760"/// value parameter \\a N.\n"
46761"///\n"
46762"/// \\headerfile <x86intrin.h>\n"
46763"///\n"
46764"/// \\code\n"
46765"/// __m128i _mm_insert_epi8(__m128i X, int I, const int N);\n"
46766"/// \\endcode\n"
46767"///\n"
46768"/// This intrinsic corresponds to the <c> VPINSRB / PINSRB </c> instruction.\n"
46769"///\n"
46770"/// \\param X\n"
46771"/// A 128-bit integer vector of [16 x i8]. This vector is copied to the\n"
46772"/// result and then one of the sixteen elements in the result vector is\n"
46773"/// replaced by the lower 8 bits of \\a I.\n"
46774"/// \\param I\n"
46775"/// An integer. The lower 8 bits of this operand are written to the result\n"
46776"/// beginning at the offset specified by \\a N.\n"
46777"/// \\param N\n"
46778"/// An immediate value. Bits [3:0] specify the bit offset in the result at\n"
46779"/// which the lower 8 bits of \\a I are written. \\n\n"
46780"/// 0000: Bits [7:0] of the result are used for insertion. \\n\n"
46781"/// 0001: Bits [15:8] of the result are used for insertion. \\n\n"
46782"/// 0010: Bits [23:16] of the result are used for insertion. \\n\n"
46783"/// 0011: Bits [31:24] of the result are used for insertion. \\n\n"
46784"/// 0100: Bits [39:32] of the result are used for insertion. \\n\n"
46785"/// 0101: Bits [47:40] of the result are used for insertion. \\n\n"
46786"/// 0110: Bits [55:48] of the result are used for insertion. \\n\n"
46787"/// 0111: Bits [63:56] of the result are used for insertion. \\n\n"
46788"/// 1000: Bits [71:64] of the result are used for insertion. \\n\n"
46789"/// 1001: Bits [79:72] of the result are used for insertion. \\n\n"
46790"/// 1010: Bits [87:80] of the result are used for insertion. \\n\n"
46791"/// 1011: Bits [95:88] of the result are used for insertion. \\n\n"
46792"/// 1100: Bits [103:96] of the result are used for insertion. \\n\n"
46793"/// 1101: Bits [111:104] of the result are used for insertion. \\n\n"
46794"/// 1110: Bits [119:112] of the result are used for insertion. \\n\n"
46795"/// 1111: Bits [127:120] of the result are used for insertion.\n"
46796"/// \\returns A 128-bit integer vector containing the constructed values.\n"
46797"#define _mm_insert_epi8(X, I, N) \\\n"
46798" (__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \\\n"
46799" (int)(I), (int)(N))\n"
46800"\n"
46801"/// Constructs a 128-bit vector of [4 x i32] by first making a copy of\n"
46802"/// the 128-bit integer vector parameter, and then inserting the 32-bit\n"
46803"/// integer parameter \\a I at the offset specified by the immediate value\n"
46804"/// parameter \\a N.\n"
46805"///\n"
46806"/// \\headerfile <x86intrin.h>\n"
46807"///\n"
46808"/// \\code\n"
46809"/// __m128i _mm_insert_epi32(__m128i X, int I, const int N);\n"
46810"/// \\endcode\n"
46811"///\n"
46812"/// This intrinsic corresponds to the <c> VPINSRD / PINSRD </c> instruction.\n"
46813"///\n"
46814"/// \\param X\n"
46815"/// A 128-bit integer vector of [4 x i32]. This vector is copied to the\n"
46816"/// result and then one of the four elements in the result vector is\n"
46817"/// replaced by \\a I.\n"
46818"/// \\param I\n"
46819"/// A 32-bit integer that is written to the result beginning at the offset\n"
46820"/// specified by \\a N.\n"
46821"/// \\param N\n"
46822"/// An immediate value. Bits [1:0] specify the bit offset in the result at\n"
46823"/// which the integer \\a I is written. \\n\n"
46824"/// 00: Bits [31:0] of the result are used for insertion. \\n\n"
46825"/// 01: Bits [63:32] of the result are used for insertion. \\n\n"
46826"/// 10: Bits [95:64] of the result are used for insertion. \\n\n"
46827"/// 11: Bits [127:96] of the result are used for insertion.\n"
46828"/// \\returns A 128-bit integer vector containing the constructed values.\n"
46829"#define _mm_insert_epi32(X, I, N) \\\n"
46830" (__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \\\n"
46831" (int)(I), (int)(N))\n"
46832"\n"
46833"#ifdef __x86_64__\n"
46834"/// Constructs a 128-bit vector of [2 x i64] by first making a copy of\n"
46835"/// the 128-bit integer vector parameter, and then inserting the 64-bit\n"
46836"/// integer parameter \\a I, using the immediate value parameter \\a N as an\n"
46837"/// insertion location selector.\n"
46838"///\n"
46839"/// \\headerfile <x86intrin.h>\n"
46840"///\n"
46841"/// \\code\n"
46842"/// __m128i _mm_insert_epi64(__m128i X, long long I, const int N);\n"
46843"/// \\endcode\n"
46844"///\n"
46845"/// This intrinsic corresponds to the <c> VPINSRQ / PINSRQ </c> instruction.\n"
46846"///\n"
46847"/// \\param X\n"
46848"/// A 128-bit integer vector of [2 x i64]. This vector is copied to the\n"
46849"/// result and then one of the two elements in the result vector is replaced\n"
46850"/// by \\a I.\n"
46851"/// \\param I\n"
46852"/// A 64-bit integer that is written to the result beginning at the offset\n"
46853"/// specified by \\a N.\n"
46854"/// \\param N\n"
46855"/// An immediate value. Bit [0] specifies the bit offset in the result at\n"
46856"/// which the integer \\a I is written. \\n\n"
46857"/// 0: Bits [63:0] of the result are used for insertion. \\n\n"
46858"/// 1: Bits [127:64] of the result are used for insertion. \\n\n"
46859"/// \\returns A 128-bit integer vector containing the constructed values.\n"
46860"#define _mm_insert_epi64(X, I, N) \\\n"
46861" (__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \\\n"
46862" (long long)(I), (int)(N))\n"
46863"#endif /* __x86_64__ */\n"
46864"\n"
46865"/* Extract int from packed integer array at index. This returns the element\n"
46866" * as a zero extended value, so it is unsigned.\n"
46867" */\n"
46868"/// Extracts an 8-bit element from the 128-bit integer vector of\n"
46869"/// [16 x i8], using the immediate value parameter \\a N as a selector.\n"
46870"///\n"
46871"/// \\headerfile <x86intrin.h>\n"
46872"///\n"
46873"/// \\code\n"
46874"/// int _mm_extract_epi8(__m128i X, const int N);\n"
46875"/// \\endcode\n"
46876"///\n"
46877"/// This intrinsic corresponds to the <c> VPEXTRB / PEXTRB </c> instruction.\n"
46878"///\n"
46879"/// \\param X\n"
46880"/// A 128-bit integer vector.\n"
46881"/// \\param N\n"
46882"/// An immediate value. Bits [3:0] specify which 8-bit vector element from\n"
46883"/// the argument \\a X to extract and copy to the result. \\n\n"
46884"/// 0000: Bits [7:0] of parameter \\a X are extracted. \\n\n"
46885"/// 0001: Bits [15:8] of the parameter \\a X are extracted. \\n\n"
46886"/// 0010: Bits [23:16] of the parameter \\a X are extracted. \\n\n"
46887"/// 0011: Bits [31:24] of the parameter \\a X are extracted. \\n\n"
46888"/// 0100: Bits [39:32] of the parameter \\a X are extracted. \\n\n"
46889"/// 0101: Bits [47:40] of the parameter \\a X are extracted. \\n\n"
46890"/// 0110: Bits [55:48] of the parameter \\a X are extracted. \\n\n"
46891"/// 0111: Bits [63:56] of the parameter \\a X are extracted. \\n\n"
46892"/// 1000: Bits [71:64] of the parameter \\a X are extracted. \\n\n"
46893"/// 1001: Bits [79:72] of the parameter \\a X are extracted. \\n\n"
46894"/// 1010: Bits [87:80] of the parameter \\a X are extracted. \\n\n"
46895"/// 1011: Bits [95:88] of the parameter \\a X are extracted. \\n\n"
46896"/// 1100: Bits [103:96] of the parameter \\a X are extracted. \\n\n"
46897"/// 1101: Bits [111:104] of the parameter \\a X are extracted. \\n\n"
46898"/// 1110: Bits [119:112] of the parameter \\a X are extracted. \\n\n"
46899"/// 1111: Bits [127:120] of the parameter \\a X are extracted.\n"
46900"/// \\returns An unsigned integer, whose lower 8 bits are selected from the\n"
46901"/// 128-bit integer vector parameter and the remaining bits are assigned\n"
46902"/// zeros.\n"
46903"#define _mm_extract_epi8(X, N) \\\n"
46904" (int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \\\n"
46905" (int)(N))\n"
46906"\n"
46907"/// Extracts a 32-bit element from the 128-bit integer vector of\n"
46908"/// [4 x i32], using the immediate value parameter \\a N as a selector.\n"
46909"///\n"
46910"/// \\headerfile <x86intrin.h>\n"
46911"///\n"
46912"/// \\code\n"
46913"/// int _mm_extract_epi32(__m128i X, const int N);\n"
46914"/// \\endcode\n"
46915"///\n"
46916"/// This intrinsic corresponds to the <c> VPEXTRD / PEXTRD </c> instruction.\n"
46917"///\n"
46918"/// \\param X\n"
46919"/// A 128-bit integer vector.\n"
46920"/// \\param N\n"
46921"/// An immediate value. Bits [1:0] specify which 32-bit vector element from\n"
46922"/// the argument \\a X to extract and copy to the result. \\n\n"
46923"/// 00: Bits [31:0] of the parameter \\a X are extracted. \\n\n"
46924"/// 01: Bits [63:32] of the parameter \\a X are extracted. \\n\n"
46925"/// 10: Bits [95:64] of the parameter \\a X are extracted. \\n\n"
46926"/// 11: Bits [127:96] of the parameter \\a X are exracted.\n"
46927"/// \\returns An integer, whose lower 32 bits are selected from the 128-bit\n"
46928"/// integer vector parameter and the remaining bits are assigned zeros.\n"
46929"#define _mm_extract_epi32(X, N) \\\n"
46930" (int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N))\n"
46931"\n"
46932"#ifdef __x86_64__\n"
46933"/// Extracts a 64-bit element from the 128-bit integer vector of\n"
46934"/// [2 x i64], using the immediate value parameter \\a N as a selector.\n"
46935"///\n"
46936"/// \\headerfile <x86intrin.h>\n"
46937"///\n"
46938"/// \\code\n"
46939"/// long long _mm_extract_epi64(__m128i X, const int N);\n"
46940"/// \\endcode\n"
46941"///\n"
46942"/// This intrinsic corresponds to the <c> VPEXTRQ / PEXTRQ </c> instruction.\n"
46943"///\n"
46944"/// \\param X\n"
46945"/// A 128-bit integer vector.\n"
46946"/// \\param N\n"
46947"/// An immediate value. Bit [0] specifies which 64-bit vector element from\n"
46948"/// the argument \\a X to return. \\n\n"
46949"/// 0: Bits [63:0] are returned. \\n\n"
46950"/// 1: Bits [127:64] are returned. \\n\n"
46951"/// \\returns A 64-bit integer.\n"
46952"#define _mm_extract_epi64(X, N) \\\n"
46953" (long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N))\n"
46954"#endif /* __x86_64 */\n"
46955"\n"
46956"/* SSE4 128-bit Packed Integer Comparisons. */\n"
46957"/// Tests whether the specified bits in a 128-bit integer vector are all\n"
46958"/// zeros.\n"
46959"///\n"
46960"/// \\headerfile <x86intrin.h>\n"
46961"///\n"
46962"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
46963"///\n"
46964"/// \\param __M\n"
46965"/// A 128-bit integer vector containing the bits to be tested.\n"
46966"/// \\param __V\n"
46967"/// A 128-bit integer vector selecting which bits to test in operand \\a __M.\n"
46968"/// \\returns TRUE if the specified bits are all zeros; FALSE otherwise.\n"
46969"static __inline__ int __DEFAULT_FN_ATTRS\n"
46970"_mm_testz_si128(__m128i __M, __m128i __V)\n"
46971"{\n"
46972" return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);\n"
46973"}\n"
46974"\n"
46975"/// Tests whether the specified bits in a 128-bit integer vector are all\n"
46976"/// ones.\n"
46977"///\n"
46978"/// \\headerfile <x86intrin.h>\n"
46979"///\n"
46980"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
46981"///\n"
46982"/// \\param __M\n"
46983"/// A 128-bit integer vector containing the bits to be tested.\n"
46984"/// \\param __V\n"
46985"/// A 128-bit integer vector selecting which bits to test in operand \\a __M.\n"
46986"/// \\returns TRUE if the specified bits are all ones; FALSE otherwise.\n"
46987"static __inline__ int __DEFAULT_FN_ATTRS\n"
46988"_mm_testc_si128(__m128i __M, __m128i __V)\n"
46989"{\n"
46990" return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);\n"
46991"}\n"
46992"\n"
46993"/// Tests whether the specified bits in a 128-bit integer vector are\n"
46994"/// neither all zeros nor all ones.\n"
46995"///\n"
46996"/// \\headerfile <x86intrin.h>\n"
46997"///\n"
46998"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
46999"///\n"
47000"/// \\param __M\n"
47001"/// A 128-bit integer vector containing the bits to be tested.\n"
47002"/// \\param __V\n"
47003"/// A 128-bit integer vector selecting which bits to test in operand \\a __M.\n"
47004"/// \\returns TRUE if the specified bits are neither all zeros nor all ones;\n"
47005"/// FALSE otherwise.\n"
47006"static __inline__ int __DEFAULT_FN_ATTRS\n"
47007"_mm_testnzc_si128(__m128i __M, __m128i __V)\n"
47008"{\n"
47009" return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);\n"
47010"}\n"
47011"\n"
47012"/// Tests whether the specified bits in a 128-bit integer vector are all\n"
47013"/// ones.\n"
47014"///\n"
47015"/// \\headerfile <x86intrin.h>\n"
47016"///\n"
47017"/// \\code\n"
47018"/// int _mm_test_all_ones(__m128i V);\n"
47019"/// \\endcode\n"
47020"///\n"
47021"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
47022"///\n"
47023"/// \\param V\n"
47024"/// A 128-bit integer vector containing the bits to be tested.\n"
47025"/// \\returns TRUE if the bits specified in the operand are all set to 1; FALSE\n"
47026"/// otherwise.\n"
47027"#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))\n"
47028"\n"
47029"/// Tests whether the specified bits in a 128-bit integer vector are\n"
47030"/// neither all zeros nor all ones.\n"
47031"///\n"
47032"/// \\headerfile <x86intrin.h>\n"
47033"///\n"
47034"/// \\code\n"
47035"/// int _mm_test_mix_ones_zeros(__m128i M, __m128i V);\n"
47036"/// \\endcode\n"
47037"///\n"
47038"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
47039"///\n"
47040"/// \\param M\n"
47041"/// A 128-bit integer vector containing the bits to be tested.\n"
47042"/// \\param V\n"
47043"/// A 128-bit integer vector selecting which bits to test in operand \\a M.\n"
47044"/// \\returns TRUE if the specified bits are neither all zeros nor all ones;\n"
47045"/// FALSE otherwise.\n"
47046"#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))\n"
47047"\n"
47048"/// Tests whether the specified bits in a 128-bit integer vector are all\n"
47049"/// zeros.\n"
47050"///\n"
47051"/// \\headerfile <x86intrin.h>\n"
47052"///\n"
47053"/// \\code\n"
47054"/// int _mm_test_all_zeros(__m128i M, __m128i V);\n"
47055"/// \\endcode\n"
47056"///\n"
47057"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
47058"///\n"
47059"/// \\param M\n"
47060"/// A 128-bit integer vector containing the bits to be tested.\n"
47061"/// \\param V\n"
47062"/// A 128-bit integer vector selecting which bits to test in operand \\a M.\n"
47063"/// \\returns TRUE if the specified bits are all zeros; FALSE otherwise.\n"
47064"#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))\n"
47065"\n"
47066"/* SSE4 64-bit Packed Integer Comparisons. */\n"
47067"/// Compares each of the corresponding 64-bit values of the 128-bit\n"
47068"/// integer vectors for equality.\n"
47069"///\n"
47070"/// \\headerfile <x86intrin.h>\n"
47071"///\n"
47072"/// This intrinsic corresponds to the <c> VPCMPEQQ / PCMPEQQ </c> instruction.\n"
47073"///\n"
47074"/// \\param __V1\n"
47075"/// A 128-bit integer vector.\n"
47076"/// \\param __V2\n"
47077"/// A 128-bit integer vector.\n"
47078"/// \\returns A 128-bit integer vector containing the comparison results.\n"
47079"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47080"_mm_cmpeq_epi64(__m128i __V1, __m128i __V2)\n"
47081"{\n"
47082" return (__m128i)((__v2di)__V1 == (__v2di)__V2);\n"
47083"}\n"
47084"\n"
47085"/* SSE4 Packed Integer Sign-Extension. */\n"
47086"/// Sign-extends each of the lower eight 8-bit integer elements of a\n"
47087"/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a\n"
47088"/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector\n"
47089"/// are unused.\n"
47090"///\n"
47091"/// \\headerfile <x86intrin.h>\n"
47092"///\n"
47093"/// This intrinsic corresponds to the <c> VPMOVSXBW / PMOVSXBW </c> instruction.\n"
47094"///\n"
47095"/// \\param __V\n"
47096"/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are sign-\n"
47097"/// extended to 16-bit values.\n"
47098"/// \\returns A 128-bit vector of [8 x i16] containing the sign-extended values.\n"
47099"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47100"_mm_cvtepi8_epi16(__m128i __V)\n"
47101"{\n"
47102" /* This function always performs a signed extension, but __v16qi is a char\n"
47103" which may be signed or unsigned, so use __v16qs. */\n"
47104" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);\n"
47105"}\n"
47106"\n"
47107"/// Sign-extends each of the lower four 8-bit integer elements of a\n"
47108"/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a\n"
47109"/// 128-bit vector of [4 x i32]. The upper twelve elements of the input\n"
47110"/// vector are unused.\n"
47111"///\n"
47112"/// \\headerfile <x86intrin.h>\n"
47113"///\n"
47114"/// This intrinsic corresponds to the <c> VPMOVSXBD / PMOVSXBD </c> instruction.\n"
47115"///\n"
47116"/// \\param __V\n"
47117"/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are\n"
47118"/// sign-extended to 32-bit values.\n"
47119"/// \\returns A 128-bit vector of [4 x i32] containing the sign-extended values.\n"
47120"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47121"_mm_cvtepi8_epi32(__m128i __V)\n"
47122"{\n"
47123" /* This function always performs a signed extension, but __v16qi is a char\n"
47124" which may be signed or unsigned, so use __v16qs. */\n"
47125" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si);\n"
47126"}\n"
47127"\n"
47128"/// Sign-extends each of the lower two 8-bit integer elements of a\n"
47129"/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in\n"
47130"/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input\n"
47131"/// vector are unused.\n"
47132"///\n"
47133"/// \\headerfile <x86intrin.h>\n"
47134"///\n"
47135"/// This intrinsic corresponds to the <c> VPMOVSXBQ / PMOVSXBQ </c> instruction.\n"
47136"///\n"
47137"/// \\param __V\n"
47138"/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are\n"
47139"/// sign-extended to 64-bit values.\n"
47140"/// \\returns A 128-bit vector of [2 x i64] containing the sign-extended values.\n"
47141"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47142"_mm_cvtepi8_epi64(__m128i __V)\n"
47143"{\n"
47144" /* This function always performs a signed extension, but __v16qi is a char\n"
47145" which may be signed or unsigned, so use __v16qs. */\n"
47146" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di);\n"
47147"}\n"
47148"\n"
47149"/// Sign-extends each of the lower four 16-bit integer elements of a\n"
47150"/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in\n"
47151"/// a 128-bit vector of [4 x i32]. The upper four elements of the input\n"
47152"/// vector are unused.\n"
47153"///\n"
47154"/// \\headerfile <x86intrin.h>\n"
47155"///\n"
47156"/// This intrinsic corresponds to the <c> VPMOVSXWD / PMOVSXWD </c> instruction.\n"
47157"///\n"
47158"/// \\param __V\n"
47159"/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are\n"
47160"/// sign-extended to 32-bit values.\n"
47161"/// \\returns A 128-bit vector of [4 x i32] containing the sign-extended values.\n"
47162"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47163"_mm_cvtepi16_epi32(__m128i __V)\n"
47164"{\n"
47165" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si);\n"
47166"}\n"
47167"\n"
47168"/// Sign-extends each of the lower two 16-bit integer elements of a\n"
47169"/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in\n"
47170"/// a 128-bit vector of [2 x i64]. The upper six elements of the input\n"
47171"/// vector are unused.\n"
47172"///\n"
47173"/// \\headerfile <x86intrin.h>\n"
47174"///\n"
47175"/// This intrinsic corresponds to the <c> VPMOVSXWQ / PMOVSXWQ </c> instruction.\n"
47176"///\n"
47177"/// \\param __V\n"
47178"/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are\n"
47179"/// sign-extended to 64-bit values.\n"
47180"/// \\returns A 128-bit vector of [2 x i64] containing the sign-extended values.\n"
47181"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47182"_mm_cvtepi16_epi64(__m128i __V)\n"
47183"{\n"
47184" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di);\n"
47185"}\n"
47186"\n"
47187"/// Sign-extends each of the lower two 32-bit integer elements of a\n"
47188"/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in\n"
47189"/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector\n"
47190"/// are unused.\n"
47191"///\n"
47192"/// \\headerfile <x86intrin.h>\n"
47193"///\n"
47194"/// This intrinsic corresponds to the <c> VPMOVSXDQ / PMOVSXDQ </c> instruction.\n"
47195"///\n"
47196"/// \\param __V\n"
47197"/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are\n"
47198"/// sign-extended to 64-bit values.\n"
47199"/// \\returns A 128-bit vector of [2 x i64] containing the sign-extended values.\n"
47200"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47201"_mm_cvtepi32_epi64(__m128i __V)\n"
47202"{\n"
47203" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di);\n"
47204"}\n"
47205"\n"
47206"/* SSE4 Packed Integer Zero-Extension. */\n"
47207"/// Zero-extends each of the lower eight 8-bit integer elements of a\n"
47208"/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a\n"
47209"/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector\n"
47210"/// are unused.\n"
47211"///\n"
47212"/// \\headerfile <x86intrin.h>\n"
47213"///\n"
47214"/// This intrinsic corresponds to the <c> VPMOVZXBW / PMOVZXBW </c> instruction.\n"
47215"///\n"
47216"/// \\param __V\n"
47217"/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are\n"
47218"/// zero-extended to 16-bit values.\n"
47219"/// \\returns A 128-bit vector of [8 x i16] containing the zero-extended values.\n"
47220"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47221"_mm_cvtepu8_epi16(__m128i __V)\n"
47222"{\n"
47223" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);\n"
47224"}\n"
47225"\n"
47226"/// Zero-extends each of the lower four 8-bit integer elements of a\n"
47227"/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a\n"
47228"/// 128-bit vector of [4 x i32]. The upper twelve elements of the input\n"
47229"/// vector are unused.\n"
47230"///\n"
47231"/// \\headerfile <x86intrin.h>\n"
47232"///\n"
47233"/// This intrinsic corresponds to the <c> VPMOVZXBD / PMOVZXBD </c> instruction.\n"
47234"///\n"
47235"/// \\param __V\n"
47236"/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are\n"
47237"/// zero-extended to 32-bit values.\n"
47238"/// \\returns A 128-bit vector of [4 x i32] containing the zero-extended values.\n"
47239"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47240"_mm_cvtepu8_epi32(__m128i __V)\n"
47241"{\n"
47242" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si);\n"
47243"}\n"
47244"\n"
47245"/// Zero-extends each of the lower two 8-bit integer elements of a\n"
47246"/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in\n"
47247"/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input\n"
47248"/// vector are unused.\n"
47249"///\n"
47250"/// \\headerfile <x86intrin.h>\n"
47251"///\n"
47252"/// This intrinsic corresponds to the <c> VPMOVZXBQ / PMOVZXBQ </c> instruction.\n"
47253"///\n"
47254"/// \\param __V\n"
47255"/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are\n"
47256"/// zero-extended to 64-bit values.\n"
47257"/// \\returns A 128-bit vector of [2 x i64] containing the zero-extended values.\n"
47258"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47259"_mm_cvtepu8_epi64(__m128i __V)\n"
47260"{\n"
47261" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di);\n"
47262"}\n"
47263"\n"
47264"/// Zero-extends each of the lower four 16-bit integer elements of a\n"
47265"/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in\n"
47266"/// a 128-bit vector of [4 x i32]. The upper four elements of the input\n"
47267"/// vector are unused.\n"
47268"///\n"
47269"/// \\headerfile <x86intrin.h>\n"
47270"///\n"
47271"/// This intrinsic corresponds to the <c> VPMOVZXWD / PMOVZXWD </c> instruction.\n"
47272"///\n"
47273"/// \\param __V\n"
47274"/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are\n"
47275"/// zero-extended to 32-bit values.\n"
47276"/// \\returns A 128-bit vector of [4 x i32] containing the zero-extended values.\n"
47277"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47278"_mm_cvtepu16_epi32(__m128i __V)\n"
47279"{\n"
47280" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si);\n"
47281"}\n"
47282"\n"
47283"/// Zero-extends each of the lower two 16-bit integer elements of a\n"
47284"/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in\n"
47285"/// a 128-bit vector of [2 x i64]. The upper six elements of the input vector\n"
47286"/// are unused.\n"
47287"///\n"
47288"/// \\headerfile <x86intrin.h>\n"
47289"///\n"
47290"/// This intrinsic corresponds to the <c> VPMOVZXWQ / PMOVZXWQ </c> instruction.\n"
47291"///\n"
47292"/// \\param __V\n"
47293"/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are\n"
47294"/// zero-extended to 64-bit values.\n"
47295"/// \\returns A 128-bit vector of [2 x i64] containing the zero-extended values.\n"
47296"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47297"_mm_cvtepu16_epi64(__m128i __V)\n"
47298"{\n"
47299" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di);\n"
47300"}\n"
47301"\n"
47302"/// Zero-extends each of the lower two 32-bit integer elements of a\n"
47303"/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in\n"
47304"/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector\n"
47305"/// are unused.\n"
47306"///\n"
47307"/// \\headerfile <x86intrin.h>\n"
47308"///\n"
47309"/// This intrinsic corresponds to the <c> VPMOVZXDQ / PMOVZXDQ </c> instruction.\n"
47310"///\n"
47311"/// \\param __V\n"
47312"/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are\n"
47313"/// zero-extended to 64-bit values.\n"
47314"/// \\returns A 128-bit vector of [2 x i64] containing the zero-extended values.\n"
47315"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47316"_mm_cvtepu32_epi64(__m128i __V)\n"
47317"{\n"
47318" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di);\n"
47319"}\n"
47320"\n"
47321"/* SSE4 Pack with Unsigned Saturation. */\n"
47322"/// Converts 32-bit signed integers from both 128-bit integer vector\n"
47323"/// operands into 16-bit unsigned integers, and returns the packed result.\n"
47324"/// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than\n"
47325"/// 0x0000 are saturated to 0x0000.\n"
47326"///\n"
47327"/// \\headerfile <x86intrin.h>\n"
47328"///\n"
47329"/// This intrinsic corresponds to the <c> VPACKUSDW / PACKUSDW </c> instruction.\n"
47330"///\n"
47331"/// \\param __V1\n"
47332"/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a\n"
47333"/// signed integer and is converted to a 16-bit unsigned integer with\n"
47334"/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values\n"
47335"/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values\n"
47336"/// are written to the lower 64 bits of the result.\n"
47337"/// \\param __V2\n"
47338"/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a\n"
47339"/// signed integer and is converted to a 16-bit unsigned integer with\n"
47340"/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values\n"
47341"/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values\n"
47342"/// are written to the higher 64 bits of the result.\n"
47343"/// \\returns A 128-bit vector of [8 x i16] containing the converted values.\n"
47344"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47345"_mm_packus_epi32(__m128i __V1, __m128i __V2)\n"
47346"{\n"
47347" return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2);\n"
47348"}\n"
47349"\n"
47350"/* SSE4 Multiple Packed Sums of Absolute Difference. */\n"
47351"/// Subtracts 8-bit unsigned integer values and computes the absolute\n"
47352"/// values of the differences to the corresponding bits in the destination.\n"
47353"/// Then sums of the absolute differences are returned according to the bit\n"
47354"/// fields in the immediate operand.\n"
47355"///\n"
47356"/// \\headerfile <x86intrin.h>\n"
47357"///\n"
47358"/// \\code\n"
47359"/// __m128i _mm_mpsadbw_epu8(__m128i X, __m128i Y, const int M);\n"
47360"/// \\endcode\n"
47361"///\n"
47362"/// This intrinsic corresponds to the <c> VMPSADBW / MPSADBW </c> instruction.\n"
47363"///\n"
47364"/// \\param X\n"
47365"/// A 128-bit vector of [16 x i8].\n"
47366"/// \\param Y\n"
47367"/// A 128-bit vector of [16 x i8].\n"
47368"/// \\param M\n"
47369"/// An 8-bit immediate operand specifying how the absolute differences are to\n"
47370"/// be calculated, according to the following algorithm:\n"
47371"/// \\code\n"
47372"/// // M2 represents bit 2 of the immediate operand\n"
47373"/// // M10 represents bits [1:0] of the immediate operand\n"
47374"/// i = M2 * 4;\n"
47375"/// j = M10 * 4;\n"
47376"/// for (k = 0; k < 8; k = k + 1) {\n"
47377"/// d0 = abs(X[i + k + 0] - Y[j + 0]);\n"
47378"/// d1 = abs(X[i + k + 1] - Y[j + 1]);\n"
47379"/// d2 = abs(X[i + k + 2] - Y[j + 2]);\n"
47380"/// d3 = abs(X[i + k + 3] - Y[j + 3]);\n"
47381"/// r[k] = d0 + d1 + d2 + d3;\n"
47382"/// }\n"
47383"/// \\endcode\n"
47384"/// \\returns A 128-bit integer vector containing the sums of the sets of\n"
47385"/// absolute differences between both operands.\n"
47386"#define _mm_mpsadbw_epu8(X, Y, M) \\\n"
47387" (__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \\\n"
47388" (__v16qi)(__m128i)(Y), (M))\n"
47389"\n"
47390"/// Finds the minimum unsigned 16-bit element in the input 128-bit\n"
47391"/// vector of [8 x u16] and returns it and along with its index.\n"
47392"///\n"
47393"/// \\headerfile <x86intrin.h>\n"
47394"///\n"
47395"/// This intrinsic corresponds to the <c> VPHMINPOSUW / PHMINPOSUW </c>\n"
47396"/// instruction.\n"
47397"///\n"
47398"/// \\param __V\n"
47399"/// A 128-bit vector of [8 x u16].\n"
47400"/// \\returns A 128-bit value where bits [15:0] contain the minimum value found\n"
47401"/// in parameter \\a __V, bits [18:16] contain the index of the minimum value\n"
47402"/// and the remaining bits are set to 0.\n"
47403"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47404"_mm_minpos_epu16(__m128i __V)\n"
47405"{\n"
47406" return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V);\n"
47407"}\n"
47408"\n"
47409"/* Handle the sse4.2 definitions here. */\n"
47410"\n"
47411"/* These definitions are normally in nmmintrin.h, but gcc puts them in here\n"
47412" so we'll do the same. */\n"
47413"\n"
47414"#undef __DEFAULT_FN_ATTRS\n"
47415"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse4.2\")))\n"
47416"\n"
47417"/* These specify the type of data that we're comparing. */\n"
47418"#define _SIDD_UBYTE_OPS 0x00\n"
47419"#define _SIDD_UWORD_OPS 0x01\n"
47420"#define _SIDD_SBYTE_OPS 0x02\n"
47421"#define _SIDD_SWORD_OPS 0x03\n"
47422"\n"
47423"/* These specify the type of comparison operation. */\n"
47424"#define _SIDD_CMP_EQUAL_ANY 0x00\n"
47425"#define _SIDD_CMP_RANGES 0x04\n"
47426"#define _SIDD_CMP_EQUAL_EACH 0x08\n"
47427"#define _SIDD_CMP_EQUAL_ORDERED 0x0c\n"
47428"\n"
47429"/* These macros specify the polarity of the operation. */\n"
47430"#define _SIDD_POSITIVE_POLARITY 0x00\n"
47431"#define _SIDD_NEGATIVE_POLARITY 0x10\n"
47432"#define _SIDD_MASKED_POSITIVE_POLARITY 0x20\n"
47433"#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30\n"
47434"\n"
47435"/* These macros are used in _mm_cmpXstri() to specify the return. */\n"
47436"#define _SIDD_LEAST_SIGNIFICANT 0x00\n"
47437"#define _SIDD_MOST_SIGNIFICANT 0x40\n"
47438"\n"
47439"/* These macros are used in _mm_cmpXstri() to specify the return. */\n"
47440"#define _SIDD_BIT_MASK 0x00\n"
47441"#define _SIDD_UNIT_MASK 0x40\n"
47442"\n"
47443"/* SSE4.2 Packed Comparison Intrinsics. */\n"
47444"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47445"/// data with implicitly defined lengths that is contained in source operands\n"
47446"/// \\a A and \\a B. Returns a 128-bit integer vector representing the result\n"
47447"/// mask of the comparison.\n"
47448"///\n"
47449"/// \\headerfile <x86intrin.h>\n"
47450"///\n"
47451"/// \\code\n"
47452"/// __m128i _mm_cmpistrm(__m128i A, __m128i B, const int M);\n"
47453"/// \\endcode\n"
47454"///\n"
47455"/// This intrinsic corresponds to the <c> VPCMPISTRM / PCMPISTRM </c>\n"
47456"/// instruction.\n"
47457"///\n"
47458"/// \\param A\n"
47459"/// A 128-bit integer vector containing one of the source operands to be\n"
47460"/// compared.\n"
47461"/// \\param B\n"
47462"/// A 128-bit integer vector containing one of the source operands to be\n"
47463"/// compared.\n"
47464"/// \\param M\n"
47465"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47466"/// words, the type of comparison to perform, and the format of the return\n"
47467"/// value. \\n\n"
47468"/// Bits [1:0]: Determine source data format. \\n\n"
47469"/// 00: 16 unsigned bytes \\n\n"
47470"/// 01: 8 unsigned words \\n\n"
47471"/// 10: 16 signed bytes \\n\n"
47472"/// 11: 8 signed words \\n\n"
47473"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47474"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47475"/// the characters in \\a A. \\n\n"
47476"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47477"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47478"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47479"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47480"/// \\a B for equality. \\n\n"
47481"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
47482"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47483"/// mask of the comparison results. \\n\n"
47484"/// 00: No effect. \\n\n"
47485"/// 01: Negate the bit mask. \\n\n"
47486"/// 10: No effect. \\n\n"
47487"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47488"/// to the size of \\a A or \\a B. \\n\n"
47489"/// Bit [6]: Determines whether the result is zero-extended or expanded to 16\n"
47490"/// bytes. \\n\n"
47491"/// 0: The result is zero-extended to 16 bytes. \\n\n"
47492"/// 1: The result is expanded to 16 bytes (this expansion is performed by\n"
47493"/// repeating each bit 8 or 16 times).\n"
47494"/// \\returns Returns a 128-bit integer vector representing the result mask of\n"
47495"/// the comparison.\n"
47496"#define _mm_cmpistrm(A, B, M) \\\n"
47497" (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \\\n"
47498" (__v16qi)(__m128i)(B), (int)(M))\n"
47499"\n"
47500"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47501"/// data with implicitly defined lengths that is contained in source operands\n"
47502"/// \\a A and \\a B. Returns an integer representing the result index of the\n"
47503"/// comparison.\n"
47504"///\n"
47505"/// \\headerfile <x86intrin.h>\n"
47506"///\n"
47507"/// \\code\n"
47508"/// int _mm_cmpistri(__m128i A, __m128i B, const int M);\n"
47509"/// \\endcode\n"
47510"///\n"
47511"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
47512"/// instruction.\n"
47513"///\n"
47514"/// \\param A\n"
47515"/// A 128-bit integer vector containing one of the source operands to be\n"
47516"/// compared.\n"
47517"/// \\param B\n"
47518"/// A 128-bit integer vector containing one of the source operands to be\n"
47519"/// compared.\n"
47520"/// \\param M\n"
47521"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47522"/// words, the type of comparison to perform, and the format of the return\n"
47523"/// value. \\n\n"
47524"/// Bits [1:0]: Determine source data format. \\n\n"
47525"/// 00: 16 unsigned bytes \\n\n"
47526"/// 01: 8 unsigned words \\n\n"
47527"/// 10: 16 signed bytes \\n\n"
47528"/// 11: 8 signed words \\n\n"
47529"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47530"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47531"/// the characters in \\a A. \\n\n"
47532"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47533"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47534"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47535"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47536"/// \\a B for equality. \\n\n"
47537"/// 11: Substring: Search B for substring matches of \\a A. \\n\n"
47538"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47539"/// mask of the comparison results. \\n\n"
47540"/// 00: No effect. \\n\n"
47541"/// 01: Negate the bit mask. \\n\n"
47542"/// 10: No effect. \\n\n"
47543"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47544"/// to the size of \\a A or \\a B. \\n\n"
47545"/// Bit [6]: Determines whether the index of the lowest set bit or the\n"
47546"/// highest set bit is returned. \\n\n"
47547"/// 0: The index of the least significant set bit. \\n\n"
47548"/// 1: The index of the most significant set bit. \\n\n"
47549"/// \\returns Returns an integer representing the result index of the comparison.\n"
47550"#define _mm_cmpistri(A, B, M) \\\n"
47551" (int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \\\n"
47552" (__v16qi)(__m128i)(B), (int)(M))\n"
47553"\n"
47554"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47555"/// data with explicitly defined lengths that is contained in source operands\n"
47556"/// \\a A and \\a B. Returns a 128-bit integer vector representing the result\n"
47557"/// mask of the comparison.\n"
47558"///\n"
47559"/// \\headerfile <x86intrin.h>\n"
47560"///\n"
47561"/// \\code\n"
47562"/// __m128i _mm_cmpestrm(__m128i A, int LA, __m128i B, int LB, const int M);\n"
47563"/// \\endcode\n"
47564"///\n"
47565"/// This intrinsic corresponds to the <c> VPCMPESTRM / PCMPESTRM </c>\n"
47566"/// instruction.\n"
47567"///\n"
47568"/// \\param A\n"
47569"/// A 128-bit integer vector containing one of the source operands to be\n"
47570"/// compared.\n"
47571"/// \\param LA\n"
47572"/// An integer that specifies the length of the string in \\a A.\n"
47573"/// \\param B\n"
47574"/// A 128-bit integer vector containing one of the source operands to be\n"
47575"/// compared.\n"
47576"/// \\param LB\n"
47577"/// An integer that specifies the length of the string in \\a B.\n"
47578"/// \\param M\n"
47579"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47580"/// words, the type of comparison to perform, and the format of the return\n"
47581"/// value. \\n\n"
47582"/// Bits [1:0]: Determine source data format. \\n\n"
47583"/// 00: 16 unsigned bytes \\n\n"
47584"/// 01: 8 unsigned words \\n\n"
47585"/// 10: 16 signed bytes \\n\n"
47586"/// 11: 8 signed words \\n\n"
47587"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47588"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47589"/// the characters in \\a A. \\n\n"
47590"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47591"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47592"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47593"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47594"/// \\a B for equality. \\n\n"
47595"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
47596"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47597"/// mask of the comparison results. \\n\n"
47598"/// 00: No effect. \\n\n"
47599"/// 01: Negate the bit mask. \\n\n"
47600"/// 10: No effect. \\n\n"
47601"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47602"/// to the size of \\a A or \\a B. \\n\n"
47603"/// Bit [6]: Determines whether the result is zero-extended or expanded to 16\n"
47604"/// bytes. \\n\n"
47605"/// 0: The result is zero-extended to 16 bytes. \\n\n"
47606"/// 1: The result is expanded to 16 bytes (this expansion is performed by\n"
47607"/// repeating each bit 8 or 16 times). \\n\n"
47608"/// \\returns Returns a 128-bit integer vector representing the result mask of\n"
47609"/// the comparison.\n"
47610"#define _mm_cmpestrm(A, LA, B, LB, M) \\\n"
47611" (__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
47612" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
47613" (int)(M))\n"
47614"\n"
47615"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47616"/// data with explicitly defined lengths that is contained in source operands\n"
47617"/// \\a A and \\a B. Returns an integer representing the result index of the\n"
47618"/// comparison.\n"
47619"///\n"
47620"/// \\headerfile <x86intrin.h>\n"
47621"///\n"
47622"/// \\code\n"
47623"/// int _mm_cmpestri(__m128i A, int LA, __m128i B, int LB, const int M);\n"
47624"/// \\endcode\n"
47625"///\n"
47626"/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n"
47627"/// instruction.\n"
47628"///\n"
47629"/// \\param A\n"
47630"/// A 128-bit integer vector containing one of the source operands to be\n"
47631"/// compared.\n"
47632"/// \\param LA\n"
47633"/// An integer that specifies the length of the string in \\a A.\n"
47634"/// \\param B\n"
47635"/// A 128-bit integer vector containing one of the source operands to be\n"
47636"/// compared.\n"
47637"/// \\param LB\n"
47638"/// An integer that specifies the length of the string in \\a B.\n"
47639"/// \\param M\n"
47640"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47641"/// words, the type of comparison to perform, and the format of the return\n"
47642"/// value. \\n\n"
47643"/// Bits [1:0]: Determine source data format. \\n\n"
47644"/// 00: 16 unsigned bytes \\n\n"
47645"/// 01: 8 unsigned words \\n\n"
47646"/// 10: 16 signed bytes \\n\n"
47647"/// 11: 8 signed words \\n\n"
47648"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47649"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47650"/// the characters in \\a A. \\n\n"
47651"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47652"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47653"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47654"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47655"/// \\a B for equality. \\n\n"
47656"/// 11: Substring: Search B for substring matches of \\a A. \\n\n"
47657"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47658"/// mask of the comparison results. \\n\n"
47659"/// 00: No effect. \\n\n"
47660"/// 01: Negate the bit mask. \\n\n"
47661"/// 10: No effect. \\n\n"
47662"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47663"/// to the size of \\a A or \\a B. \\n\n"
47664"/// Bit [6]: Determines whether the index of the lowest set bit or the\n"
47665"/// highest set bit is returned. \\n\n"
47666"/// 0: The index of the least significant set bit. \\n\n"
47667"/// 1: The index of the most significant set bit. \\n\n"
47668"/// \\returns Returns an integer representing the result index of the comparison.\n"
47669"#define _mm_cmpestri(A, LA, B, LB, M) \\\n"
47670" (int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
47671" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
47672" (int)(M))\n"
47673"\n"
47674"/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */\n"
47675"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47676"/// data with implicitly defined lengths that is contained in source operands\n"
47677"/// \\a A and \\a B. Returns 1 if the bit mask is zero and the length of the\n"
47678"/// string in \\a B is the maximum, otherwise, returns 0.\n"
47679"///\n"
47680"/// \\headerfile <x86intrin.h>\n"
47681"///\n"
47682"/// \\code\n"
47683"/// int _mm_cmpistra(__m128i A, __m128i B, const int M);\n"
47684"/// \\endcode\n"
47685"///\n"
47686"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
47687"/// instruction.\n"
47688"///\n"
47689"/// \\param A\n"
47690"/// A 128-bit integer vector containing one of the source operands to be\n"
47691"/// compared.\n"
47692"/// \\param B\n"
47693"/// A 128-bit integer vector containing one of the source operands to be\n"
47694"/// compared.\n"
47695"/// \\param M\n"
47696"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47697"/// words and the type of comparison to perform. \\n\n"
47698"/// Bits [1:0]: Determine source data format. \\n\n"
47699"/// 00: 16 unsigned bytes \\n\n"
47700"/// 01: 8 unsigned words \\n\n"
47701"/// 10: 16 signed bytes \\n\n"
47702"/// 11: 8 signed words \\n\n"
47703"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47704"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47705"/// the characters in \\a A. \\n\n"
47706"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47707"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47708"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47709"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47710"/// \\a B for equality. \\n\n"
47711"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
47712"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47713"/// mask of the comparison results. \\n\n"
47714"/// 00: No effect. \\n\n"
47715"/// 01: Negate the bit mask. \\n\n"
47716"/// 10: No effect. \\n\n"
47717"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47718"/// to the size of \\a A or \\a B. \\n\n"
47719"/// \\returns Returns 1 if the bit mask is zero and the length of the string in\n"
47720"/// \\a B is the maximum; otherwise, returns 0.\n"
47721"#define _mm_cmpistra(A, B, M) \\\n"
47722" (int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \\\n"
47723" (__v16qi)(__m128i)(B), (int)(M))\n"
47724"\n"
47725"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47726"/// data with implicitly defined lengths that is contained in source operands\n"
47727"/// \\a A and \\a B. Returns 1 if the bit mask is non-zero, otherwise, returns\n"
47728"/// 0.\n"
47729"///\n"
47730"/// \\headerfile <x86intrin.h>\n"
47731"///\n"
47732"/// \\code\n"
47733"/// int _mm_cmpistrc(__m128i A, __m128i B, const int M);\n"
47734"/// \\endcode\n"
47735"///\n"
47736"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
47737"/// instruction.\n"
47738"///\n"
47739"/// \\param A\n"
47740"/// A 128-bit integer vector containing one of the source operands to be\n"
47741"/// compared.\n"
47742"/// \\param B\n"
47743"/// A 128-bit integer vector containing one of the source operands to be\n"
47744"/// compared.\n"
47745"/// \\param M\n"
47746"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47747"/// words and the type of comparison to perform. \\n\n"
47748"/// Bits [1:0]: Determine source data format. \\n\n"
47749"/// 00: 16 unsigned bytes \\n\n"
47750"/// 01: 8 unsigned words \\n\n"
47751"/// 10: 16 signed bytes \\n\n"
47752"/// 11: 8 signed words \\n\n"
47753"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47754"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47755"/// the characters in \\a A. \\n\n"
47756"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47757"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47758"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47759"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47760"/// \\a B for equality. \\n\n"
47761"/// 11: Substring: Search B for substring matches of \\a A. \\n\n"
47762"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47763"/// mask of the comparison results. \\n\n"
47764"/// 00: No effect. \\n\n"
47765"/// 01: Negate the bit mask. \\n\n"
47766"/// 10: No effect. \\n\n"
47767"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47768"/// to the size of \\a A or \\a B.\n"
47769"/// \\returns Returns 1 if the bit mask is non-zero, otherwise, returns 0.\n"
47770"#define _mm_cmpistrc(A, B, M) \\\n"
47771" (int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \\\n"
47772" (__v16qi)(__m128i)(B), (int)(M))\n"
47773"\n"
47774"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47775"/// data with implicitly defined lengths that is contained in source operands\n"
47776"/// \\a A and \\a B. Returns bit 0 of the resulting bit mask.\n"
47777"///\n"
47778"/// \\headerfile <x86intrin.h>\n"
47779"///\n"
47780"/// \\code\n"
47781"/// int _mm_cmpistro(__m128i A, __m128i B, const int M);\n"
47782"/// \\endcode\n"
47783"///\n"
47784"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
47785"/// instruction.\n"
47786"///\n"
47787"/// \\param A\n"
47788"/// A 128-bit integer vector containing one of the source operands to be\n"
47789"/// compared.\n"
47790"/// \\param B\n"
47791"/// A 128-bit integer vector containing one of the source operands to be\n"
47792"/// compared.\n"
47793"/// \\param M\n"
47794"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47795"/// words and the type of comparison to perform. \\n\n"
47796"/// Bits [1:0]: Determine source data format. \\n\n"
47797"/// 00: 16 unsigned bytes \\n\n"
47798"/// 01: 8 unsigned words \\n\n"
47799"/// 10: 16 signed bytes \\n\n"
47800"/// 11: 8 signed words \\n\n"
47801"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47802"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47803"/// the characters in \\a A. \\n\n"
47804"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47805"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47806"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47807"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47808"/// \\a B for equality. \\n\n"
47809"/// 11: Substring: Search B for substring matches of \\a A. \\n\n"
47810"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47811"/// mask of the comparison results. \\n\n"
47812"/// 00: No effect. \\n\n"
47813"/// 01: Negate the bit mask. \\n\n"
47814"/// 10: No effect. \\n\n"
47815"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47816"/// to the size of \\a A or \\a B. \\n\n"
47817"/// \\returns Returns bit 0 of the resulting bit mask.\n"
47818"#define _mm_cmpistro(A, B, M) \\\n"
47819" (int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \\\n"
47820" (__v16qi)(__m128i)(B), (int)(M))\n"
47821"\n"
47822"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47823"/// data with implicitly defined lengths that is contained in source operands\n"
47824"/// \\a A and \\a B. Returns 1 if the length of the string in \\a A is less than\n"
47825"/// the maximum, otherwise, returns 0.\n"
47826"///\n"
47827"/// \\headerfile <x86intrin.h>\n"
47828"///\n"
47829"/// \\code\n"
47830"/// int _mm_cmpistrs(__m128i A, __m128i B, const int M);\n"
47831"/// \\endcode\n"
47832"///\n"
47833"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
47834"/// instruction.\n"
47835"///\n"
47836"/// \\param A\n"
47837"/// A 128-bit integer vector containing one of the source operands to be\n"
47838"/// compared.\n"
47839"/// \\param B\n"
47840"/// A 128-bit integer vector containing one of the source operands to be\n"
47841"/// compared.\n"
47842"/// \\param M\n"
47843"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47844"/// words and the type of comparison to perform. \\n\n"
47845"/// Bits [1:0]: Determine source data format. \\n\n"
47846"/// 00: 16 unsigned bytes \\n\n"
47847"/// 01: 8 unsigned words \\n\n"
47848"/// 10: 16 signed bytes \\n\n"
47849"/// 11: 8 signed words \\n\n"
47850"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47851"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47852"/// the characters in \\a A. \\n\n"
47853"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47854"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47855"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47856"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47857"/// \\a B for equality. \\n\n"
47858"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
47859"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47860"/// mask of the comparison results. \\n\n"
47861"/// 00: No effect. \\n\n"
47862"/// 01: Negate the bit mask. \\n\n"
47863"/// 10: No effect. \\n\n"
47864"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47865"/// to the size of \\a A or \\a B. \\n\n"
47866"/// \\returns Returns 1 if the length of the string in \\a A is less than the\n"
47867"/// maximum, otherwise, returns 0.\n"
47868"#define _mm_cmpistrs(A, B, M) \\\n"
47869" (int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \\\n"
47870" (__v16qi)(__m128i)(B), (int)(M))\n"
47871"\n"
47872"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47873"/// data with implicitly defined lengths that is contained in source operands\n"
47874"/// \\a A and \\a B. Returns 1 if the length of the string in \\a B is less than\n"
47875"/// the maximum, otherwise, returns 0.\n"
47876"///\n"
47877"/// \\headerfile <x86intrin.h>\n"
47878"///\n"
47879"/// \\code\n"
47880"/// int _mm_cmpistrz(__m128i A, __m128i B, const int M);\n"
47881"/// \\endcode\n"
47882"///\n"
47883"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
47884"/// instruction.\n"
47885"///\n"
47886"/// \\param A\n"
47887"/// A 128-bit integer vector containing one of the source operands to be\n"
47888"/// compared.\n"
47889"/// \\param B\n"
47890"/// A 128-bit integer vector containing one of the source operands to be\n"
47891"/// compared.\n"
47892"/// \\param M\n"
47893"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47894"/// words and the type of comparison to perform. \\n\n"
47895"/// Bits [1:0]: Determine source data format. \\n\n"
47896"/// 00: 16 unsigned bytes \\n\n"
47897"/// 01: 8 unsigned words \\n\n"
47898"/// 10: 16 signed bytes \\n\n"
47899"/// 11: 8 signed words \\n\n"
47900"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47901"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47902"/// the characters in \\a A. \\n\n"
47903"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47904"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47905"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47906"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47907"/// \\a B for equality. \\n\n"
47908"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
47909"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47910"/// mask of the comparison results. \\n\n"
47911"/// 00: No effect. \\n\n"
47912"/// 01: Negate the bit mask. \\n\n"
47913"/// 10: No effect. \\n\n"
47914"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47915"/// to the size of \\a A or \\a B.\n"
47916"/// \\returns Returns 1 if the length of the string in \\a B is less than the\n"
47917"/// maximum, otherwise, returns 0.\n"
47918"#define _mm_cmpistrz(A, B, M) \\\n"
47919" (int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \\\n"
47920" (__v16qi)(__m128i)(B), (int)(M))\n"
47921"\n"
47922"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47923"/// data with explicitly defined lengths that is contained in source operands\n"
47924"/// \\a A and \\a B. Returns 1 if the bit mask is zero and the length of the\n"
47925"/// string in \\a B is the maximum, otherwise, returns 0.\n"
47926"///\n"
47927"/// \\headerfile <x86intrin.h>\n"
47928"///\n"
47929"/// \\code\n"
47930"/// int _mm_cmpestra(__m128i A, int LA, __m128i B, int LB, const int M);\n"
47931"/// \\endcode\n"
47932"///\n"
47933"/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n"
47934"/// instruction.\n"
47935"///\n"
47936"/// \\param A\n"
47937"/// A 128-bit integer vector containing one of the source operands to be\n"
47938"/// compared.\n"
47939"/// \\param LA\n"
47940"/// An integer that specifies the length of the string in \\a A.\n"
47941"/// \\param B\n"
47942"/// A 128-bit integer vector containing one of the source operands to be\n"
47943"/// compared.\n"
47944"/// \\param LB\n"
47945"/// An integer that specifies the length of the string in \\a B.\n"
47946"/// \\param M\n"
47947"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47948"/// words and the type of comparison to perform. \\n\n"
47949"/// Bits [1:0]: Determine source data format. \\n\n"
47950"/// 00: 16 unsigned bytes \\n\n"
47951"/// 01: 8 unsigned words \\n\n"
47952"/// 10: 16 signed bytes \\n\n"
47953"/// 11: 8 signed words \\n\n"
47954"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47955"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47956"/// the characters in \\a A. \\n\n"
47957"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47958"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47959"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47960"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47961"/// \\a B for equality. \\n\n"
47962"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
47963"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47964"/// mask of the comparison results. \\n\n"
47965"/// 00: No effect. \\n\n"
47966"/// 01: Negate the bit mask. \\n\n"
47967"/// 10: No effect. \\n\n"
47968"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47969"/// to the size of \\a A or \\a B.\n"
47970"/// \\returns Returns 1 if the bit mask is zero and the length of the string in\n"
47971"/// \\a B is the maximum, otherwise, returns 0.\n"
47972"#define _mm_cmpestra(A, LA, B, LB, M) \\\n"
47973" (int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
47974" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
47975" (int)(M))\n"
47976"\n"
47977"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47978"/// data with explicitly defined lengths that is contained in source operands\n"
47979"/// \\a A and \\a B. Returns 1 if the resulting mask is non-zero, otherwise,\n"
47980"/// returns 0.\n"
47981"///\n"
47982"/// \\headerfile <x86intrin.h>\n"
47983"///\n"
47984"/// \\code\n"
47985"/// int _mm_cmpestrc(__m128i A, int LA, __m128i B, int LB, const int M);\n"
47986"/// \\endcode\n"
47987"///\n"
47988"/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n"
47989"/// instruction.\n"
47990"///\n"
47991"/// \\param A\n"
47992"/// A 128-bit integer vector containing one of the source operands to be\n"
47993"/// compared.\n"
47994"/// \\param LA\n"
47995"/// An integer that specifies the length of the string in \\a A.\n"
47996"/// \\param B\n"
47997"/// A 128-bit integer vector containing one of the source operands to be\n"
47998"/// compared.\n"
47999"/// \\param LB\n"
48000"/// An integer that specifies the length of the string in \\a B.\n"
48001"/// \\param M\n"
48002"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
48003"/// words and the type of comparison to perform. \\n\n"
48004"/// Bits [1:0]: Determine source data format. \\n\n"
48005"/// 00: 16 unsigned bytes \\n\n"
48006"/// 01: 8 unsigned words \\n\n"
48007"/// 10: 16 signed bytes \\n\n"
48008"/// 11: 8 signed words \\n\n"
48009"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
48010"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
48011"/// the characters in \\a A. \\n\n"
48012"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
48013"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
48014"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
48015"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
48016"/// \\a B for equality. \\n\n"
48017"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
48018"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
48019"/// mask of the comparison results. \\n\n"
48020"/// 00: No effect. \\n\n"
48021"/// 01: Negate the bit mask. \\n\n"
48022"/// 10: No effect. \\n\n"
48023"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
48024"/// to the size of \\a A or \\a B. \\n\n"
48025"/// \\returns Returns 1 if the resulting mask is non-zero, otherwise, returns 0.\n"
48026"#define _mm_cmpestrc(A, LA, B, LB, M) \\\n"
48027" (int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
48028" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
48029" (int)(M))\n"
48030"\n"
48031"/// Uses the immediate operand \\a M to perform a comparison of string\n"
48032"/// data with explicitly defined lengths that is contained in source operands\n"
48033"/// \\a A and \\a B. Returns bit 0 of the resulting bit mask.\n"
48034"///\n"
48035"/// \\headerfile <x86intrin.h>\n"
48036"///\n"
48037"/// \\code\n"
48038"/// int _mm_cmpestro(__m128i A, int LA, __m128i B, int LB, const int M);\n"
48039"/// \\endcode\n"
48040"///\n"
48041"/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n"
48042"/// instruction.\n"
48043"///\n"
48044"/// \\param A\n"
48045"/// A 128-bit integer vector containing one of the source operands to be\n"
48046"/// compared.\n"
48047"/// \\param LA\n"
48048"/// An integer that specifies the length of the string in \\a A.\n"
48049"/// \\param B\n"
48050"/// A 128-bit integer vector containing one of the source operands to be\n"
48051"/// compared.\n"
48052"/// \\param LB\n"
48053"/// An integer that specifies the length of the string in \\a B.\n"
48054"/// \\param M\n"
48055"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
48056"/// words and the type of comparison to perform. \\n\n"
48057"/// Bits [1:0]: Determine source data format. \\n\n"
48058"/// 00: 16 unsigned bytes \\n\n"
48059"/// 01: 8 unsigned words \\n\n"
48060"/// 10: 16 signed bytes \\n\n"
48061"/// 11: 8 signed words \\n\n"
48062"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
48063"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
48064"/// the characters in \\a A. \\n\n"
48065"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
48066"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
48067"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
48068"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
48069"/// \\a B for equality. \\n\n"
48070"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
48071"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
48072"/// mask of the comparison results. \\n\n"
48073"/// 00: No effect. \\n\n"
48074"/// 01: Negate the bit mask. \\n\n"
48075"/// 10: No effect. \\n\n"
48076"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
48077"/// to the size of \\a A or \\a B.\n"
48078"/// \\returns Returns bit 0 of the resulting bit mask.\n"
48079"#define _mm_cmpestro(A, LA, B, LB, M) \\\n"
48080" (int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
48081" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
48082" (int)(M))\n"
48083"\n"
48084"/// Uses the immediate operand \\a M to perform a comparison of string\n"
48085"/// data with explicitly defined lengths that is contained in source operands\n"
48086"/// \\a A and \\a B. Returns 1 if the length of the string in \\a A is less than\n"
48087"/// the maximum, otherwise, returns 0.\n"
48088"///\n"
48089"/// \\headerfile <x86intrin.h>\n"
48090"///\n"
48091"/// \\code\n"
48092"/// int _mm_cmpestrs(__m128i A, int LA, __m128i B, int LB, const int M);\n"
48093"/// \\endcode\n"
48094"///\n"
48095"/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n"
48096"/// instruction.\n"
48097"///\n"
48098"/// \\param A\n"
48099"/// A 128-bit integer vector containing one of the source operands to be\n"
48100"/// compared.\n"
48101"/// \\param LA\n"
48102"/// An integer that specifies the length of the string in \\a A.\n"
48103"/// \\param B\n"
48104"/// A 128-bit integer vector containing one of the source operands to be\n"
48105"/// compared.\n"
48106"/// \\param LB\n"
48107"/// An integer that specifies the length of the string in \\a B.\n"
48108"/// \\param M\n"
48109"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
48110"/// words and the type of comparison to perform. \\n\n"
48111"/// Bits [1:0]: Determine source data format. \\n\n"
48112"/// 00: 16 unsigned bytes \\n\n"
48113"/// 01: 8 unsigned words \\n\n"
48114"/// 10: 16 signed bytes \\n\n"
48115"/// 11: 8 signed words \\n\n"
48116"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
48117"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
48118"/// the characters in \\a A. \\n\n"
48119"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
48120"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
48121"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
48122"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
48123"/// \\a B for equality. \\n\n"
48124"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
48125"/// Bits [5:4]: Determine whether to perform a one's complement in the bit\n"
48126"/// mask of the comparison results. \\n\n"
48127"/// 00: No effect. \\n\n"
48128"/// 01: Negate the bit mask. \\n\n"
48129"/// 10: No effect. \\n\n"
48130"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
48131"/// to the size of \\a A or \\a B. \\n\n"
48132"/// \\returns Returns 1 if the length of the string in \\a A is less than the\n"
48133"/// maximum, otherwise, returns 0.\n"
48134"#define _mm_cmpestrs(A, LA, B, LB, M) \\\n"
48135" (int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
48136" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
48137" (int)(M))\n"
48138"\n"
48139"/// Uses the immediate operand \\a M to perform a comparison of string\n"
48140"/// data with explicitly defined lengths that is contained in source operands\n"
48141"/// \\a A and \\a B. Returns 1 if the length of the string in \\a B is less than\n"
48142"/// the maximum, otherwise, returns 0.\n"
48143"///\n"
48144"/// \\headerfile <x86intrin.h>\n"
48145"///\n"
48146"/// \\code\n"
48147"/// int _mm_cmpestrz(__m128i A, int LA, __m128i B, int LB, const int M);\n"
48148"/// \\endcode\n"
48149"///\n"
48150"/// This intrinsic corresponds to the <c> VPCMPESTRI </c> instruction.\n"
48151"///\n"
48152"/// \\param A\n"
48153"/// A 128-bit integer vector containing one of the source operands to be\n"
48154"/// compared.\n"
48155"/// \\param LA\n"
48156"/// An integer that specifies the length of the string in \\a A.\n"
48157"/// \\param B\n"
48158"/// A 128-bit integer vector containing one of the source operands to be\n"
48159"/// compared.\n"
48160"/// \\param LB\n"
48161"/// An integer that specifies the length of the string in \\a B.\n"
48162"/// \\param M\n"
48163"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
48164"/// words and the type of comparison to perform. \\n\n"
48165"/// Bits [1:0]: Determine source data format. \\n\n"
48166"/// 00: 16 unsigned bytes \\n\n"
48167"/// 01: 8 unsigned words \\n\n"
48168"/// 10: 16 signed bytes \\n\n"
48169"/// 11: 8 signed words \\n\n"
48170"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
48171"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
48172"/// the characters in \\a A. \\n\n"
48173"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
48174"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
48175"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
48176"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
48177"/// \\a B for equality. \\n\n"
48178"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
48179"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
48180"/// mask of the comparison results. \\n\n"
48181"/// 00: No effect. \\n\n"
48182"/// 01: Negate the bit mask. \\n\n"
48183"/// 10: No effect. \\n\n"
48184"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
48185"/// to the size of \\a A or \\a B.\n"
48186"/// \\returns Returns 1 if the length of the string in \\a B is less than the\n"
48187"/// maximum, otherwise, returns 0.\n"
48188"#define _mm_cmpestrz(A, LA, B, LB, M) \\\n"
48189" (int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
48190" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
48191" (int)(M))\n"
48192"\n"
48193"/* SSE4.2 Compare Packed Data -- Greater Than. */\n"
48194"/// Compares each of the corresponding 64-bit values of the 128-bit\n"
48195"/// integer vectors to determine if the values in the first operand are\n"
48196"/// greater than those in the second operand.\n"
48197"///\n"
48198"/// \\headerfile <x86intrin.h>\n"
48199"///\n"
48200"/// This intrinsic corresponds to the <c> VPCMPGTQ / PCMPGTQ </c> instruction.\n"
48201"///\n"
48202"/// \\param __V1\n"
48203"/// A 128-bit integer vector.\n"
48204"/// \\param __V2\n"
48205"/// A 128-bit integer vector.\n"
48206"/// \\returns A 128-bit integer vector containing the comparison results.\n"
48207"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
48208"_mm_cmpgt_epi64(__m128i __V1, __m128i __V2)\n"
48209"{\n"
48210" return (__m128i)((__v2di)__V1 > (__v2di)__V2);\n"
48211"}\n"
48212"\n"
48213"/* SSE4.2 Accumulate CRC32. */\n"
48214"/// Adds the unsigned integer operand to the CRC-32C checksum of the\n"
48215"/// unsigned char operand.\n"
48216"///\n"
48217"/// \\headerfile <x86intrin.h>\n"
48218"///\n"
48219"/// This intrinsic corresponds to the <c> CRC32B </c> instruction.\n"
48220"///\n"
48221"/// \\param __C\n"
48222"/// An unsigned integer operand to add to the CRC-32C checksum of operand\n"
48223"/// \\a __D.\n"
48224"/// \\param __D\n"
48225"/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum.\n"
48226"/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n"
48227"/// operand \\a __D.\n"
48228"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
48229"_mm_crc32_u8(unsigned int __C, unsigned char __D)\n"
48230"{\n"
48231" return __builtin_ia32_crc32qi(__C, __D);\n"
48232"}\n"
48233"\n"
48234"/// Adds the unsigned integer operand to the CRC-32C checksum of the\n"
48235"/// unsigned short operand.\n"
48236"///\n"
48237"/// \\headerfile <x86intrin.h>\n"
48238"///\n"
48239"/// This intrinsic corresponds to the <c> CRC32W </c> instruction.\n"
48240"///\n"
48241"/// \\param __C\n"
48242"/// An unsigned integer operand to add to the CRC-32C checksum of operand\n"
48243"/// \\a __D.\n"
48244"/// \\param __D\n"
48245"/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum.\n"
48246"/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n"
48247"/// operand \\a __D.\n"
48248"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
48249"_mm_crc32_u16(unsigned int __C, unsigned short __D)\n"
48250"{\n"
48251" return __builtin_ia32_crc32hi(__C, __D);\n"
48252"}\n"
48253"\n"
48254"/// Adds the first unsigned integer operand to the CRC-32C checksum of\n"
48255"/// the second unsigned integer operand.\n"
48256"///\n"
48257"/// \\headerfile <x86intrin.h>\n"
48258"///\n"
48259"/// This intrinsic corresponds to the <c> CRC32L </c> instruction.\n"
48260"///\n"
48261"/// \\param __C\n"
48262"/// An unsigned integer operand to add to the CRC-32C checksum of operand\n"
48263"/// \\a __D.\n"
48264"/// \\param __D\n"
48265"/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum.\n"
48266"/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n"
48267"/// operand \\a __D.\n"
48268"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
48269"_mm_crc32_u32(unsigned int __C, unsigned int __D)\n"
48270"{\n"
48271" return __builtin_ia32_crc32si(__C, __D);\n"
48272"}\n"
48273"\n"
48274"#ifdef __x86_64__\n"
48275"/// Adds the unsigned integer operand to the CRC-32C checksum of the\n"
48276"/// unsigned 64-bit integer operand.\n"
48277"///\n"
48278"/// \\headerfile <x86intrin.h>\n"
48279"///\n"
48280"/// This intrinsic corresponds to the <c> CRC32Q </c> instruction.\n"
48281"///\n"
48282"/// \\param __C\n"
48283"/// An unsigned integer operand to add to the CRC-32C checksum of operand\n"
48284"/// \\a __D.\n"
48285"/// \\param __D\n"
48286"/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum.\n"
48287"/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n"
48288"/// operand \\a __D.\n"
48289"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
48290"_mm_crc32_u64(unsigned long long __C, unsigned long long __D)\n"
48291"{\n"
48292" return __builtin_ia32_crc32di(__C, __D);\n"
48293"}\n"
48294"#endif /* __x86_64__ */\n"
48295"\n"
48296"#undef __DEFAULT_FN_ATTRS\n"
48297"\n"
48298"#include <popcntintrin.h>\n"
48299"\n"
48300"#endif /* __SMMINTRIN_H */\n"
48301"" } ,
48302 { "/builtins/stdalign.h" , "/*===---- stdalign.h - Standard header for alignment ------------------------===\n"
48303" *\n"
48304" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
48305" * of this software and associated documentation files (the \"Software\"), to deal\n"
48306" * in the Software without restriction, including without limitation the rights\n"
48307" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
48308" * copies of the Software, and to permit persons to whom the Software is\n"
48309" * furnished to do so, subject to the following conditions:\n"
48310" *\n"
48311" * The above copyright notice and this permission notice shall be included in\n"
48312" * all copies or substantial portions of the Software.\n"
48313" *\n"
48314" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
48315" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
48316" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
48317" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
48318" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
48319" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
48320" * THE SOFTWARE.\n"
48321" *\n"
48322" *===-----------------------------------------------------------------------===\n"
48323" */\n"
48324"\n"
48325"#ifndef __STDALIGN_H\n"
48326"#define __STDALIGN_H\n"
48327"\n"
48328"#ifndef __cplusplus\n"
48329"#define alignas _Alignas\n"
48330"#define alignof _Alignof\n"
48331"#endif\n"
48332"\n"
48333"#define __alignas_is_defined 1\n"
48334"#define __alignof_is_defined 1\n"
48335"\n"
48336"#endif /* __STDALIGN_H */\n"
48337"" } ,
48338 { "/builtins/stdarg.h" , "/*===---- stdarg.h - Variable argument handling ----------------------------===\n"
48339" *\n"
48340" * Copyright (c) 2008 Eli Friedman\n"
48341" *\n"
48342" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
48343" * of this software and associated documentation files (the \"Software\"), to deal\n"
48344" * in the Software without restriction, including without limitation the rights\n"
48345" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
48346" * copies of the Software, and to permit persons to whom the Software is\n"
48347" * furnished to do so, subject to the following conditions:\n"
48348" *\n"
48349" * The above copyright notice and this permission notice shall be included in\n"
48350" * all copies or substantial portions of the Software.\n"
48351" *\n"
48352" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
48353" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
48354" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
48355" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
48356" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
48357" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
48358" * THE SOFTWARE.\n"
48359" *\n"
48360" *===-----------------------------------------------------------------------===\n"
48361" */\n"
48362"\n"
48363"#ifndef __STDARG_H\n"
48364"#define __STDARG_H\n"
48365"\n"
48366"#ifndef _VA_LIST\n"
48367"typedef __builtin_va_list va_list;\n"
48368"#define _VA_LIST\n"
48369"#endif\n"
48370"#define va_start(ap, param) __builtin_va_start(ap, param)\n"
48371"#define va_end(ap) __builtin_va_end(ap)\n"
48372"#define va_arg(ap, type) __builtin_va_arg(ap, type)\n"
48373"\n"
48374"/* GCC always defines __va_copy, but does not define va_copy unless in c99 mode\n"
48375" * or -ansi is not specified, since it was not part of C90.\n"
48376" */\n"
48377"#define __va_copy(d,s) __builtin_va_copy(d,s)\n"
48378"\n"
48379"#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L || !defined(__STRICT_ANSI__)\n"
48380"#define va_copy(dest, src) __builtin_va_copy(dest, src)\n"
48381"#endif\n"
48382"\n"
48383"#ifndef __GNUC_VA_LIST\n"
48384"#define __GNUC_VA_LIST 1\n"
48385"typedef __builtin_va_list __gnuc_va_list;\n"
48386"#endif\n"
48387"\n"
48388"#endif /* __STDARG_H */\n"
48389"" } ,
48390 { "/builtins/stdatomic.h" , "/*===---- stdatomic.h - Standard header for atomic types and operations -----===\n"
48391" *\n"
48392" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
48393" * of this software and associated documentation files (the \"Software\"), to deal\n"
48394" * in the Software without restriction, including without limitation the rights\n"
48395" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
48396" * copies of the Software, and to permit persons to whom the Software is\n"
48397" * furnished to do so, subject to the following conditions:\n"
48398" *\n"
48399" * The above copyright notice and this permission notice shall be included in\n"
48400" * all copies or substantial portions of the Software.\n"
48401" *\n"
48402" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
48403" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
48404" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
48405" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
48406" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
48407" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
48408" * THE SOFTWARE.\n"
48409" *\n"
48410" *===-----------------------------------------------------------------------===\n"
48411" */\n"
48412"\n"
48413"#ifndef __CLANG_STDATOMIC_H\n"
48414"#define __CLANG_STDATOMIC_H\n"
48415"\n"
48416"/* If we're hosted, fall back to the system's stdatomic.h. FreeBSD, for\n"
48417" * example, already has a Clang-compatible stdatomic.h header.\n"
48418" */\n"
48419"#if __STDC_HOSTED__ && __has_include_next(<stdatomic.h>)\n"
48420"# include_next <stdatomic.h>\n"
48421"#else\n"
48422"\n"
48423"#include <stddef.h>\n"
48424"#include <stdint.h>\n"
48425"\n"
48426"#ifdef __cplusplus\n"
48427"extern \"C\" {\n"
48428"#endif\n"
48429"\n"
48430"/* 7.17.1 Introduction */\n"
48431"\n"
48432"#define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE\n"
48433"#define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE\n"
48434"#define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE\n"
48435"#define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE\n"
48436"#define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE\n"
48437"#define ATOMIC_SHORT_LOCK_FREE __CLANG_ATOMIC_SHORT_LOCK_FREE\n"
48438"#define ATOMIC_INT_LOCK_FREE __CLANG_ATOMIC_INT_LOCK_FREE\n"
48439"#define ATOMIC_LONG_LOCK_FREE __CLANG_ATOMIC_LONG_LOCK_FREE\n"
48440"#define ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE\n"
48441"#define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE\n"
48442"\n"
48443"/* 7.17.2 Initialization */\n"
48444"\n"
48445"#define ATOMIC_VAR_INIT(value) (value)\n"
48446"#define atomic_init __c11_atomic_init\n"
48447"\n"
48448"/* 7.17.3 Order and consistency */\n"
48449"\n"
48450"typedef enum memory_order {\n"
48451" memory_order_relaxed = __ATOMIC_RELAXED,\n"
48452" memory_order_consume = __ATOMIC_CONSUME,\n"
48453" memory_order_acquire = __ATOMIC_ACQUIRE,\n"
48454" memory_order_release = __ATOMIC_RELEASE,\n"
48455" memory_order_acq_rel = __ATOMIC_ACQ_REL,\n"
48456" memory_order_seq_cst = __ATOMIC_SEQ_CST\n"
48457"} memory_order;\n"
48458"\n"
48459"#define kill_dependency(y) (y)\n"
48460"\n"
48461"/* 7.17.4 Fences */\n"
48462"\n"
48463"/* These should be provided by the libc implementation. */\n"
48464"void atomic_thread_fence(memory_order);\n"
48465"void atomic_signal_fence(memory_order);\n"
48466"\n"
48467"#define atomic_thread_fence(order) __c11_atomic_thread_fence(order)\n"
48468"#define atomic_signal_fence(order) __c11_atomic_signal_fence(order)\n"
48469"\n"
48470"/* 7.17.5 Lock-free property */\n"
48471"\n"
48472"#define atomic_is_lock_free(obj) __c11_atomic_is_lock_free(sizeof(*(obj)))\n"
48473"\n"
48474"/* 7.17.6 Atomic integer types */\n"
48475"\n"
48476"#ifdef __cplusplus\n"
48477"typedef _Atomic(bool) atomic_bool;\n"
48478"#else\n"
48479"typedef _Atomic(_Bool) atomic_bool;\n"
48480"#endif\n"
48481"typedef _Atomic(char) atomic_char;\n"
48482"typedef _Atomic(signed char) atomic_schar;\n"
48483"typedef _Atomic(unsigned char) atomic_uchar;\n"
48484"typedef _Atomic(short) atomic_short;\n"
48485"typedef _Atomic(unsigned short) atomic_ushort;\n"
48486"typedef _Atomic(int) atomic_int;\n"
48487"typedef _Atomic(unsigned int) atomic_uint;\n"
48488"typedef _Atomic(long) atomic_long;\n"
48489"typedef _Atomic(unsigned long) atomic_ulong;\n"
48490"typedef _Atomic(long long) atomic_llong;\n"
48491"typedef _Atomic(unsigned long long) atomic_ullong;\n"
48492"typedef _Atomic(uint_least16_t) atomic_char16_t;\n"
48493"typedef _Atomic(uint_least32_t) atomic_char32_t;\n"
48494"typedef _Atomic(wchar_t) atomic_wchar_t;\n"
48495"typedef _Atomic(int_least8_t) atomic_int_least8_t;\n"
48496"typedef _Atomic(uint_least8_t) atomic_uint_least8_t;\n"
48497"typedef _Atomic(int_least16_t) atomic_int_least16_t;\n"
48498"typedef _Atomic(uint_least16_t) atomic_uint_least16_t;\n"
48499"typedef _Atomic(int_least32_t) atomic_int_least32_t;\n"
48500"typedef _Atomic(uint_least32_t) atomic_uint_least32_t;\n"
48501"typedef _Atomic(int_least64_t) atomic_int_least64_t;\n"
48502"typedef _Atomic(uint_least64_t) atomic_uint_least64_t;\n"
48503"typedef _Atomic(int_fast8_t) atomic_int_fast8_t;\n"
48504"typedef _Atomic(uint_fast8_t) atomic_uint_fast8_t;\n"
48505"typedef _Atomic(int_fast16_t) atomic_int_fast16_t;\n"
48506"typedef _Atomic(uint_fast16_t) atomic_uint_fast16_t;\n"
48507"typedef _Atomic(int_fast32_t) atomic_int_fast32_t;\n"
48508"typedef _Atomic(uint_fast32_t) atomic_uint_fast32_t;\n"
48509"typedef _Atomic(int_fast64_t) atomic_int_fast64_t;\n"
48510"typedef _Atomic(uint_fast64_t) atomic_uint_fast64_t;\n"
48511"typedef _Atomic(intptr_t) atomic_intptr_t;\n"
48512"typedef _Atomic(uintptr_t) atomic_uintptr_t;\n"
48513"typedef _Atomic(size_t) atomic_size_t;\n"
48514"typedef _Atomic(ptrdiff_t) atomic_ptrdiff_t;\n"
48515"typedef _Atomic(intmax_t) atomic_intmax_t;\n"
48516"typedef _Atomic(uintmax_t) atomic_uintmax_t;\n"
48517"\n"
48518"/* 7.17.7 Operations on atomic types */\n"
48519"\n"
48520"#define atomic_store(object, desired) __c11_atomic_store(object, desired, __ATOMIC_SEQ_CST)\n"
48521"#define atomic_store_explicit __c11_atomic_store\n"
48522"\n"
48523"#define atomic_load(object) __c11_atomic_load(object, __ATOMIC_SEQ_CST)\n"
48524"#define atomic_load_explicit __c11_atomic_load\n"
48525"\n"
48526"#define atomic_exchange(object, desired) __c11_atomic_exchange(object, desired, __ATOMIC_SEQ_CST)\n"
48527"#define atomic_exchange_explicit __c11_atomic_exchange\n"
48528"\n"
48529"#define atomic_compare_exchange_strong(object, expected, desired) __c11_atomic_compare_exchange_strong(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)\n"
48530"#define atomic_compare_exchange_strong_explicit __c11_atomic_compare_exchange_strong\n"
48531"\n"
48532"#define atomic_compare_exchange_weak(object, expected, desired) __c11_atomic_compare_exchange_weak(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)\n"
48533"#define atomic_compare_exchange_weak_explicit __c11_atomic_compare_exchange_weak\n"
48534"\n"
48535"#define atomic_fetch_add(object, operand) __c11_atomic_fetch_add(object, operand, __ATOMIC_SEQ_CST)\n"
48536"#define atomic_fetch_add_explicit __c11_atomic_fetch_add\n"
48537"\n"
48538"#define atomic_fetch_sub(object, operand) __c11_atomic_fetch_sub(object, operand, __ATOMIC_SEQ_CST)\n"
48539"#define atomic_fetch_sub_explicit __c11_atomic_fetch_sub\n"
48540"\n"
48541"#define atomic_fetch_or(object, operand) __c11_atomic_fetch_or(object, operand, __ATOMIC_SEQ_CST)\n"
48542"#define atomic_fetch_or_explicit __c11_atomic_fetch_or\n"
48543"\n"
48544"#define atomic_fetch_xor(object, operand) __c11_atomic_fetch_xor(object, operand, __ATOMIC_SEQ_CST)\n"
48545"#define atomic_fetch_xor_explicit __c11_atomic_fetch_xor\n"
48546"\n"
48547"#define atomic_fetch_and(object, operand) __c11_atomic_fetch_and(object, operand, __ATOMIC_SEQ_CST)\n"
48548"#define atomic_fetch_and_explicit __c11_atomic_fetch_and\n"
48549"\n"
48550"/* 7.17.8 Atomic flag type and operations */\n"
48551"\n"
48552"typedef struct atomic_flag { atomic_bool _Value; } atomic_flag;\n"
48553"\n"
48554"#define ATOMIC_FLAG_INIT { 0 }\n"
48555"\n"
48556"/* These should be provided by the libc implementation. */\n"
48557"#ifdef __cplusplus\n"
48558"bool atomic_flag_test_and_set(volatile atomic_flag *);\n"
48559"bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order);\n"
48560"#else\n"
48561"_Bool atomic_flag_test_and_set(volatile atomic_flag *);\n"
48562"_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order);\n"
48563"#endif\n"
48564"void atomic_flag_clear(volatile atomic_flag *);\n"
48565"void atomic_flag_clear_explicit(volatile atomic_flag *, memory_order);\n"
48566"\n"
48567"#define atomic_flag_test_and_set(object) __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST)\n"
48568"#define atomic_flag_test_and_set_explicit(object, order) __c11_atomic_exchange(&(object)->_Value, 1, order)\n"
48569"\n"
48570"#define atomic_flag_clear(object) __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST)\n"
48571"#define atomic_flag_clear_explicit(object, order) __c11_atomic_store(&(object)->_Value, 0, order)\n"
48572"\n"
48573"#ifdef __cplusplus\n"
48574"}\n"
48575"#endif\n"
48576"\n"
48577"#endif /* __STDC_HOSTED__ */\n"
48578"#endif /* __CLANG_STDATOMIC_H */\n"
48579"\n"
48580"" } ,
48581 { "/builtins/stdbool.h" , "/*===---- stdbool.h - Standard header for booleans -------------------------===\n"
48582" *\n"
48583" * Copyright (c) 2008 Eli Friedman\n"
48584" *\n"
48585" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
48586" * of this software and associated documentation files (the \"Software\"), to deal\n"
48587" * in the Software without restriction, including without limitation the rights\n"
48588" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
48589" * copies of the Software, and to permit persons to whom the Software is\n"
48590" * furnished to do so, subject to the following conditions:\n"
48591" *\n"
48592" * The above copyright notice and this permission notice shall be included in\n"
48593" * all copies or substantial portions of the Software.\n"
48594" *\n"
48595" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
48596" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
48597" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
48598" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
48599" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
48600" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
48601" * THE SOFTWARE.\n"
48602" *\n"
48603" *===-----------------------------------------------------------------------===\n"
48604" */\n"
48605"\n"
48606"#ifndef __STDBOOL_H\n"
48607"#define __STDBOOL_H\n"
48608"\n"
48609"/* Don't define bool, true, and false in C++, except as a GNU extension. */\n"
48610"#ifndef __cplusplus\n"
48611"#define bool _Bool\n"
48612"#define true 1\n"
48613"#define false 0\n"
48614"#elif defined(__GNUC__) && !defined(__STRICT_ANSI__)\n"
48615"/* Define _Bool as a GNU extension. */\n"
48616"#define _Bool bool\n"
48617"#if __cplusplus < 201103L\n"
48618"/* For C++98, define bool, false, true as a GNU extension. */\n"
48619"#define bool bool\n"
48620"#define false false\n"
48621"#define true true\n"
48622"#endif\n"
48623"#endif\n"
48624"\n"
48625"#define __bool_true_false_are_defined 1\n"
48626"\n"
48627"#endif /* __STDBOOL_H */\n"
48628"" } ,
48629 { "/builtins/stddef.h" , "/*===---- stddef.h - Basic type definitions --------------------------------===\n"
48630" *\n"
48631" * Copyright (c) 2008 Eli Friedman\n"
48632" *\n"
48633" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
48634" * of this software and associated documentation files (the \"Software\"), to deal\n"
48635" * in the Software without restriction, including without limitation the rights\n"
48636" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
48637" * copies of the Software, and to permit persons to whom the Software is\n"
48638" * furnished to do so, subject to the following conditions:\n"
48639" *\n"
48640" * The above copyright notice and this permission notice shall be included in\n"
48641" * all copies or substantial portions of the Software.\n"
48642" *\n"
48643" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
48644" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
48645" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
48646" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
48647" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
48648" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
48649" * THE SOFTWARE.\n"
48650" *\n"
48651" *===-----------------------------------------------------------------------===\n"
48652" */\n"
48653"\n"
48654"#if !defined(__STDDEF_H) || defined(__need_ptrdiff_t) || \\\n"
48655" defined(__need_size_t) || defined(__need_wchar_t) || \\\n"
48656" defined(__need_NULL) || defined(__need_wint_t)\n"
48657"\n"
48658"#if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \\\n"
48659" !defined(__need_wchar_t) && !defined(__need_NULL) && \\\n"
48660" !defined(__need_wint_t)\n"
48661"/* Always define miscellaneous pieces when modules are available. */\n"
48662"#if !__has_feature(modules)\n"
48663"#define __STDDEF_H\n"
48664"#endif\n"
48665"#define __need_ptrdiff_t\n"
48666"#define __need_size_t\n"
48667"#define __need_wchar_t\n"
48668"#define __need_NULL\n"
48669"#define __need_STDDEF_H_misc\n"
48670"/* __need_wint_t is intentionally not defined here. */\n"
48671"#endif\n"
48672"\n"
48673"#if defined(__need_ptrdiff_t)\n"
48674"#if !defined(_PTRDIFF_T) || __has_feature(modules)\n"
48675"/* Always define ptrdiff_t when modules are available. */\n"
48676"#if !__has_feature(modules)\n"
48677"#define _PTRDIFF_T\n"
48678"#endif\n"
48679"typedef __PTRDIFF_TYPE__ ptrdiff_t;\n"
48680"#endif\n"
48681"#undef __need_ptrdiff_t\n"
48682"#endif /* defined(__need_ptrdiff_t) */\n"
48683"\n"
48684"#if defined(__need_size_t)\n"
48685"#if !defined(_SIZE_T) || __has_feature(modules)\n"
48686"/* Always define size_t when modules are available. */\n"
48687"#if !__has_feature(modules)\n"
48688"#define _SIZE_T\n"
48689"#endif\n"
48690"typedef __SIZE_TYPE__ size_t;\n"
48691"#endif\n"
48692"#undef __need_size_t\n"
48693"#endif /*defined(__need_size_t) */\n"
48694"\n"
48695"#if defined(__need_STDDEF_H_misc)\n"
48696"/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is\n"
48697" * enabled. */\n"
48698"#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \\\n"
48699" !defined(_RSIZE_T)) || __has_feature(modules)\n"
48700"/* Always define rsize_t when modules are available. */\n"
48701"#if !__has_feature(modules)\n"
48702"#define _RSIZE_T\n"
48703"#endif\n"
48704"typedef __SIZE_TYPE__ rsize_t;\n"
48705"#endif\n"
48706"#endif /* defined(__need_STDDEF_H_misc) */\n"
48707"\n"
48708"#if defined(__need_wchar_t)\n"
48709"#ifndef __cplusplus\n"
48710"/* Always define wchar_t when modules are available. */\n"
48711"#if !defined(_WCHAR_T) || __has_feature(modules)\n"
48712"#if !__has_feature(modules)\n"
48713"#define _WCHAR_T\n"
48714"#if defined(_MSC_EXTENSIONS)\n"
48715"#define _WCHAR_T_DEFINED\n"
48716"#endif\n"
48717"#endif\n"
48718"typedef __WCHAR_TYPE__ wchar_t;\n"
48719"#endif\n"
48720"#endif\n"
48721"#undef __need_wchar_t\n"
48722"#endif /* defined(__need_wchar_t) */\n"
48723"\n"
48724"#if defined(__need_NULL)\n"
48725"#undef NULL\n"
48726"#ifdef __cplusplus\n"
48727"# if !defined(__MINGW32__) && !defined(_MSC_VER)\n"
48728"# define NULL __null\n"
48729"# else\n"
48730"# define NULL 0\n"
48731"# endif\n"
48732"#else\n"
48733"# define NULL ((void*)0)\n"
48734"#endif\n"
48735"#ifdef __cplusplus\n"
48736"#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)\n"
48737"namespace std { typedef decltype(nullptr) nullptr_t; }\n"
48738"using ::std::nullptr_t;\n"
48739"#endif\n"
48740"#endif\n"
48741"#undef __need_NULL\n"
48742"#endif /* defined(__need_NULL) */\n"
48743"\n"
48744"#if defined(__need_STDDEF_H_misc)\n"
48745"#if __STDC_VERSION__ >= 201112L || __cplusplus >= 201103L\n"
48746"#include \"__stddef_max_align_t.h\"\n"
48747"#endif\n"
48748"#define offsetof(t, d) __builtin_offsetof(t, d)\n"
48749"#undef __need_STDDEF_H_misc\n"
48750"#endif /* defined(__need_STDDEF_H_misc) */\n"
48751"\n"
48752"/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use\n"
48753"__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */\n"
48754"#if defined(__need_wint_t)\n"
48755"/* Always define wint_t when modules are available. */\n"
48756"#if !defined(_WINT_T) || __has_feature(modules)\n"
48757"#if !__has_feature(modules)\n"
48758"#define _WINT_T\n"
48759"#endif\n"
48760"typedef __WINT_TYPE__ wint_t;\n"
48761"#endif\n"
48762"#undef __need_wint_t\n"
48763"#endif /* __need_wint_t */\n"
48764"\n"
48765"#endif\n"
48766"" } ,
48767 { "/builtins/stdint.h" , "/*===---- stdint.h - Standard header for sized integer types --------------===*\\\n"
48768" *\n"
48769" * Copyright (c) 2009 Chris Lattner\n"
48770" *\n"
48771" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
48772" * of this software and associated documentation files (the \"Software\"), to deal\n"
48773" * in the Software without restriction, including without limitation the rights\n"
48774" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
48775" * copies of the Software, and to permit persons to whom the Software is\n"
48776" * furnished to do so, subject to the following conditions:\n"
48777" *\n"
48778" * The above copyright notice and this permission notice shall be included in\n"
48779" * all copies or substantial portions of the Software.\n"
48780" *\n"
48781" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
48782" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
48783" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
48784" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
48785" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
48786" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
48787" * THE SOFTWARE.\n"
48788" *\n"
48789"\\*===----------------------------------------------------------------------===*/\n"
48790"\n"
48791"#ifndef __CLANG_STDINT_H2\n"
48792"#define __CLANG_STDINT_H2\n"
48793"\n"
48794"/* If we're hosted, fall back to the system's stdint.h, which might have\n"
48795" * additional definitions.\n"
48796" */\n"
48797"#if __STDC_HOSTED__ && __has_include_next(<stdint.h>)\n"
48798"\n"
48799"// C99 7.18.3 Limits of other integer types\n"
48800"//\n"
48801"// Footnote 219, 220: C++ implementations should define these macros only when\n"
48802"// __STDC_LIMIT_MACROS is defined before <stdint.h> is included.\n"
48803"//\n"
48804"// Footnote 222: C++ implementations should define these macros only when\n"
48805"// __STDC_CONSTANT_MACROS is defined before <stdint.h> is included.\n"
48806"//\n"
48807"// C++11 [cstdint.syn]p2:\n"
48808"//\n"
48809"// The macros defined by <cstdint> are provided unconditionally. In particular,\n"
48810"// the symbols __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS (mentioned in\n"
48811"// footnotes 219, 220, and 222 in the C standard) play no role in C++.\n"
48812"//\n"
48813"// C11 removed the problematic footnotes.\n"
48814"//\n"
48815"// Work around this inconsistency by always defining those macros in C++ mode,\n"
48816"// so that a C library implementation which follows the C99 standard can be\n"
48817"// used in C++.\n"
48818"# ifdef __cplusplus\n"
48819"# if !defined(__STDC_LIMIT_MACROS)\n"
48820"# define __STDC_LIMIT_MACROS\n"
48821"# define __STDC_LIMIT_MACROS_DEFINED_BY_CLANG\n"
48822"# endif\n"
48823"# if !defined(__STDC_CONSTANT_MACROS)\n"
48824"# define __STDC_CONSTANT_MACROS\n"
48825"# define __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG\n"
48826"# endif\n"
48827"# endif\n"
48828"\n"
48829"# include_next <stdint.h>\n"
48830"\n"
48831"# ifdef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG\n"
48832"# undef __STDC_LIMIT_MACROS\n"
48833"# undef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG\n"
48834"# endif\n"
48835"# ifdef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG\n"
48836"# undef __STDC_CONSTANT_MACROS\n"
48837"# undef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG\n"
48838"# endif\n"
48839"\n"
48840"#else\n"
48841"\n"
48842"/* C99 7.18.1.1 Exact-width integer types.\n"
48843" * C99 7.18.1.2 Minimum-width integer types.\n"
48844" * C99 7.18.1.3 Fastest minimum-width integer types.\n"
48845" *\n"
48846" * The standard requires that exact-width type be defined for 8-, 16-, 32-, and\n"
48847" * 64-bit types if they are implemented. Other exact width types are optional.\n"
48848" * This implementation defines an exact-width types for every integer width\n"
48849" * that is represented in the standard integer types.\n"
48850" *\n"
48851" * The standard also requires minimum-width types be defined for 8-, 16-, 32-,\n"
48852" * and 64-bit widths regardless of whether there are corresponding exact-width\n"
48853" * types.\n"
48854" *\n"
48855" * To accommodate targets that are missing types that are exactly 8, 16, 32, or\n"
48856" * 64 bits wide, this implementation takes an approach of cascading\n"
48857" * redefinitions, redefining __int_leastN_t to successively smaller exact-width\n"
48858" * types. It is therefore important that the types are defined in order of\n"
48859" * descending widths.\n"
48860" *\n"
48861" * We currently assume that the minimum-width types and the fastest\n"
48862" * minimum-width types are the same. This is allowed by the standard, but is\n"
48863" * suboptimal.\n"
48864" *\n"
48865" * In violation of the standard, some targets do not implement a type that is\n"
48866" * wide enough to represent all of the required widths (8-, 16-, 32-, 64-bit).\n"
48867" * To accommodate these targets, a required minimum-width type is only\n"
48868" * defined if there exists an exact-width type of equal or greater width.\n"
48869" */\n"
48870"\n"
48871"#ifdef __INT64_TYPE__\n"
48872"# ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/\n"
48873"typedef __INT64_TYPE__ int64_t;\n"
48874"# endif /* __int8_t_defined */\n"
48875"typedef __UINT64_TYPE__ uint64_t;\n"
48876"# define __int_least64_t int64_t\n"
48877"# define __uint_least64_t uint64_t\n"
48878"# define __int_least32_t int64_t\n"
48879"# define __uint_least32_t uint64_t\n"
48880"# define __int_least16_t int64_t\n"
48881"# define __uint_least16_t uint64_t\n"
48882"# define __int_least8_t int64_t\n"
48883"# define __uint_least8_t uint64_t\n"
48884"#endif /* __INT64_TYPE__ */\n"
48885"\n"
48886"#ifdef __int_least64_t\n"
48887"typedef __int_least64_t int_least64_t;\n"
48888"typedef __uint_least64_t uint_least64_t;\n"
48889"typedef __int_least64_t int_fast64_t;\n"
48890"typedef __uint_least64_t uint_fast64_t;\n"
48891"#endif /* __int_least64_t */\n"
48892"\n"
48893"#ifdef __INT56_TYPE__\n"
48894"typedef __INT56_TYPE__ int56_t;\n"
48895"typedef __UINT56_TYPE__ uint56_t;\n"
48896"typedef int56_t int_least56_t;\n"
48897"typedef uint56_t uint_least56_t;\n"
48898"typedef int56_t int_fast56_t;\n"
48899"typedef uint56_t uint_fast56_t;\n"
48900"# define __int_least32_t int56_t\n"
48901"# define __uint_least32_t uint56_t\n"
48902"# define __int_least16_t int56_t\n"
48903"# define __uint_least16_t uint56_t\n"
48904"# define __int_least8_t int56_t\n"
48905"# define __uint_least8_t uint56_t\n"
48906"#endif /* __INT56_TYPE__ */\n"
48907"\n"
48908"\n"
48909"#ifdef __INT48_TYPE__\n"
48910"typedef __INT48_TYPE__ int48_t;\n"
48911"typedef __UINT48_TYPE__ uint48_t;\n"
48912"typedef int48_t int_least48_t;\n"
48913"typedef uint48_t uint_least48_t;\n"
48914"typedef int48_t int_fast48_t;\n"
48915"typedef uint48_t uint_fast48_t;\n"
48916"# define __int_least32_t int48_t\n"
48917"# define __uint_least32_t uint48_t\n"
48918"# define __int_least16_t int48_t\n"
48919"# define __uint_least16_t uint48_t\n"
48920"# define __int_least8_t int48_t\n"
48921"# define __uint_least8_t uint48_t\n"
48922"#endif /* __INT48_TYPE__ */\n"
48923"\n"
48924"\n"
48925"#ifdef __INT40_TYPE__\n"
48926"typedef __INT40_TYPE__ int40_t;\n"
48927"typedef __UINT40_TYPE__ uint40_t;\n"
48928"typedef int40_t int_least40_t;\n"
48929"typedef uint40_t uint_least40_t;\n"
48930"typedef int40_t int_fast40_t;\n"
48931"typedef uint40_t uint_fast40_t;\n"
48932"# define __int_least32_t int40_t\n"
48933"# define __uint_least32_t uint40_t\n"
48934"# define __int_least16_t int40_t\n"
48935"# define __uint_least16_t uint40_t\n"
48936"# define __int_least8_t int40_t\n"
48937"# define __uint_least8_t uint40_t\n"
48938"#endif /* __INT40_TYPE__ */\n"
48939"\n"
48940"\n"
48941"#ifdef __INT32_TYPE__\n"
48942"\n"
48943"# ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/\n"
48944"typedef __INT32_TYPE__ int32_t;\n"
48945"# endif /* __int8_t_defined */\n"
48946"\n"
48947"# ifndef __uint32_t_defined /* more glibc compatibility */\n"
48948"# define __uint32_t_defined\n"
48949"typedef __UINT32_TYPE__ uint32_t;\n"
48950"# endif /* __uint32_t_defined */\n"
48951"\n"
48952"# define __int_least32_t int32_t\n"
48953"# define __uint_least32_t uint32_t\n"
48954"# define __int_least16_t int32_t\n"
48955"# define __uint_least16_t uint32_t\n"
48956"# define __int_least8_t int32_t\n"
48957"# define __uint_least8_t uint32_t\n"
48958"#endif /* __INT32_TYPE__ */\n"
48959"\n"
48960"#ifdef __int_least32_t\n"
48961"typedef __int_least32_t int_least32_t;\n"
48962"typedef __uint_least32_t uint_least32_t;\n"
48963"typedef __int_least32_t int_fast32_t;\n"
48964"typedef __uint_least32_t uint_fast32_t;\n"
48965"#endif /* __int_least32_t */\n"
48966"\n"
48967"#ifdef __INT24_TYPE__\n"
48968"typedef __INT24_TYPE__ int24_t;\n"
48969"typedef __UINT24_TYPE__ uint24_t;\n"
48970"typedef int24_t int_least24_t;\n"
48971"typedef uint24_t uint_least24_t;\n"
48972"typedef int24_t int_fast24_t;\n"
48973"typedef uint24_t uint_fast24_t;\n"
48974"# define __int_least16_t int24_t\n"
48975"# define __uint_least16_t uint24_t\n"
48976"# define __int_least8_t int24_t\n"
48977"# define __uint_least8_t uint24_t\n"
48978"#endif /* __INT24_TYPE__ */\n"
48979"\n"
48980"#ifdef __INT16_TYPE__\n"
48981"#ifndef __int8_t_defined /* glibc sys/types.h also defines int16_t*/\n"
48982"typedef __INT16_TYPE__ int16_t;\n"
48983"#endif /* __int8_t_defined */\n"
48984"typedef __UINT16_TYPE__ uint16_t;\n"
48985"# define __int_least16_t int16_t\n"
48986"# define __uint_least16_t uint16_t\n"
48987"# define __int_least8_t int16_t\n"
48988"# define __uint_least8_t uint16_t\n"
48989"#endif /* __INT16_TYPE__ */\n"
48990"\n"
48991"#ifdef __int_least16_t\n"
48992"typedef __int_least16_t int_least16_t;\n"
48993"typedef __uint_least16_t uint_least16_t;\n"
48994"typedef __int_least16_t int_fast16_t;\n"
48995"typedef __uint_least16_t uint_fast16_t;\n"
48996"#endif /* __int_least16_t */\n"
48997"\n"
48998"\n"
48999"#ifdef __INT8_TYPE__\n"
49000"#ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/\n"
49001"typedef __INT8_TYPE__ int8_t;\n"
49002"#endif /* __int8_t_defined */\n"
49003"typedef __UINT8_TYPE__ uint8_t;\n"
49004"# define __int_least8_t int8_t\n"
49005"# define __uint_least8_t uint8_t\n"
49006"#endif /* __INT8_TYPE__ */\n"
49007"\n"
49008"#ifdef __int_least8_t\n"
49009"typedef __int_least8_t int_least8_t;\n"
49010"typedef __uint_least8_t uint_least8_t;\n"
49011"typedef __int_least8_t int_fast8_t;\n"
49012"typedef __uint_least8_t uint_fast8_t;\n"
49013"#endif /* __int_least8_t */\n"
49014"\n"
49015"/* prevent glibc sys/types.h from defining conflicting types */\n"
49016"#ifndef __int8_t_defined\n"
49017"# define __int8_t_defined\n"
49018"#endif /* __int8_t_defined */\n"
49019"\n"
49020"/* C99 7.18.1.4 Integer types capable of holding object pointers.\n"
49021" */\n"
49022"#define __stdint_join3(a,b,c) a ## b ## c\n"
49023"\n"
49024"#ifndef _INTPTR_T\n"
49025"#ifndef __intptr_t_defined\n"
49026"typedef __INTPTR_TYPE__ intptr_t;\n"
49027"#define __intptr_t_defined\n"
49028"#define _INTPTR_T\n"
49029"#endif\n"
49030"#endif\n"
49031"\n"
49032"#ifndef _UINTPTR_T\n"
49033"typedef __UINTPTR_TYPE__ uintptr_t;\n"
49034"#define _UINTPTR_T\n"
49035"#endif\n"
49036"\n"
49037"/* C99 7.18.1.5 Greatest-width integer types.\n"
49038" */\n"
49039"typedef __INTMAX_TYPE__ intmax_t;\n"
49040"typedef __UINTMAX_TYPE__ uintmax_t;\n"
49041"\n"
49042"/* C99 7.18.4 Macros for minimum-width integer constants.\n"
49043" *\n"
49044" * The standard requires that integer constant macros be defined for all the\n"
49045" * minimum-width types defined above. As 8-, 16-, 32-, and 64-bit minimum-width\n"
49046" * types are required, the corresponding integer constant macros are defined\n"
49047" * here. This implementation also defines minimum-width types for every other\n"
49048" * integer width that the target implements, so corresponding macros are\n"
49049" * defined below, too.\n"
49050" *\n"
49051" * These macros are defined using the same successive-shrinking approach as\n"
49052" * the type definitions above. It is likewise important that macros are defined\n"
49053" * in order of decending width.\n"
49054" *\n"
49055" * Note that C++ should not check __STDC_CONSTANT_MACROS here, contrary to the\n"
49056" * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).\n"
49057" */\n"
49058"\n"
49059"#define __int_c_join(a, b) a ## b\n"
49060"#define __int_c(v, suffix) __int_c_join(v, suffix)\n"
49061"#define __uint_c(v, suffix) __int_c_join(v##U, suffix)\n"
49062"\n"
49063"\n"
49064"#ifdef __INT64_TYPE__\n"
49065"# ifdef __INT64_C_SUFFIX__\n"
49066"# define __int64_c_suffix __INT64_C_SUFFIX__\n"
49067"# define __int32_c_suffix __INT64_C_SUFFIX__\n"
49068"# define __int16_c_suffix __INT64_C_SUFFIX__\n"
49069"# define __int8_c_suffix __INT64_C_SUFFIX__\n"
49070"# else\n"
49071"# undef __int64_c_suffix\n"
49072"# undef __int32_c_suffix\n"
49073"# undef __int16_c_suffix\n"
49074"# undef __int8_c_suffix\n"
49075"# endif /* __INT64_C_SUFFIX__ */\n"
49076"#endif /* __INT64_TYPE__ */\n"
49077"\n"
49078"#ifdef __int_least64_t\n"
49079"# ifdef __int64_c_suffix\n"
49080"# define INT64_C(v) __int_c(v, __int64_c_suffix)\n"
49081"# define UINT64_C(v) __uint_c(v, __int64_c_suffix)\n"
49082"# else\n"
49083"# define INT64_C(v) v\n"
49084"# define UINT64_C(v) v ## U\n"
49085"# endif /* __int64_c_suffix */\n"
49086"#endif /* __int_least64_t */\n"
49087"\n"
49088"\n"
49089"#ifdef __INT56_TYPE__\n"
49090"# ifdef __INT56_C_SUFFIX__\n"
49091"# define INT56_C(v) __int_c(v, __INT56_C_SUFFIX__)\n"
49092"# define UINT56_C(v) __uint_c(v, __INT56_C_SUFFIX__)\n"
49093"# define __int32_c_suffix __INT56_C_SUFFIX__\n"
49094"# define __int16_c_suffix __INT56_C_SUFFIX__\n"
49095"# define __int8_c_suffix __INT56_C_SUFFIX__\n"
49096"# else\n"
49097"# define INT56_C(v) v\n"
49098"# define UINT56_C(v) v ## U\n"
49099"# undef __int32_c_suffix\n"
49100"# undef __int16_c_suffix\n"
49101"# undef __int8_c_suffix\n"
49102"# endif /* __INT56_C_SUFFIX__ */\n"
49103"#endif /* __INT56_TYPE__ */\n"
49104"\n"
49105"\n"
49106"#ifdef __INT48_TYPE__\n"
49107"# ifdef __INT48_C_SUFFIX__\n"
49108"# define INT48_C(v) __int_c(v, __INT48_C_SUFFIX__)\n"
49109"# define UINT48_C(v) __uint_c(v, __INT48_C_SUFFIX__)\n"
49110"# define __int32_c_suffix __INT48_C_SUFFIX__\n"
49111"# define __int16_c_suffix __INT48_C_SUFFIX__\n"
49112"# define __int8_c_suffix __INT48_C_SUFFIX__\n"
49113"# else\n"
49114"# define INT48_C(v) v\n"
49115"# define UINT48_C(v) v ## U\n"
49116"# undef __int32_c_suffix\n"
49117"# undef __int16_c_suffix\n"
49118"# undef __int8_c_suffix\n"
49119"# endif /* __INT48_C_SUFFIX__ */\n"
49120"#endif /* __INT48_TYPE__ */\n"
49121"\n"
49122"\n"
49123"#ifdef __INT40_TYPE__\n"
49124"# ifdef __INT40_C_SUFFIX__\n"
49125"# define INT40_C(v) __int_c(v, __INT40_C_SUFFIX__)\n"
49126"# define UINT40_C(v) __uint_c(v, __INT40_C_SUFFIX__)\n"
49127"# define __int32_c_suffix __INT40_C_SUFFIX__\n"
49128"# define __int16_c_suffix __INT40_C_SUFFIX__\n"
49129"# define __int8_c_suffix __INT40_C_SUFFIX__\n"
49130"# else\n"
49131"# define INT40_C(v) v\n"
49132"# define UINT40_C(v) v ## U\n"
49133"# undef __int32_c_suffix\n"
49134"# undef __int16_c_suffix\n"
49135"# undef __int8_c_suffix\n"
49136"# endif /* __INT40_C_SUFFIX__ */\n"
49137"#endif /* __INT40_TYPE__ */\n"
49138"\n"
49139"\n"
49140"#ifdef __INT32_TYPE__\n"
49141"# ifdef __INT32_C_SUFFIX__\n"
49142"# define __int32_c_suffix __INT32_C_SUFFIX__\n"
49143"# define __int16_c_suffix __INT32_C_SUFFIX__\n"
49144"# define __int8_c_suffix __INT32_C_SUFFIX__\n"
49145"#else\n"
49146"# undef __int32_c_suffix\n"
49147"# undef __int16_c_suffix\n"
49148"# undef __int8_c_suffix\n"
49149"# endif /* __INT32_C_SUFFIX__ */\n"
49150"#endif /* __INT32_TYPE__ */\n"
49151"\n"
49152"#ifdef __int_least32_t\n"
49153"# ifdef __int32_c_suffix\n"
49154"# define INT32_C(v) __int_c(v, __int32_c_suffix)\n"
49155"# define UINT32_C(v) __uint_c(v, __int32_c_suffix)\n"
49156"# else\n"
49157"# define INT32_C(v) v\n"
49158"# define UINT32_C(v) v ## U\n"
49159"# endif /* __int32_c_suffix */\n"
49160"#endif /* __int_least32_t */\n"
49161"\n"
49162"\n"
49163"#ifdef __INT24_TYPE__\n"
49164"# ifdef __INT24_C_SUFFIX__\n"
49165"# define INT24_C(v) __int_c(v, __INT24_C_SUFFIX__)\n"
49166"# define UINT24_C(v) __uint_c(v, __INT24_C_SUFFIX__)\n"
49167"# define __int16_c_suffix __INT24_C_SUFFIX__\n"
49168"# define __int8_c_suffix __INT24_C_SUFFIX__\n"
49169"# else\n"
49170"# define INT24_C(v) v\n"
49171"# define UINT24_C(v) v ## U\n"
49172"# undef __int16_c_suffix\n"
49173"# undef __int8_c_suffix\n"
49174"# endif /* __INT24_C_SUFFIX__ */\n"
49175"#endif /* __INT24_TYPE__ */\n"
49176"\n"
49177"\n"
49178"#ifdef __INT16_TYPE__\n"
49179"# ifdef __INT16_C_SUFFIX__\n"
49180"# define __int16_c_suffix __INT16_C_SUFFIX__\n"
49181"# define __int8_c_suffix __INT16_C_SUFFIX__\n"
49182"#else\n"
49183"# undef __int16_c_suffix\n"
49184"# undef __int8_c_suffix\n"
49185"# endif /* __INT16_C_SUFFIX__ */\n"
49186"#endif /* __INT16_TYPE__ */\n"
49187"\n"
49188"#ifdef __int_least16_t\n"
49189"# ifdef __int16_c_suffix\n"
49190"# define INT16_C(v) __int_c(v, __int16_c_suffix)\n"
49191"# define UINT16_C(v) __uint_c(v, __int16_c_suffix)\n"
49192"# else\n"
49193"# define INT16_C(v) v\n"
49194"# define UINT16_C(v) v ## U\n"
49195"# endif /* __int16_c_suffix */\n"
49196"#endif /* __int_least16_t */\n"
49197"\n"
49198"\n"
49199"#ifdef __INT8_TYPE__\n"
49200"# ifdef __INT8_C_SUFFIX__\n"
49201"# define __int8_c_suffix __INT8_C_SUFFIX__\n"
49202"#else\n"
49203"# undef __int8_c_suffix\n"
49204"# endif /* __INT8_C_SUFFIX__ */\n"
49205"#endif /* __INT8_TYPE__ */\n"
49206"\n"
49207"#ifdef __int_least8_t\n"
49208"# ifdef __int8_c_suffix\n"
49209"# define INT8_C(v) __int_c(v, __int8_c_suffix)\n"
49210"# define UINT8_C(v) __uint_c(v, __int8_c_suffix)\n"
49211"# else\n"
49212"# define INT8_C(v) v\n"
49213"# define UINT8_C(v) v ## U\n"
49214"# endif /* __int8_c_suffix */\n"
49215"#endif /* __int_least8_t */\n"
49216"\n"
49217"\n"
49218"/* C99 7.18.2.1 Limits of exact-width integer types.\n"
49219" * C99 7.18.2.2 Limits of minimum-width integer types.\n"
49220" * C99 7.18.2.3 Limits of fastest minimum-width integer types.\n"
49221" *\n"
49222" * The presence of limit macros are completely optional in C99. This\n"
49223" * implementation defines limits for all of the types (exact- and\n"
49224" * minimum-width) that it defines above, using the limits of the minimum-width\n"
49225" * type for any types that do not have exact-width representations.\n"
49226" *\n"
49227" * As in the type definitions, this section takes an approach of\n"
49228" * successive-shrinking to determine which limits to use for the standard (8,\n"
49229" * 16, 32, 64) bit widths when they don't have exact representations. It is\n"
49230" * therefore important that the definitions be kept in order of decending\n"
49231" * widths.\n"
49232" *\n"
49233" * Note that C++ should not check __STDC_LIMIT_MACROS here, contrary to the\n"
49234" * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).\n"
49235" */\n"
49236"\n"
49237"#ifdef __INT64_TYPE__\n"
49238"# define INT64_MAX INT64_C( 9223372036854775807)\n"
49239"# define INT64_MIN (-INT64_C( 9223372036854775807)-1)\n"
49240"# define UINT64_MAX UINT64_C(18446744073709551615)\n"
49241"# define __INT_LEAST64_MIN INT64_MIN\n"
49242"# define __INT_LEAST64_MAX INT64_MAX\n"
49243"# define __UINT_LEAST64_MAX UINT64_MAX\n"
49244"# define __INT_LEAST32_MIN INT64_MIN\n"
49245"# define __INT_LEAST32_MAX INT64_MAX\n"
49246"# define __UINT_LEAST32_MAX UINT64_MAX\n"
49247"# define __INT_LEAST16_MIN INT64_MIN\n"
49248"# define __INT_LEAST16_MAX INT64_MAX\n"
49249"# define __UINT_LEAST16_MAX UINT64_MAX\n"
49250"# define __INT_LEAST8_MIN INT64_MIN\n"
49251"# define __INT_LEAST8_MAX INT64_MAX\n"
49252"# define __UINT_LEAST8_MAX UINT64_MAX\n"
49253"#endif /* __INT64_TYPE__ */\n"
49254"\n"
49255"#ifdef __INT_LEAST64_MIN\n"
49256"# define INT_LEAST64_MIN __INT_LEAST64_MIN\n"
49257"# define INT_LEAST64_MAX __INT_LEAST64_MAX\n"
49258"# define UINT_LEAST64_MAX __UINT_LEAST64_MAX\n"
49259"# define INT_FAST64_MIN __INT_LEAST64_MIN\n"
49260"# define INT_FAST64_MAX __INT_LEAST64_MAX\n"
49261"# define UINT_FAST64_MAX __UINT_LEAST64_MAX\n"
49262"#endif /* __INT_LEAST64_MIN */\n"
49263"\n"
49264"\n"
49265"#ifdef __INT56_TYPE__\n"
49266"# define INT56_MAX INT56_C(36028797018963967)\n"
49267"# define INT56_MIN (-INT56_C(36028797018963967)-1)\n"
49268"# define UINT56_MAX UINT56_C(72057594037927935)\n"
49269"# define INT_LEAST56_MIN INT56_MIN\n"
49270"# define INT_LEAST56_MAX INT56_MAX\n"
49271"# define UINT_LEAST56_MAX UINT56_MAX\n"
49272"# define INT_FAST56_MIN INT56_MIN\n"
49273"# define INT_FAST56_MAX INT56_MAX\n"
49274"# define UINT_FAST56_MAX UINT56_MAX\n"
49275"# define __INT_LEAST32_MIN INT56_MIN\n"
49276"# define __INT_LEAST32_MAX INT56_MAX\n"
49277"# define __UINT_LEAST32_MAX UINT56_MAX\n"
49278"# define __INT_LEAST16_MIN INT56_MIN\n"
49279"# define __INT_LEAST16_MAX INT56_MAX\n"
49280"# define __UINT_LEAST16_MAX UINT56_MAX\n"
49281"# define __INT_LEAST8_MIN INT56_MIN\n"
49282"# define __INT_LEAST8_MAX INT56_MAX\n"
49283"# define __UINT_LEAST8_MAX UINT56_MAX\n"
49284"#endif /* __INT56_TYPE__ */\n"
49285"\n"
49286"\n"
49287"#ifdef __INT48_TYPE__\n"
49288"# define INT48_MAX INT48_C(140737488355327)\n"
49289"# define INT48_MIN (-INT48_C(140737488355327)-1)\n"
49290"# define UINT48_MAX UINT48_C(281474976710655)\n"
49291"# define INT_LEAST48_MIN INT48_MIN\n"
49292"# define INT_LEAST48_MAX INT48_MAX\n"
49293"# define UINT_LEAST48_MAX UINT48_MAX\n"
49294"# define INT_FAST48_MIN INT48_MIN\n"
49295"# define INT_FAST48_MAX INT48_MAX\n"
49296"# define UINT_FAST48_MAX UINT48_MAX\n"
49297"# define __INT_LEAST32_MIN INT48_MIN\n"
49298"# define __INT_LEAST32_MAX INT48_MAX\n"
49299"# define __UINT_LEAST32_MAX UINT48_MAX\n"
49300"# define __INT_LEAST16_MIN INT48_MIN\n"
49301"# define __INT_LEAST16_MAX INT48_MAX\n"
49302"# define __UINT_LEAST16_MAX UINT48_MAX\n"
49303"# define __INT_LEAST8_MIN INT48_MIN\n"
49304"# define __INT_LEAST8_MAX INT48_MAX\n"
49305"# define __UINT_LEAST8_MAX UINT48_MAX\n"
49306"#endif /* __INT48_TYPE__ */\n"
49307"\n"
49308"\n"
49309"#ifdef __INT40_TYPE__\n"
49310"# define INT40_MAX INT40_C(549755813887)\n"
49311"# define INT40_MIN (-INT40_C(549755813887)-1)\n"
49312"# define UINT40_MAX UINT40_C(1099511627775)\n"
49313"# define INT_LEAST40_MIN INT40_MIN\n"
49314"# define INT_LEAST40_MAX INT40_MAX\n"
49315"# define UINT_LEAST40_MAX UINT40_MAX\n"
49316"# define INT_FAST40_MIN INT40_MIN\n"
49317"# define INT_FAST40_MAX INT40_MAX\n"
49318"# define UINT_FAST40_MAX UINT40_MAX\n"
49319"# define __INT_LEAST32_MIN INT40_MIN\n"
49320"# define __INT_LEAST32_MAX INT40_MAX\n"
49321"# define __UINT_LEAST32_MAX UINT40_MAX\n"
49322"# define __INT_LEAST16_MIN INT40_MIN\n"
49323"# define __INT_LEAST16_MAX INT40_MAX\n"
49324"# define __UINT_LEAST16_MAX UINT40_MAX\n"
49325"# define __INT_LEAST8_MIN INT40_MIN\n"
49326"# define __INT_LEAST8_MAX INT40_MAX\n"
49327"# define __UINT_LEAST8_MAX UINT40_MAX\n"
49328"#endif /* __INT40_TYPE__ */\n"
49329"\n"
49330"\n"
49331"#ifdef __INT32_TYPE__\n"
49332"# define INT32_MAX INT32_C(2147483647)\n"
49333"# define INT32_MIN (-INT32_C(2147483647)-1)\n"
49334"# define UINT32_MAX UINT32_C(4294967295)\n"
49335"# define __INT_LEAST32_MIN INT32_MIN\n"
49336"# define __INT_LEAST32_MAX INT32_MAX\n"
49337"# define __UINT_LEAST32_MAX UINT32_MAX\n"
49338"# define __INT_LEAST16_MIN INT32_MIN\n"
49339"# define __INT_LEAST16_MAX INT32_MAX\n"
49340"# define __UINT_LEAST16_MAX UINT32_MAX\n"
49341"# define __INT_LEAST8_MIN INT32_MIN\n"
49342"# define __INT_LEAST8_MAX INT32_MAX\n"
49343"# define __UINT_LEAST8_MAX UINT32_MAX\n"
49344"#endif /* __INT32_TYPE__ */\n"
49345"\n"
49346"#ifdef __INT_LEAST32_MIN\n"
49347"# define INT_LEAST32_MIN __INT_LEAST32_MIN\n"
49348"# define INT_LEAST32_MAX __INT_LEAST32_MAX\n"
49349"# define UINT_LEAST32_MAX __UINT_LEAST32_MAX\n"
49350"# define INT_FAST32_MIN __INT_LEAST32_MIN\n"
49351"# define INT_FAST32_MAX __INT_LEAST32_MAX\n"
49352"# define UINT_FAST32_MAX __UINT_LEAST32_MAX\n"
49353"#endif /* __INT_LEAST32_MIN */\n"
49354"\n"
49355"\n"
49356"#ifdef __INT24_TYPE__\n"
49357"# define INT24_MAX INT24_C(8388607)\n"
49358"# define INT24_MIN (-INT24_C(8388607)-1)\n"
49359"# define UINT24_MAX UINT24_C(16777215)\n"
49360"# define INT_LEAST24_MIN INT24_MIN\n"
49361"# define INT_LEAST24_MAX INT24_MAX\n"
49362"# define UINT_LEAST24_MAX UINT24_MAX\n"
49363"# define INT_FAST24_MIN INT24_MIN\n"
49364"# define INT_FAST24_MAX INT24_MAX\n"
49365"# define UINT_FAST24_MAX UINT24_MAX\n"
49366"# define __INT_LEAST16_MIN INT24_MIN\n"
49367"# define __INT_LEAST16_MAX INT24_MAX\n"
49368"# define __UINT_LEAST16_MAX UINT24_MAX\n"
49369"# define __INT_LEAST8_MIN INT24_MIN\n"
49370"# define __INT_LEAST8_MAX INT24_MAX\n"
49371"# define __UINT_LEAST8_MAX UINT24_MAX\n"
49372"#endif /* __INT24_TYPE__ */\n"
49373"\n"
49374"\n"
49375"#ifdef __INT16_TYPE__\n"
49376"#define INT16_MAX INT16_C(32767)\n"
49377"#define INT16_MIN (-INT16_C(32767)-1)\n"
49378"#define UINT16_MAX UINT16_C(65535)\n"
49379"# define __INT_LEAST16_MIN INT16_MIN\n"
49380"# define __INT_LEAST16_MAX INT16_MAX\n"
49381"# define __UINT_LEAST16_MAX UINT16_MAX\n"
49382"# define __INT_LEAST8_MIN INT16_MIN\n"
49383"# define __INT_LEAST8_MAX INT16_MAX\n"
49384"# define __UINT_LEAST8_MAX UINT16_MAX\n"
49385"#endif /* __INT16_TYPE__ */\n"
49386"\n"
49387"#ifdef __INT_LEAST16_MIN\n"
49388"# define INT_LEAST16_MIN __INT_LEAST16_MIN\n"
49389"# define INT_LEAST16_MAX __INT_LEAST16_MAX\n"
49390"# define UINT_LEAST16_MAX __UINT_LEAST16_MAX\n"
49391"# define INT_FAST16_MIN __INT_LEAST16_MIN\n"
49392"# define INT_FAST16_MAX __INT_LEAST16_MAX\n"
49393"# define UINT_FAST16_MAX __UINT_LEAST16_MAX\n"
49394"#endif /* __INT_LEAST16_MIN */\n"
49395"\n"
49396"\n"
49397"#ifdef __INT8_TYPE__\n"
49398"# define INT8_MAX INT8_C(127)\n"
49399"# define INT8_MIN (-INT8_C(127)-1)\n"
49400"# define UINT8_MAX UINT8_C(255)\n"
49401"# define __INT_LEAST8_MIN INT8_MIN\n"
49402"# define __INT_LEAST8_MAX INT8_MAX\n"
49403"# define __UINT_LEAST8_MAX UINT8_MAX\n"
49404"#endif /* __INT8_TYPE__ */\n"
49405"\n"
49406"#ifdef __INT_LEAST8_MIN\n"
49407"# define INT_LEAST8_MIN __INT_LEAST8_MIN\n"
49408"# define INT_LEAST8_MAX __INT_LEAST8_MAX\n"
49409"# define UINT_LEAST8_MAX __UINT_LEAST8_MAX\n"
49410"# define INT_FAST8_MIN __INT_LEAST8_MIN\n"
49411"# define INT_FAST8_MAX __INT_LEAST8_MAX\n"
49412"# define UINT_FAST8_MAX __UINT_LEAST8_MAX\n"
49413"#endif /* __INT_LEAST8_MIN */\n"
49414"\n"
49415"/* Some utility macros */\n"
49416"#define __INTN_MIN(n) __stdint_join3( INT, n, _MIN)\n"
49417"#define __INTN_MAX(n) __stdint_join3( INT, n, _MAX)\n"
49418"#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX)\n"
49419"#define __INTN_C(n, v) __stdint_join3( INT, n, _C(v))\n"
49420"#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v))\n"
49421"\n"
49422"/* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */\n"
49423"/* C99 7.18.3 Limits of other integer types. */\n"
49424"\n"
49425"#define INTPTR_MIN (-__INTPTR_MAX__-1)\n"
49426"#define INTPTR_MAX __INTPTR_MAX__\n"
49427"#define UINTPTR_MAX __UINTPTR_MAX__\n"
49428"#define PTRDIFF_MIN (-__PTRDIFF_MAX__-1)\n"
49429"#define PTRDIFF_MAX __PTRDIFF_MAX__\n"
49430"#define SIZE_MAX __SIZE_MAX__\n"
49431"\n"
49432"/* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__\n"
49433" * is enabled. */\n"
49434"#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1\n"
49435"#define RSIZE_MAX (SIZE_MAX >> 1)\n"
49436"#endif\n"
49437"\n"
49438"/* C99 7.18.2.5 Limits of greatest-width integer types. */\n"
49439"#define INTMAX_MIN (-__INTMAX_MAX__-1)\n"
49440"#define INTMAX_MAX __INTMAX_MAX__\n"
49441"#define UINTMAX_MAX __UINTMAX_MAX__\n"
49442"\n"
49443"/* C99 7.18.3 Limits of other integer types. */\n"
49444"#define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__)\n"
49445"#define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__)\n"
49446"#ifdef __WINT_UNSIGNED__\n"
49447"# define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0)\n"
49448"# define WINT_MAX __UINTN_MAX(__WINT_WIDTH__)\n"
49449"#else\n"
49450"# define WINT_MIN __INTN_MIN(__WINT_WIDTH__)\n"
49451"# define WINT_MAX __INTN_MAX(__WINT_WIDTH__)\n"
49452"#endif\n"
49453"\n"
49454"#ifndef WCHAR_MAX\n"
49455"# define WCHAR_MAX __WCHAR_MAX__\n"
49456"#endif\n"
49457"#ifndef WCHAR_MIN\n"
49458"# if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__)\n"
49459"# define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__)\n"
49460"# else\n"
49461"# define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0)\n"
49462"# endif\n"
49463"#endif\n"
49464"\n"
49465"/* 7.18.4.2 Macros for greatest-width integer constants. */\n"
49466"#define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__)\n"
49467"#define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__)\n"
49468"\n"
49469"#endif /* __STDC_HOSTED__ */\n"
49470"#endif /* __CLANG_STDINT_H2 */\n"
49471"" } ,
49472 { "/builtins/stdnoreturn.h" , "/*===---- stdnoreturn.h - Standard header for noreturn macro ---------------===\n"
49473" *\n"
49474" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
49475" * of this software and associated documentation files (the \"Software\"), to deal\n"
49476" * in the Software without restriction, including without limitation the rights\n"
49477" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
49478" * copies of the Software, and to permit persons to whom the Software is\n"
49479" * furnished to do so, subject to the following conditions:\n"
49480" *\n"
49481" * The above copyright notice and this permission notice shall be included in\n"
49482" * all copies or substantial portions of the Software.\n"
49483" *\n"
49484" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
49485" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
49486" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
49487" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
49488" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
49489" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
49490" * THE SOFTWARE.\n"
49491" *\n"
49492" *===-----------------------------------------------------------------------===\n"
49493" */\n"
49494"\n"
49495"#ifndef __STDNORETURN_H\n"
49496"#define __STDNORETURN_H\n"
49497"\n"
49498"#define noreturn _Noreturn\n"
49499"#define __noreturn_is_defined 1\n"
49500"\n"
49501"#endif /* __STDNORETURN_H */\n"
49502"" } ,
49503 { "/builtins/tbmintrin.h" , "/*===---- tbmintrin.h - TBM intrinsics -------------------------------------===\n"
49504" *\n"
49505" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
49506" * of this software and associated documentation files (the \"Software\"), to deal\n"
49507" * in the Software without restriction, including without limitation the rights\n"
49508" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
49509" * copies of the Software, and to permit persons to whom the Software is\n"
49510" * furnished to do so, subject to the following conditions:\n"
49511" *\n"
49512" * The above copyright notice and this permission notice shall be included in\n"
49513" * all copies or substantial portions of the Software.\n"
49514" *\n"
49515" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
49516" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
49517" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
49518" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
49519" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
49520" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
49521" * THE SOFTWARE.\n"
49522" *\n"
49523" *===-----------------------------------------------------------------------===\n"
49524" */\n"
49525"\n"
49526"#ifndef __X86INTRIN_H\n"
49527"#error \"Never use <tbmintrin.h> directly; include <x86intrin.h> instead.\"\n"
49528"#endif\n"
49529"\n"
49530"#ifndef __TBMINTRIN_H\n"
49531"#define __TBMINTRIN_H\n"
49532"\n"
49533"/* Define the default attributes for the functions in this file. */\n"
49534"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"tbm\")))\n"
49535"\n"
49536"#define __bextri_u32(a, b) \\\n"
49537" ((unsigned int)__builtin_ia32_bextri_u32((unsigned int)(a), \\\n"
49538" (unsigned int)(b)))\n"
49539"\n"
49540"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49541"__blcfill_u32(unsigned int __a)\n"
49542"{\n"
49543" return __a & (__a + 1);\n"
49544"}\n"
49545"\n"
49546"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49547"__blci_u32(unsigned int __a)\n"
49548"{\n"
49549" return __a | ~(__a + 1);\n"
49550"}\n"
49551"\n"
49552"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49553"__blcic_u32(unsigned int __a)\n"
49554"{\n"
49555" return ~__a & (__a + 1);\n"
49556"}\n"
49557"\n"
49558"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49559"__blcmsk_u32(unsigned int __a)\n"
49560"{\n"
49561" return __a ^ (__a + 1);\n"
49562"}\n"
49563"\n"
49564"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49565"__blcs_u32(unsigned int __a)\n"
49566"{\n"
49567" return __a | (__a + 1);\n"
49568"}\n"
49569"\n"
49570"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49571"__blsfill_u32(unsigned int __a)\n"
49572"{\n"
49573" return __a | (__a - 1);\n"
49574"}\n"
49575"\n"
49576"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49577"__blsic_u32(unsigned int __a)\n"
49578"{\n"
49579" return ~__a | (__a - 1);\n"
49580"}\n"
49581"\n"
49582"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49583"__t1mskc_u32(unsigned int __a)\n"
49584"{\n"
49585" return ~__a | (__a + 1);\n"
49586"}\n"
49587"\n"
49588"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49589"__tzmsk_u32(unsigned int __a)\n"
49590"{\n"
49591" return ~__a & (__a - 1);\n"
49592"}\n"
49593"\n"
49594"#ifdef __x86_64__\n"
49595"#define __bextri_u64(a, b) \\\n"
49596" ((unsigned long long)__builtin_ia32_bextri_u64((unsigned long long)(a), \\\n"
49597" (unsigned long long)(b)))\n"
49598"\n"
49599"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49600"__blcfill_u64(unsigned long long __a)\n"
49601"{\n"
49602" return __a & (__a + 1);\n"
49603"}\n"
49604"\n"
49605"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49606"__blci_u64(unsigned long long __a)\n"
49607"{\n"
49608" return __a | ~(__a + 1);\n"
49609"}\n"
49610"\n"
49611"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49612"__blcic_u64(unsigned long long __a)\n"
49613"{\n"
49614" return ~__a & (__a + 1);\n"
49615"}\n"
49616"\n"
49617"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49618"__blcmsk_u64(unsigned long long __a)\n"
49619"{\n"
49620" return __a ^ (__a + 1);\n"
49621"}\n"
49622"\n"
49623"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49624"__blcs_u64(unsigned long long __a)\n"
49625"{\n"
49626" return __a | (__a + 1);\n"
49627"}\n"
49628"\n"
49629"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49630"__blsfill_u64(unsigned long long __a)\n"
49631"{\n"
49632" return __a | (__a - 1);\n"
49633"}\n"
49634"\n"
49635"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49636"__blsic_u64(unsigned long long __a)\n"
49637"{\n"
49638" return ~__a | (__a - 1);\n"
49639"}\n"
49640"\n"
49641"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49642"__t1mskc_u64(unsigned long long __a)\n"
49643"{\n"
49644" return ~__a | (__a + 1);\n"
49645"}\n"
49646"\n"
49647"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49648"__tzmsk_u64(unsigned long long __a)\n"
49649"{\n"
49650" return ~__a & (__a - 1);\n"
49651"}\n"
49652"#endif\n"
49653"\n"
49654"#undef __DEFAULT_FN_ATTRS\n"
49655"\n"
49656"#endif /* __TBMINTRIN_H */\n"
49657"" } ,
49658 { "/builtins/tgmath.h" , "/*===---- tgmath.h - Standard header for type generic math ----------------===*\\\n"
49659" *\n"
49660" * Copyright (c) 2009 Howard Hinnant\n"
49661" *\n"
49662" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
49663" * of this software and associated documentation files (the \"Software\"), to deal\n"
49664" * in the Software without restriction, including without limitation the rights\n"
49665" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
49666" * copies of the Software, and to permit persons to whom the Software is\n"
49667" * furnished to do so, subject to the following conditions:\n"
49668" *\n"
49669" * The above copyright notice and this permission notice shall be included in\n"
49670" * all copies or substantial portions of the Software.\n"
49671" *\n"
49672" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
49673" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
49674" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
49675" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
49676" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
49677" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
49678" * THE SOFTWARE.\n"
49679" *\n"
49680"\\*===----------------------------------------------------------------------===*/\n"
49681"\n"
49682"#ifndef __CLANG_TGMATH_H\n"
49683"#define __CLANG_TGMATH_H\n"
49684"\n"
49685"/* C99 7.22 Type-generic math <tgmath.h>. */\n"
49686"#include <math.h>\n"
49687"\n"
49688"/*\n"
49689" * Allow additional definitions and implementation-defined values on Apple\n"
49690" * platforms. This is done after #include <math.h> to avoid depcycle conflicts\n"
49691" * between libcxx and darwin in C++ modules builds.\n"
49692" */\n"
49693"#if defined(__APPLE__) && __STDC_HOSTED__ && __has_include_next(<tgmath.h>)\n"
49694"# include_next <tgmath.h>\n"
49695"#else\n"
49696"\n"
49697"/* C++ handles type genericity with overloading in math.h. */\n"
49698"#ifndef __cplusplus\n"
49699"#include <complex.h>\n"
49700"\n"
49701"#define _TG_ATTRSp __attribute__((__overloadable__))\n"
49702"#define _TG_ATTRS __attribute__((__overloadable__, __always_inline__))\n"
49703"\n"
49704"// promotion\n"
49705"\n"
49706"typedef void _Argument_type_is_not_arithmetic;\n"
49707"static _Argument_type_is_not_arithmetic __tg_promote(...)\n"
49708" __attribute__((__unavailable__,__overloadable__));\n"
49709"static double _TG_ATTRSp __tg_promote(int);\n"
49710"static double _TG_ATTRSp __tg_promote(unsigned int);\n"
49711"static double _TG_ATTRSp __tg_promote(long);\n"
49712"static double _TG_ATTRSp __tg_promote(unsigned long);\n"
49713"static double _TG_ATTRSp __tg_promote(long long);\n"
49714"static double _TG_ATTRSp __tg_promote(unsigned long long);\n"
49715"static float _TG_ATTRSp __tg_promote(float);\n"
49716"static double _TG_ATTRSp __tg_promote(double);\n"
49717"static long double _TG_ATTRSp __tg_promote(long double);\n"
49718"static float _Complex _TG_ATTRSp __tg_promote(float _Complex);\n"
49719"static double _Complex _TG_ATTRSp __tg_promote(double _Complex);\n"
49720"static long double _Complex _TG_ATTRSp __tg_promote(long double _Complex);\n"
49721"\n"
49722"#define __tg_promote1(__x) (__typeof__(__tg_promote(__x)))\n"
49723"#define __tg_promote2(__x, __y) (__typeof__(__tg_promote(__x) + \\\n"
49724" __tg_promote(__y)))\n"
49725"#define __tg_promote3(__x, __y, __z) (__typeof__(__tg_promote(__x) + \\\n"
49726" __tg_promote(__y) + \\\n"
49727" __tg_promote(__z)))\n"
49728"\n"
49729"// acos\n"
49730"\n"
49731"static float\n"
49732" _TG_ATTRS\n"
49733" __tg_acos(float __x) {return acosf(__x);}\n"
49734"\n"
49735"static double\n"
49736" _TG_ATTRS\n"
49737" __tg_acos(double __x) {return acos(__x);}\n"
49738"\n"
49739"static long double\n"
49740" _TG_ATTRS\n"
49741" __tg_acos(long double __x) {return acosl(__x);}\n"
49742"\n"
49743"static float _Complex\n"
49744" _TG_ATTRS\n"
49745" __tg_acos(float _Complex __x) {return cacosf(__x);}\n"
49746"\n"
49747"static double _Complex\n"
49748" _TG_ATTRS\n"
49749" __tg_acos(double _Complex __x) {return cacos(__x);}\n"
49750"\n"
49751"static long double _Complex\n"
49752" _TG_ATTRS\n"
49753" __tg_acos(long double _Complex __x) {return cacosl(__x);}\n"
49754"\n"
49755"#undef acos\n"
49756"#define acos(__x) __tg_acos(__tg_promote1((__x))(__x))\n"
49757"\n"
49758"// asin\n"
49759"\n"
49760"static float\n"
49761" _TG_ATTRS\n"
49762" __tg_asin(float __x) {return asinf(__x);}\n"
49763"\n"
49764"static double\n"
49765" _TG_ATTRS\n"
49766" __tg_asin(double __x) {return asin(__x);}\n"
49767"\n"
49768"static long double\n"
49769" _TG_ATTRS\n"
49770" __tg_asin(long double __x) {return asinl(__x);}\n"
49771"\n"
49772"static float _Complex\n"
49773" _TG_ATTRS\n"
49774" __tg_asin(float _Complex __x) {return casinf(__x);}\n"
49775"\n"
49776"static double _Complex\n"
49777" _TG_ATTRS\n"
49778" __tg_asin(double _Complex __x) {return casin(__x);}\n"
49779"\n"
49780"static long double _Complex\n"
49781" _TG_ATTRS\n"
49782" __tg_asin(long double _Complex __x) {return casinl(__x);}\n"
49783"\n"
49784"#undef asin\n"
49785"#define asin(__x) __tg_asin(__tg_promote1((__x))(__x))\n"
49786"\n"
49787"// atan\n"
49788"\n"
49789"static float\n"
49790" _TG_ATTRS\n"
49791" __tg_atan(float __x) {return atanf(__x);}\n"
49792"\n"
49793"static double\n"
49794" _TG_ATTRS\n"
49795" __tg_atan(double __x) {return atan(__x);}\n"
49796"\n"
49797"static long double\n"
49798" _TG_ATTRS\n"
49799" __tg_atan(long double __x) {return atanl(__x);}\n"
49800"\n"
49801"static float _Complex\n"
49802" _TG_ATTRS\n"
49803" __tg_atan(float _Complex __x) {return catanf(__x);}\n"
49804"\n"
49805"static double _Complex\n"
49806" _TG_ATTRS\n"
49807" __tg_atan(double _Complex __x) {return catan(__x);}\n"
49808"\n"
49809"static long double _Complex\n"
49810" _TG_ATTRS\n"
49811" __tg_atan(long double _Complex __x) {return catanl(__x);}\n"
49812"\n"
49813"#undef atan\n"
49814"#define atan(__x) __tg_atan(__tg_promote1((__x))(__x))\n"
49815"\n"
49816"// acosh\n"
49817"\n"
49818"static float\n"
49819" _TG_ATTRS\n"
49820" __tg_acosh(float __x) {return acoshf(__x);}\n"
49821"\n"
49822"static double\n"
49823" _TG_ATTRS\n"
49824" __tg_acosh(double __x) {return acosh(__x);}\n"
49825"\n"
49826"static long double\n"
49827" _TG_ATTRS\n"
49828" __tg_acosh(long double __x) {return acoshl(__x);}\n"
49829"\n"
49830"static float _Complex\n"
49831" _TG_ATTRS\n"
49832" __tg_acosh(float _Complex __x) {return cacoshf(__x);}\n"
49833"\n"
49834"static double _Complex\n"
49835" _TG_ATTRS\n"
49836" __tg_acosh(double _Complex __x) {return cacosh(__x);}\n"
49837"\n"
49838"static long double _Complex\n"
49839" _TG_ATTRS\n"
49840" __tg_acosh(long double _Complex __x) {return cacoshl(__x);}\n"
49841"\n"
49842"#undef acosh\n"
49843"#define acosh(__x) __tg_acosh(__tg_promote1((__x))(__x))\n"
49844"\n"
49845"// asinh\n"
49846"\n"
49847"static float\n"
49848" _TG_ATTRS\n"
49849" __tg_asinh(float __x) {return asinhf(__x);}\n"
49850"\n"
49851"static double\n"
49852" _TG_ATTRS\n"
49853" __tg_asinh(double __x) {return asinh(__x);}\n"
49854"\n"
49855"static long double\n"
49856" _TG_ATTRS\n"
49857" __tg_asinh(long double __x) {return asinhl(__x);}\n"
49858"\n"
49859"static float _Complex\n"
49860" _TG_ATTRS\n"
49861" __tg_asinh(float _Complex __x) {return casinhf(__x);}\n"
49862"\n"
49863"static double _Complex\n"
49864" _TG_ATTRS\n"
49865" __tg_asinh(double _Complex __x) {return casinh(__x);}\n"
49866"\n"
49867"static long double _Complex\n"
49868" _TG_ATTRS\n"
49869" __tg_asinh(long double _Complex __x) {return casinhl(__x);}\n"
49870"\n"
49871"#undef asinh\n"
49872"#define asinh(__x) __tg_asinh(__tg_promote1((__x))(__x))\n"
49873"\n"
49874"// atanh\n"
49875"\n"
49876"static float\n"
49877" _TG_ATTRS\n"
49878" __tg_atanh(float __x) {return atanhf(__x);}\n"
49879"\n"
49880"static double\n"
49881" _TG_ATTRS\n"
49882" __tg_atanh(double __x) {return atanh(__x);}\n"
49883"\n"
49884"static long double\n"
49885" _TG_ATTRS\n"
49886" __tg_atanh(long double __x) {return atanhl(__x);}\n"
49887"\n"
49888"static float _Complex\n"
49889" _TG_ATTRS\n"
49890" __tg_atanh(float _Complex __x) {return catanhf(__x);}\n"
49891"\n"
49892"static double _Complex\n"
49893" _TG_ATTRS\n"
49894" __tg_atanh(double _Complex __x) {return catanh(__x);}\n"
49895"\n"
49896"static long double _Complex\n"
49897" _TG_ATTRS\n"
49898" __tg_atanh(long double _Complex __x) {return catanhl(__x);}\n"
49899"\n"
49900"#undef atanh\n"
49901"#define atanh(__x) __tg_atanh(__tg_promote1((__x))(__x))\n"
49902"\n"
49903"// cos\n"
49904"\n"
49905"static float\n"
49906" _TG_ATTRS\n"
49907" __tg_cos(float __x) {return cosf(__x);}\n"
49908"\n"
49909"static double\n"
49910" _TG_ATTRS\n"
49911" __tg_cos(double __x) {return cos(__x);}\n"
49912"\n"
49913"static long double\n"
49914" _TG_ATTRS\n"
49915" __tg_cos(long double __x) {return cosl(__x);}\n"
49916"\n"
49917"static float _Complex\n"
49918" _TG_ATTRS\n"
49919" __tg_cos(float _Complex __x) {return ccosf(__x);}\n"
49920"\n"
49921"static double _Complex\n"
49922" _TG_ATTRS\n"
49923" __tg_cos(double _Complex __x) {return ccos(__x);}\n"
49924"\n"
49925"static long double _Complex\n"
49926" _TG_ATTRS\n"
49927" __tg_cos(long double _Complex __x) {return ccosl(__x);}\n"
49928"\n"
49929"#undef cos\n"
49930"#define cos(__x) __tg_cos(__tg_promote1((__x))(__x))\n"
49931"\n"
49932"// sin\n"
49933"\n"
49934"static float\n"
49935" _TG_ATTRS\n"
49936" __tg_sin(float __x) {return sinf(__x);}\n"
49937"\n"
49938"static double\n"
49939" _TG_ATTRS\n"
49940" __tg_sin(double __x) {return sin(__x);}\n"
49941"\n"
49942"static long double\n"
49943" _TG_ATTRS\n"
49944" __tg_sin(long double __x) {return sinl(__x);}\n"
49945"\n"
49946"static float _Complex\n"
49947" _TG_ATTRS\n"
49948" __tg_sin(float _Complex __x) {return csinf(__x);}\n"
49949"\n"
49950"static double _Complex\n"
49951" _TG_ATTRS\n"
49952" __tg_sin(double _Complex __x) {return csin(__x);}\n"
49953"\n"
49954"static long double _Complex\n"
49955" _TG_ATTRS\n"
49956" __tg_sin(long double _Complex __x) {return csinl(__x);}\n"
49957"\n"
49958"#undef sin\n"
49959"#define sin(__x) __tg_sin(__tg_promote1((__x))(__x))\n"
49960"\n"
49961"// tan\n"
49962"\n"
49963"static float\n"
49964" _TG_ATTRS\n"
49965" __tg_tan(float __x) {return tanf(__x);}\n"
49966"\n"
49967"static double\n"
49968" _TG_ATTRS\n"
49969" __tg_tan(double __x) {return tan(__x);}\n"
49970"\n"
49971"static long double\n"
49972" _TG_ATTRS\n"
49973" __tg_tan(long double __x) {return tanl(__x);}\n"
49974"\n"
49975"static float _Complex\n"
49976" _TG_ATTRS\n"
49977" __tg_tan(float _Complex __x) {return ctanf(__x);}\n"
49978"\n"
49979"static double _Complex\n"
49980" _TG_ATTRS\n"
49981" __tg_tan(double _Complex __x) {return ctan(__x);}\n"
49982"\n"
49983"static long double _Complex\n"
49984" _TG_ATTRS\n"
49985" __tg_tan(long double _Complex __x) {return ctanl(__x);}\n"
49986"\n"
49987"#undef tan\n"
49988"#define tan(__x) __tg_tan(__tg_promote1((__x))(__x))\n"
49989"\n"
49990"// cosh\n"
49991"\n"
49992"static float\n"
49993" _TG_ATTRS\n"
49994" __tg_cosh(float __x) {return coshf(__x);}\n"
49995"\n"
49996"static double\n"
49997" _TG_ATTRS\n"
49998" __tg_cosh(double __x) {return cosh(__x);}\n"
49999"\n"
50000"static long double\n"
50001" _TG_ATTRS\n"
50002" __tg_cosh(long double __x) {return coshl(__x);}\n"
50003"\n"
50004"static float _Complex\n"
50005" _TG_ATTRS\n"
50006" __tg_cosh(float _Complex __x) {return ccoshf(__x);}\n"
50007"\n"
50008"static double _Complex\n"
50009" _TG_ATTRS\n"
50010" __tg_cosh(double _Complex __x) {return ccosh(__x);}\n"
50011"\n"
50012"static long double _Complex\n"
50013" _TG_ATTRS\n"
50014" __tg_cosh(long double _Complex __x) {return ccoshl(__x);}\n"
50015"\n"
50016"#undef cosh\n"
50017"#define cosh(__x) __tg_cosh(__tg_promote1((__x))(__x))\n"
50018"\n"
50019"// sinh\n"
50020"\n"
50021"static float\n"
50022" _TG_ATTRS\n"
50023" __tg_sinh(float __x) {return sinhf(__x);}\n"
50024"\n"
50025"static double\n"
50026" _TG_ATTRS\n"
50027" __tg_sinh(double __x) {return sinh(__x);}\n"
50028"\n"
50029"static long double\n"
50030" _TG_ATTRS\n"
50031" __tg_sinh(long double __x) {return sinhl(__x);}\n"
50032"\n"
50033"static float _Complex\n"
50034" _TG_ATTRS\n"
50035" __tg_sinh(float _Complex __x) {return csinhf(__x);}\n"
50036"\n"
50037"static double _Complex\n"
50038" _TG_ATTRS\n"
50039" __tg_sinh(double _Complex __x) {return csinh(__x);}\n"
50040"\n"
50041"static long double _Complex\n"
50042" _TG_ATTRS\n"
50043" __tg_sinh(long double _Complex __x) {return csinhl(__x);}\n"
50044"\n"
50045"#undef sinh\n"
50046"#define sinh(__x) __tg_sinh(__tg_promote1((__x))(__x))\n"
50047"\n"
50048"// tanh\n"
50049"\n"
50050"static float\n"
50051" _TG_ATTRS\n"
50052" __tg_tanh(float __x) {return tanhf(__x);}\n"
50053"\n"
50054"static double\n"
50055" _TG_ATTRS\n"
50056" __tg_tanh(double __x) {return tanh(__x);}\n"
50057"\n"
50058"static long double\n"
50059" _TG_ATTRS\n"
50060" __tg_tanh(long double __x) {return tanhl(__x);}\n"
50061"\n"
50062"static float _Complex\n"
50063" _TG_ATTRS\n"
50064" __tg_tanh(float _Complex __x) {return ctanhf(__x);}\n"
50065"\n"
50066"static double _Complex\n"
50067" _TG_ATTRS\n"
50068" __tg_tanh(double _Complex __x) {return ctanh(__x);}\n"
50069"\n"
50070"static long double _Complex\n"
50071" _TG_ATTRS\n"
50072" __tg_tanh(long double _Complex __x) {return ctanhl(__x);}\n"
50073"\n"
50074"#undef tanh\n"
50075"#define tanh(__x) __tg_tanh(__tg_promote1((__x))(__x))\n"
50076"\n"
50077"// exp\n"
50078"\n"
50079"static float\n"
50080" _TG_ATTRS\n"
50081" __tg_exp(float __x) {return expf(__x);}\n"
50082"\n"
50083"static double\n"
50084" _TG_ATTRS\n"
50085" __tg_exp(double __x) {return exp(__x);}\n"
50086"\n"
50087"static long double\n"
50088" _TG_ATTRS\n"
50089" __tg_exp(long double __x) {return expl(__x);}\n"
50090"\n"
50091"static float _Complex\n"
50092" _TG_ATTRS\n"
50093" __tg_exp(float _Complex __x) {return cexpf(__x);}\n"
50094"\n"
50095"static double _Complex\n"
50096" _TG_ATTRS\n"
50097" __tg_exp(double _Complex __x) {return cexp(__x);}\n"
50098"\n"
50099"static long double _Complex\n"
50100" _TG_ATTRS\n"
50101" __tg_exp(long double _Complex __x) {return cexpl(__x);}\n"
50102"\n"
50103"#undef exp\n"
50104"#define exp(__x) __tg_exp(__tg_promote1((__x))(__x))\n"
50105"\n"
50106"// log\n"
50107"\n"
50108"static float\n"
50109" _TG_ATTRS\n"
50110" __tg_log(float __x) {return logf(__x);}\n"
50111"\n"
50112"static double\n"
50113" _TG_ATTRS\n"
50114" __tg_log(double __x) {return log(__x);}\n"
50115"\n"
50116"static long double\n"
50117" _TG_ATTRS\n"
50118" __tg_log(long double __x) {return logl(__x);}\n"
50119"\n"
50120"static float _Complex\n"
50121" _TG_ATTRS\n"
50122" __tg_log(float _Complex __x) {return clogf(__x);}\n"
50123"\n"
50124"static double _Complex\n"
50125" _TG_ATTRS\n"
50126" __tg_log(double _Complex __x) {return clog(__x);}\n"
50127"\n"
50128"static long double _Complex\n"
50129" _TG_ATTRS\n"
50130" __tg_log(long double _Complex __x) {return clogl(__x);}\n"
50131"\n"
50132"#undef log\n"
50133"#define log(__x) __tg_log(__tg_promote1((__x))(__x))\n"
50134"\n"
50135"// pow\n"
50136"\n"
50137"static float\n"
50138" _TG_ATTRS\n"
50139" __tg_pow(float __x, float __y) {return powf(__x, __y);}\n"
50140"\n"
50141"static double\n"
50142" _TG_ATTRS\n"
50143" __tg_pow(double __x, double __y) {return pow(__x, __y);}\n"
50144"\n"
50145"static long double\n"
50146" _TG_ATTRS\n"
50147" __tg_pow(long double __x, long double __y) {return powl(__x, __y);}\n"
50148"\n"
50149"static float _Complex\n"
50150" _TG_ATTRS\n"
50151" __tg_pow(float _Complex __x, float _Complex __y) {return cpowf(__x, __y);}\n"
50152"\n"
50153"static double _Complex\n"
50154" _TG_ATTRS\n"
50155" __tg_pow(double _Complex __x, double _Complex __y) {return cpow(__x, __y);}\n"
50156"\n"
50157"static long double _Complex\n"
50158" _TG_ATTRS\n"
50159" __tg_pow(long double _Complex __x, long double _Complex __y)\n"
50160" {return cpowl(__x, __y);}\n"
50161"\n"
50162"#undef pow\n"
50163"#define pow(__x, __y) __tg_pow(__tg_promote2((__x), (__y))(__x), \\\n"
50164" __tg_promote2((__x), (__y))(__y))\n"
50165"\n"
50166"// sqrt\n"
50167"\n"
50168"static float\n"
50169" _TG_ATTRS\n"
50170" __tg_sqrt(float __x) {return sqrtf(__x);}\n"
50171"\n"
50172"static double\n"
50173" _TG_ATTRS\n"
50174" __tg_sqrt(double __x) {return sqrt(__x);}\n"
50175"\n"
50176"static long double\n"
50177" _TG_ATTRS\n"
50178" __tg_sqrt(long double __x) {return sqrtl(__x);}\n"
50179"\n"
50180"static float _Complex\n"
50181" _TG_ATTRS\n"
50182" __tg_sqrt(float _Complex __x) {return csqrtf(__x);}\n"
50183"\n"
50184"static double _Complex\n"
50185" _TG_ATTRS\n"
50186" __tg_sqrt(double _Complex __x) {return csqrt(__x);}\n"
50187"\n"
50188"static long double _Complex\n"
50189" _TG_ATTRS\n"
50190" __tg_sqrt(long double _Complex __x) {return csqrtl(__x);}\n"
50191"\n"
50192"#undef sqrt\n"
50193"#define sqrt(__x) __tg_sqrt(__tg_promote1((__x))(__x))\n"
50194"\n"
50195"// fabs\n"
50196"\n"
50197"static float\n"
50198" _TG_ATTRS\n"
50199" __tg_fabs(float __x) {return fabsf(__x);}\n"
50200"\n"
50201"static double\n"
50202" _TG_ATTRS\n"
50203" __tg_fabs(double __x) {return fabs(__x);}\n"
50204"\n"
50205"static long double\n"
50206" _TG_ATTRS\n"
50207" __tg_fabs(long double __x) {return fabsl(__x);}\n"
50208"\n"
50209"static float\n"
50210" _TG_ATTRS\n"
50211" __tg_fabs(float _Complex __x) {return cabsf(__x);}\n"
50212"\n"
50213"static double\n"
50214" _TG_ATTRS\n"
50215" __tg_fabs(double _Complex __x) {return cabs(__x);}\n"
50216"\n"
50217"static long double\n"
50218" _TG_ATTRS\n"
50219" __tg_fabs(long double _Complex __x) {return cabsl(__x);}\n"
50220"\n"
50221"#undef fabs\n"
50222"#define fabs(__x) __tg_fabs(__tg_promote1((__x))(__x))\n"
50223"\n"
50224"// atan2\n"
50225"\n"
50226"static float\n"
50227" _TG_ATTRS\n"
50228" __tg_atan2(float __x, float __y) {return atan2f(__x, __y);}\n"
50229"\n"
50230"static double\n"
50231" _TG_ATTRS\n"
50232" __tg_atan2(double __x, double __y) {return atan2(__x, __y);}\n"
50233"\n"
50234"static long double\n"
50235" _TG_ATTRS\n"
50236" __tg_atan2(long double __x, long double __y) {return atan2l(__x, __y);}\n"
50237"\n"
50238"#undef atan2\n"
50239"#define atan2(__x, __y) __tg_atan2(__tg_promote2((__x), (__y))(__x), \\\n"
50240" __tg_promote2((__x), (__y))(__y))\n"
50241"\n"
50242"// cbrt\n"
50243"\n"
50244"static float\n"
50245" _TG_ATTRS\n"
50246" __tg_cbrt(float __x) {return cbrtf(__x);}\n"
50247"\n"
50248"static double\n"
50249" _TG_ATTRS\n"
50250" __tg_cbrt(double __x) {return cbrt(__x);}\n"
50251"\n"
50252"static long double\n"
50253" _TG_ATTRS\n"
50254" __tg_cbrt(long double __x) {return cbrtl(__x);}\n"
50255"\n"
50256"#undef cbrt\n"
50257"#define cbrt(__x) __tg_cbrt(__tg_promote1((__x))(__x))\n"
50258"\n"
50259"// ceil\n"
50260"\n"
50261"static float\n"
50262" _TG_ATTRS\n"
50263" __tg_ceil(float __x) {return ceilf(__x);}\n"
50264"\n"
50265"static double\n"
50266" _TG_ATTRS\n"
50267" __tg_ceil(double __x) {return ceil(__x);}\n"
50268"\n"
50269"static long double\n"
50270" _TG_ATTRS\n"
50271" __tg_ceil(long double __x) {return ceill(__x);}\n"
50272"\n"
50273"#undef ceil\n"
50274"#define ceil(__x) __tg_ceil(__tg_promote1((__x))(__x))\n"
50275"\n"
50276"// copysign\n"
50277"\n"
50278"static float\n"
50279" _TG_ATTRS\n"
50280" __tg_copysign(float __x, float __y) {return copysignf(__x, __y);}\n"
50281"\n"
50282"static double\n"
50283" _TG_ATTRS\n"
50284" __tg_copysign(double __x, double __y) {return copysign(__x, __y);}\n"
50285"\n"
50286"static long double\n"
50287" _TG_ATTRS\n"
50288" __tg_copysign(long double __x, long double __y) {return copysignl(__x, __y);}\n"
50289"\n"
50290"#undef copysign\n"
50291"#define copysign(__x, __y) __tg_copysign(__tg_promote2((__x), (__y))(__x), \\\n"
50292" __tg_promote2((__x), (__y))(__y))\n"
50293"\n"
50294"// erf\n"
50295"\n"
50296"static float\n"
50297" _TG_ATTRS\n"
50298" __tg_erf(float __x) {return erff(__x);}\n"
50299"\n"
50300"static double\n"
50301" _TG_ATTRS\n"
50302" __tg_erf(double __x) {return erf(__x);}\n"
50303"\n"
50304"static long double\n"
50305" _TG_ATTRS\n"
50306" __tg_erf(long double __x) {return erfl(__x);}\n"
50307"\n"
50308"#undef erf\n"
50309"#define erf(__x) __tg_erf(__tg_promote1((__x))(__x))\n"
50310"\n"
50311"// erfc\n"
50312"\n"
50313"static float\n"
50314" _TG_ATTRS\n"
50315" __tg_erfc(float __x) {return erfcf(__x);}\n"
50316"\n"
50317"static double\n"
50318" _TG_ATTRS\n"
50319" __tg_erfc(double __x) {return erfc(__x);}\n"
50320"\n"
50321"static long double\n"
50322" _TG_ATTRS\n"
50323" __tg_erfc(long double __x) {return erfcl(__x);}\n"
50324"\n"
50325"#undef erfc\n"
50326"#define erfc(__x) __tg_erfc(__tg_promote1((__x))(__x))\n"
50327"\n"
50328"// exp2\n"
50329"\n"
50330"static float\n"
50331" _TG_ATTRS\n"
50332" __tg_exp2(float __x) {return exp2f(__x);}\n"
50333"\n"
50334"static double\n"
50335" _TG_ATTRS\n"
50336" __tg_exp2(double __x) {return exp2(__x);}\n"
50337"\n"
50338"static long double\n"
50339" _TG_ATTRS\n"
50340" __tg_exp2(long double __x) {return exp2l(__x);}\n"
50341"\n"
50342"#undef exp2\n"
50343"#define exp2(__x) __tg_exp2(__tg_promote1((__x))(__x))\n"
50344"\n"
50345"// expm1\n"
50346"\n"
50347"static float\n"
50348" _TG_ATTRS\n"
50349" __tg_expm1(float __x) {return expm1f(__x);}\n"
50350"\n"
50351"static double\n"
50352" _TG_ATTRS\n"
50353" __tg_expm1(double __x) {return expm1(__x);}\n"
50354"\n"
50355"static long double\n"
50356" _TG_ATTRS\n"
50357" __tg_expm1(long double __x) {return expm1l(__x);}\n"
50358"\n"
50359"#undef expm1\n"
50360"#define expm1(__x) __tg_expm1(__tg_promote1((__x))(__x))\n"
50361"\n"
50362"// fdim\n"
50363"\n"
50364"static float\n"
50365" _TG_ATTRS\n"
50366" __tg_fdim(float __x, float __y) {return fdimf(__x, __y);}\n"
50367"\n"
50368"static double\n"
50369" _TG_ATTRS\n"
50370" __tg_fdim(double __x, double __y) {return fdim(__x, __y);}\n"
50371"\n"
50372"static long double\n"
50373" _TG_ATTRS\n"
50374" __tg_fdim(long double __x, long double __y) {return fdiml(__x, __y);}\n"
50375"\n"
50376"#undef fdim\n"
50377"#define fdim(__x, __y) __tg_fdim(__tg_promote2((__x), (__y))(__x), \\\n"
50378" __tg_promote2((__x), (__y))(__y))\n"
50379"\n"
50380"// floor\n"
50381"\n"
50382"static float\n"
50383" _TG_ATTRS\n"
50384" __tg_floor(float __x) {return floorf(__x);}\n"
50385"\n"
50386"static double\n"
50387" _TG_ATTRS\n"
50388" __tg_floor(double __x) {return floor(__x);}\n"
50389"\n"
50390"static long double\n"
50391" _TG_ATTRS\n"
50392" __tg_floor(long double __x) {return floorl(__x);}\n"
50393"\n"
50394"#undef floor\n"
50395"#define floor(__x) __tg_floor(__tg_promote1((__x))(__x))\n"
50396"\n"
50397"// fma\n"
50398"\n"
50399"static float\n"
50400" _TG_ATTRS\n"
50401" __tg_fma(float __x, float __y, float __z)\n"
50402" {return fmaf(__x, __y, __z);}\n"
50403"\n"
50404"static double\n"
50405" _TG_ATTRS\n"
50406" __tg_fma(double __x, double __y, double __z)\n"
50407" {return fma(__x, __y, __z);}\n"
50408"\n"
50409"static long double\n"
50410" _TG_ATTRS\n"
50411" __tg_fma(long double __x,long double __y, long double __z)\n"
50412" {return fmal(__x, __y, __z);}\n"
50413"\n"
50414"#undef fma\n"
50415"#define fma(__x, __y, __z) \\\n"
50416" __tg_fma(__tg_promote3((__x), (__y), (__z))(__x), \\\n"
50417" __tg_promote3((__x), (__y), (__z))(__y), \\\n"
50418" __tg_promote3((__x), (__y), (__z))(__z))\n"
50419"\n"
50420"// fmax\n"
50421"\n"
50422"static float\n"
50423" _TG_ATTRS\n"
50424" __tg_fmax(float __x, float __y) {return fmaxf(__x, __y);}\n"
50425"\n"
50426"static double\n"
50427" _TG_ATTRS\n"
50428" __tg_fmax(double __x, double __y) {return fmax(__x, __y);}\n"
50429"\n"
50430"static long double\n"
50431" _TG_ATTRS\n"
50432" __tg_fmax(long double __x, long double __y) {return fmaxl(__x, __y);}\n"
50433"\n"
50434"#undef fmax\n"
50435"#define fmax(__x, __y) __tg_fmax(__tg_promote2((__x), (__y))(__x), \\\n"
50436" __tg_promote2((__x), (__y))(__y))\n"
50437"\n"
50438"// fmin\n"
50439"\n"
50440"static float\n"
50441" _TG_ATTRS\n"
50442" __tg_fmin(float __x, float __y) {return fminf(__x, __y);}\n"
50443"\n"
50444"static double\n"
50445" _TG_ATTRS\n"
50446" __tg_fmin(double __x, double __y) {return fmin(__x, __y);}\n"
50447"\n"
50448"static long double\n"
50449" _TG_ATTRS\n"
50450" __tg_fmin(long double __x, long double __y) {return fminl(__x, __y);}\n"
50451"\n"
50452"#undef fmin\n"
50453"#define fmin(__x, __y) __tg_fmin(__tg_promote2((__x), (__y))(__x), \\\n"
50454" __tg_promote2((__x), (__y))(__y))\n"
50455"\n"
50456"// fmod\n"
50457"\n"
50458"static float\n"
50459" _TG_ATTRS\n"
50460" __tg_fmod(float __x, float __y) {return fmodf(__x, __y);}\n"
50461"\n"
50462"static double\n"
50463" _TG_ATTRS\n"
50464" __tg_fmod(double __x, double __y) {return fmod(__x, __y);}\n"
50465"\n"
50466"static long double\n"
50467" _TG_ATTRS\n"
50468" __tg_fmod(long double __x, long double __y) {return fmodl(__x, __y);}\n"
50469"\n"
50470"#undef fmod\n"
50471"#define fmod(__x, __y) __tg_fmod(__tg_promote2((__x), (__y))(__x), \\\n"
50472" __tg_promote2((__x), (__y))(__y))\n"
50473"\n"
50474"// frexp\n"
50475"\n"
50476"static float\n"
50477" _TG_ATTRS\n"
50478" __tg_frexp(float __x, int* __y) {return frexpf(__x, __y);}\n"
50479"\n"
50480"static double\n"
50481" _TG_ATTRS\n"
50482" __tg_frexp(double __x, int* __y) {return frexp(__x, __y);}\n"
50483"\n"
50484"static long double\n"
50485" _TG_ATTRS\n"
50486" __tg_frexp(long double __x, int* __y) {return frexpl(__x, __y);}\n"
50487"\n"
50488"#undef frexp\n"
50489"#define frexp(__x, __y) __tg_frexp(__tg_promote1((__x))(__x), __y)\n"
50490"\n"
50491"// hypot\n"
50492"\n"
50493"static float\n"
50494" _TG_ATTRS\n"
50495" __tg_hypot(float __x, float __y) {return hypotf(__x, __y);}\n"
50496"\n"
50497"static double\n"
50498" _TG_ATTRS\n"
50499" __tg_hypot(double __x, double __y) {return hypot(__x, __y);}\n"
50500"\n"
50501"static long double\n"
50502" _TG_ATTRS\n"
50503" __tg_hypot(long double __x, long double __y) {return hypotl(__x, __y);}\n"
50504"\n"
50505"#undef hypot\n"
50506"#define hypot(__x, __y) __tg_hypot(__tg_promote2((__x), (__y))(__x), \\\n"
50507" __tg_promote2((__x), (__y))(__y))\n"
50508"\n"
50509"// ilogb\n"
50510"\n"
50511"static int\n"
50512" _TG_ATTRS\n"
50513" __tg_ilogb(float __x) {return ilogbf(__x);}\n"
50514"\n"
50515"static int\n"
50516" _TG_ATTRS\n"
50517" __tg_ilogb(double __x) {return ilogb(__x);}\n"
50518"\n"
50519"static int\n"
50520" _TG_ATTRS\n"
50521" __tg_ilogb(long double __x) {return ilogbl(__x);}\n"
50522"\n"
50523"#undef ilogb\n"
50524"#define ilogb(__x) __tg_ilogb(__tg_promote1((__x))(__x))\n"
50525"\n"
50526"// ldexp\n"
50527"\n"
50528"static float\n"
50529" _TG_ATTRS\n"
50530" __tg_ldexp(float __x, int __y) {return ldexpf(__x, __y);}\n"
50531"\n"
50532"static double\n"
50533" _TG_ATTRS\n"
50534" __tg_ldexp(double __x, int __y) {return ldexp(__x, __y);}\n"
50535"\n"
50536"static long double\n"
50537" _TG_ATTRS\n"
50538" __tg_ldexp(long double __x, int __y) {return ldexpl(__x, __y);}\n"
50539"\n"
50540"#undef ldexp\n"
50541"#define ldexp(__x, __y) __tg_ldexp(__tg_promote1((__x))(__x), __y)\n"
50542"\n"
50543"// lgamma\n"
50544"\n"
50545"static float\n"
50546" _TG_ATTRS\n"
50547" __tg_lgamma(float __x) {return lgammaf(__x);}\n"
50548"\n"
50549"static double\n"
50550" _TG_ATTRS\n"
50551" __tg_lgamma(double __x) {return lgamma(__x);}\n"
50552"\n"
50553"static long double\n"
50554" _TG_ATTRS\n"
50555" __tg_lgamma(long double __x) {return lgammal(__x);}\n"
50556"\n"
50557"#undef lgamma\n"
50558"#define lgamma(__x) __tg_lgamma(__tg_promote1((__x))(__x))\n"
50559"\n"
50560"// llrint\n"
50561"\n"
50562"static long long\n"
50563" _TG_ATTRS\n"
50564" __tg_llrint(float __x) {return llrintf(__x);}\n"
50565"\n"
50566"static long long\n"
50567" _TG_ATTRS\n"
50568" __tg_llrint(double __x) {return llrint(__x);}\n"
50569"\n"
50570"static long long\n"
50571" _TG_ATTRS\n"
50572" __tg_llrint(long double __x) {return llrintl(__x);}\n"
50573"\n"
50574"#undef llrint\n"
50575"#define llrint(__x) __tg_llrint(__tg_promote1((__x))(__x))\n"
50576"\n"
50577"// llround\n"
50578"\n"
50579"static long long\n"
50580" _TG_ATTRS\n"
50581" __tg_llround(float __x) {return llroundf(__x);}\n"
50582"\n"
50583"static long long\n"
50584" _TG_ATTRS\n"
50585" __tg_llround(double __x) {return llround(__x);}\n"
50586"\n"
50587"static long long\n"
50588" _TG_ATTRS\n"
50589" __tg_llround(long double __x) {return llroundl(__x);}\n"
50590"\n"
50591"#undef llround\n"
50592"#define llround(__x) __tg_llround(__tg_promote1((__x))(__x))\n"
50593"\n"
50594"// log10\n"
50595"\n"
50596"static float\n"
50597" _TG_ATTRS\n"
50598" __tg_log10(float __x) {return log10f(__x);}\n"
50599"\n"
50600"static double\n"
50601" _TG_ATTRS\n"
50602" __tg_log10(double __x) {return log10(__x);}\n"
50603"\n"
50604"static long double\n"
50605" _TG_ATTRS\n"
50606" __tg_log10(long double __x) {return log10l(__x);}\n"
50607"\n"
50608"#undef log10\n"
50609"#define log10(__x) __tg_log10(__tg_promote1((__x))(__x))\n"
50610"\n"
50611"// log1p\n"
50612"\n"
50613"static float\n"
50614" _TG_ATTRS\n"
50615" __tg_log1p(float __x) {return log1pf(__x);}\n"
50616"\n"
50617"static double\n"
50618" _TG_ATTRS\n"
50619" __tg_log1p(double __x) {return log1p(__x);}\n"
50620"\n"
50621"static long double\n"
50622" _TG_ATTRS\n"
50623" __tg_log1p(long double __x) {return log1pl(__x);}\n"
50624"\n"
50625"#undef log1p\n"
50626"#define log1p(__x) __tg_log1p(__tg_promote1((__x))(__x))\n"
50627"\n"
50628"// log2\n"
50629"\n"
50630"static float\n"
50631" _TG_ATTRS\n"
50632" __tg_log2(float __x) {return log2f(__x);}\n"
50633"\n"
50634"static double\n"
50635" _TG_ATTRS\n"
50636" __tg_log2(double __x) {return log2(__x);}\n"
50637"\n"
50638"static long double\n"
50639" _TG_ATTRS\n"
50640" __tg_log2(long double __x) {return log2l(__x);}\n"
50641"\n"
50642"#undef log2\n"
50643"#define log2(__x) __tg_log2(__tg_promote1((__x))(__x))\n"
50644"\n"
50645"// logb\n"
50646"\n"
50647"static float\n"
50648" _TG_ATTRS\n"
50649" __tg_logb(float __x) {return logbf(__x);}\n"
50650"\n"
50651"static double\n"
50652" _TG_ATTRS\n"
50653" __tg_logb(double __x) {return logb(__x);}\n"
50654"\n"
50655"static long double\n"
50656" _TG_ATTRS\n"
50657" __tg_logb(long double __x) {return logbl(__x);}\n"
50658"\n"
50659"#undef logb\n"
50660"#define logb(__x) __tg_logb(__tg_promote1((__x))(__x))\n"
50661"\n"
50662"// lrint\n"
50663"\n"
50664"static long\n"
50665" _TG_ATTRS\n"
50666" __tg_lrint(float __x) {return lrintf(__x);}\n"
50667"\n"
50668"static long\n"
50669" _TG_ATTRS\n"
50670" __tg_lrint(double __x) {return lrint(__x);}\n"
50671"\n"
50672"static long\n"
50673" _TG_ATTRS\n"
50674" __tg_lrint(long double __x) {return lrintl(__x);}\n"
50675"\n"
50676"#undef lrint\n"
50677"#define lrint(__x) __tg_lrint(__tg_promote1((__x))(__x))\n"
50678"\n"
50679"// lround\n"
50680"\n"
50681"static long\n"
50682" _TG_ATTRS\n"
50683" __tg_lround(float __x) {return lroundf(__x);}\n"
50684"\n"
50685"static long\n"
50686" _TG_ATTRS\n"
50687" __tg_lround(double __x) {return lround(__x);}\n"
50688"\n"
50689"static long\n"
50690" _TG_ATTRS\n"
50691" __tg_lround(long double __x) {return lroundl(__x);}\n"
50692"\n"
50693"#undef lround\n"
50694"#define lround(__x) __tg_lround(__tg_promote1((__x))(__x))\n"
50695"\n"
50696"// nearbyint\n"
50697"\n"
50698"static float\n"
50699" _TG_ATTRS\n"
50700" __tg_nearbyint(float __x) {return nearbyintf(__x);}\n"
50701"\n"
50702"static double\n"
50703" _TG_ATTRS\n"
50704" __tg_nearbyint(double __x) {return nearbyint(__x);}\n"
50705"\n"
50706"static long double\n"
50707" _TG_ATTRS\n"
50708" __tg_nearbyint(long double __x) {return nearbyintl(__x);}\n"
50709"\n"
50710"#undef nearbyint\n"
50711"#define nearbyint(__x) __tg_nearbyint(__tg_promote1((__x))(__x))\n"
50712"\n"
50713"// nextafter\n"
50714"\n"
50715"static float\n"
50716" _TG_ATTRS\n"
50717" __tg_nextafter(float __x, float __y) {return nextafterf(__x, __y);}\n"
50718"\n"
50719"static double\n"
50720" _TG_ATTRS\n"
50721" __tg_nextafter(double __x, double __y) {return nextafter(__x, __y);}\n"
50722"\n"
50723"static long double\n"
50724" _TG_ATTRS\n"
50725" __tg_nextafter(long double __x, long double __y) {return nextafterl(__x, __y);}\n"
50726"\n"
50727"#undef nextafter\n"
50728"#define nextafter(__x, __y) __tg_nextafter(__tg_promote2((__x), (__y))(__x), \\\n"
50729" __tg_promote2((__x), (__y))(__y))\n"
50730"\n"
50731"// nexttoward\n"
50732"\n"
50733"static float\n"
50734" _TG_ATTRS\n"
50735" __tg_nexttoward(float __x, long double __y) {return nexttowardf(__x, __y);}\n"
50736"\n"
50737"static double\n"
50738" _TG_ATTRS\n"
50739" __tg_nexttoward(double __x, long double __y) {return nexttoward(__x, __y);}\n"
50740"\n"
50741"static long double\n"
50742" _TG_ATTRS\n"
50743" __tg_nexttoward(long double __x, long double __y) {return nexttowardl(__x, __y);}\n"
50744"\n"
50745"#undef nexttoward\n"
50746"#define nexttoward(__x, __y) __tg_nexttoward(__tg_promote1((__x))(__x), (__y))\n"
50747"\n"
50748"// remainder\n"
50749"\n"
50750"static float\n"
50751" _TG_ATTRS\n"
50752" __tg_remainder(float __x, float __y) {return remainderf(__x, __y);}\n"
50753"\n"
50754"static double\n"
50755" _TG_ATTRS\n"
50756" __tg_remainder(double __x, double __y) {return remainder(__x, __y);}\n"
50757"\n"
50758"static long double\n"
50759" _TG_ATTRS\n"
50760" __tg_remainder(long double __x, long double __y) {return remainderl(__x, __y);}\n"
50761"\n"
50762"#undef remainder\n"
50763"#define remainder(__x, __y) __tg_remainder(__tg_promote2((__x), (__y))(__x), \\\n"
50764" __tg_promote2((__x), (__y))(__y))\n"
50765"\n"
50766"// remquo\n"
50767"\n"
50768"static float\n"
50769" _TG_ATTRS\n"
50770" __tg_remquo(float __x, float __y, int* __z)\n"
50771" {return remquof(__x, __y, __z);}\n"
50772"\n"
50773"static double\n"
50774" _TG_ATTRS\n"
50775" __tg_remquo(double __x, double __y, int* __z)\n"
50776" {return remquo(__x, __y, __z);}\n"
50777"\n"
50778"static long double\n"
50779" _TG_ATTRS\n"
50780" __tg_remquo(long double __x,long double __y, int* __z)\n"
50781" {return remquol(__x, __y, __z);}\n"
50782"\n"
50783"#undef remquo\n"
50784"#define remquo(__x, __y, __z) \\\n"
50785" __tg_remquo(__tg_promote2((__x), (__y))(__x), \\\n"
50786" __tg_promote2((__x), (__y))(__y), \\\n"
50787" (__z))\n"
50788"\n"
50789"// rint\n"
50790"\n"
50791"static float\n"
50792" _TG_ATTRS\n"
50793" __tg_rint(float __x) {return rintf(__x);}\n"
50794"\n"
50795"static double\n"
50796" _TG_ATTRS\n"
50797" __tg_rint(double __x) {return rint(__x);}\n"
50798"\n"
50799"static long double\n"
50800" _TG_ATTRS\n"
50801" __tg_rint(long double __x) {return rintl(__x);}\n"
50802"\n"
50803"#undef rint\n"
50804"#define rint(__x) __tg_rint(__tg_promote1((__x))(__x))\n"
50805"\n"
50806"// round\n"
50807"\n"
50808"static float\n"
50809" _TG_ATTRS\n"
50810" __tg_round(float __x) {return roundf(__x);}\n"
50811"\n"
50812"static double\n"
50813" _TG_ATTRS\n"
50814" __tg_round(double __x) {return round(__x);}\n"
50815"\n"
50816"static long double\n"
50817" _TG_ATTRS\n"
50818" __tg_round(long double __x) {return roundl(__x);}\n"
50819"\n"
50820"#undef round\n"
50821"#define round(__x) __tg_round(__tg_promote1((__x))(__x))\n"
50822"\n"
50823"// scalbn\n"
50824"\n"
50825"static float\n"
50826" _TG_ATTRS\n"
50827" __tg_scalbn(float __x, int __y) {return scalbnf(__x, __y);}\n"
50828"\n"
50829"static double\n"
50830" _TG_ATTRS\n"
50831" __tg_scalbn(double __x, int __y) {return scalbn(__x, __y);}\n"
50832"\n"
50833"static long double\n"
50834" _TG_ATTRS\n"
50835" __tg_scalbn(long double __x, int __y) {return scalbnl(__x, __y);}\n"
50836"\n"
50837"#undef scalbn\n"
50838"#define scalbn(__x, __y) __tg_scalbn(__tg_promote1((__x))(__x), __y)\n"
50839"\n"
50840"// scalbln\n"
50841"\n"
50842"static float\n"
50843" _TG_ATTRS\n"
50844" __tg_scalbln(float __x, long __y) {return scalblnf(__x, __y);}\n"
50845"\n"
50846"static double\n"
50847" _TG_ATTRS\n"
50848" __tg_scalbln(double __x, long __y) {return scalbln(__x, __y);}\n"
50849"\n"
50850"static long double\n"
50851" _TG_ATTRS\n"
50852" __tg_scalbln(long double __x, long __y) {return scalblnl(__x, __y);}\n"
50853"\n"
50854"#undef scalbln\n"
50855"#define scalbln(__x, __y) __tg_scalbln(__tg_promote1((__x))(__x), __y)\n"
50856"\n"
50857"// tgamma\n"
50858"\n"
50859"static float\n"
50860" _TG_ATTRS\n"
50861" __tg_tgamma(float __x) {return tgammaf(__x);}\n"
50862"\n"
50863"static double\n"
50864" _TG_ATTRS\n"
50865" __tg_tgamma(double __x) {return tgamma(__x);}\n"
50866"\n"
50867"static long double\n"
50868" _TG_ATTRS\n"
50869" __tg_tgamma(long double __x) {return tgammal(__x);}\n"
50870"\n"
50871"#undef tgamma\n"
50872"#define tgamma(__x) __tg_tgamma(__tg_promote1((__x))(__x))\n"
50873"\n"
50874"// trunc\n"
50875"\n"
50876"static float\n"
50877" _TG_ATTRS\n"
50878" __tg_trunc(float __x) {return truncf(__x);}\n"
50879"\n"
50880"static double\n"
50881" _TG_ATTRS\n"
50882" __tg_trunc(double __x) {return trunc(__x);}\n"
50883"\n"
50884"static long double\n"
50885" _TG_ATTRS\n"
50886" __tg_trunc(long double __x) {return truncl(__x);}\n"
50887"\n"
50888"#undef trunc\n"
50889"#define trunc(__x) __tg_trunc(__tg_promote1((__x))(__x))\n"
50890"\n"
50891"// carg\n"
50892"\n"
50893"static float\n"
50894" _TG_ATTRS\n"
50895" __tg_carg(float __x) {return atan2f(0.F, __x);}\n"
50896"\n"
50897"static double\n"
50898" _TG_ATTRS\n"
50899" __tg_carg(double __x) {return atan2(0., __x);}\n"
50900"\n"
50901"static long double\n"
50902" _TG_ATTRS\n"
50903" __tg_carg(long double __x) {return atan2l(0.L, __x);}\n"
50904"\n"
50905"static float\n"
50906" _TG_ATTRS\n"
50907" __tg_carg(float _Complex __x) {return cargf(__x);}\n"
50908"\n"
50909"static double\n"
50910" _TG_ATTRS\n"
50911" __tg_carg(double _Complex __x) {return carg(__x);}\n"
50912"\n"
50913"static long double\n"
50914" _TG_ATTRS\n"
50915" __tg_carg(long double _Complex __x) {return cargl(__x);}\n"
50916"\n"
50917"#undef carg\n"
50918"#define carg(__x) __tg_carg(__tg_promote1((__x))(__x))\n"
50919"\n"
50920"// cimag\n"
50921"\n"
50922"static float\n"
50923" _TG_ATTRS\n"
50924" __tg_cimag(float __x) {return 0;}\n"
50925"\n"
50926"static double\n"
50927" _TG_ATTRS\n"
50928" __tg_cimag(double __x) {return 0;}\n"
50929"\n"
50930"static long double\n"
50931" _TG_ATTRS\n"
50932" __tg_cimag(long double __x) {return 0;}\n"
50933"\n"
50934"static float\n"
50935" _TG_ATTRS\n"
50936" __tg_cimag(float _Complex __x) {return cimagf(__x);}\n"
50937"\n"
50938"static double\n"
50939" _TG_ATTRS\n"
50940" __tg_cimag(double _Complex __x) {return cimag(__x);}\n"
50941"\n"
50942"static long double\n"
50943" _TG_ATTRS\n"
50944" __tg_cimag(long double _Complex __x) {return cimagl(__x);}\n"
50945"\n"
50946"#undef cimag\n"
50947"#define cimag(__x) __tg_cimag(__tg_promote1((__x))(__x))\n"
50948"\n"
50949"// conj\n"
50950"\n"
50951"static float _Complex\n"
50952" _TG_ATTRS\n"
50953" __tg_conj(float __x) {return __x;}\n"
50954"\n"
50955"static double _Complex\n"
50956" _TG_ATTRS\n"
50957" __tg_conj(double __x) {return __x;}\n"
50958"\n"
50959"static long double _Complex\n"
50960" _TG_ATTRS\n"
50961" __tg_conj(long double __x) {return __x;}\n"
50962"\n"
50963"static float _Complex\n"
50964" _TG_ATTRS\n"
50965" __tg_conj(float _Complex __x) {return conjf(__x);}\n"
50966"\n"
50967"static double _Complex\n"
50968" _TG_ATTRS\n"
50969" __tg_conj(double _Complex __x) {return conj(__x);}\n"
50970"\n"
50971"static long double _Complex\n"
50972" _TG_ATTRS\n"
50973" __tg_conj(long double _Complex __x) {return conjl(__x);}\n"
50974"\n"
50975"#undef conj\n"
50976"#define conj(__x) __tg_conj(__tg_promote1((__x))(__x))\n"
50977"\n"
50978"// cproj\n"
50979"\n"
50980"static float _Complex\n"
50981" _TG_ATTRS\n"
50982" __tg_cproj(float __x) {return cprojf(__x);}\n"
50983"\n"
50984"static double _Complex\n"
50985" _TG_ATTRS\n"
50986" __tg_cproj(double __x) {return cproj(__x);}\n"
50987"\n"
50988"static long double _Complex\n"
50989" _TG_ATTRS\n"
50990" __tg_cproj(long double __x) {return cprojl(__x);}\n"
50991"\n"
50992"static float _Complex\n"
50993" _TG_ATTRS\n"
50994" __tg_cproj(float _Complex __x) {return cprojf(__x);}\n"
50995"\n"
50996"static double _Complex\n"
50997" _TG_ATTRS\n"
50998" __tg_cproj(double _Complex __x) {return cproj(__x);}\n"
50999"\n"
51000"static long double _Complex\n"
51001" _TG_ATTRS\n"
51002" __tg_cproj(long double _Complex __x) {return cprojl(__x);}\n"
51003"\n"
51004"#undef cproj\n"
51005"#define cproj(__x) __tg_cproj(__tg_promote1((__x))(__x))\n"
51006"\n"
51007"// creal\n"
51008"\n"
51009"static float\n"
51010" _TG_ATTRS\n"
51011" __tg_creal(float __x) {return __x;}\n"
51012"\n"
51013"static double\n"
51014" _TG_ATTRS\n"
51015" __tg_creal(double __x) {return __x;}\n"
51016"\n"
51017"static long double\n"
51018" _TG_ATTRS\n"
51019" __tg_creal(long double __x) {return __x;}\n"
51020"\n"
51021"static float\n"
51022" _TG_ATTRS\n"
51023" __tg_creal(float _Complex __x) {return crealf(__x);}\n"
51024"\n"
51025"static double\n"
51026" _TG_ATTRS\n"
51027" __tg_creal(double _Complex __x) {return creal(__x);}\n"
51028"\n"
51029"static long double\n"
51030" _TG_ATTRS\n"
51031" __tg_creal(long double _Complex __x) {return creall(__x);}\n"
51032"\n"
51033"#undef creal\n"
51034"#define creal(__x) __tg_creal(__tg_promote1((__x))(__x))\n"
51035"\n"
51036"#undef _TG_ATTRSp\n"
51037"#undef _TG_ATTRS\n"
51038"\n"
51039"#endif /* __cplusplus */\n"
51040"#endif /* __has_include_next */\n"
51041"#endif /* __CLANG_TGMATH_H */\n"
51042"" } ,
51043 { "/builtins/tmmintrin.h" , "/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===\n"
51044" *\n"
51045" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
51046" * of this software and associated documentation files (the \"Software\"), to deal\n"
51047" * in the Software without restriction, including without limitation the rights\n"
51048" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
51049" * copies of the Software, and to permit persons to whom the Software is\n"
51050" * furnished to do so, subject to the following conditions:\n"
51051" *\n"
51052" * The above copyright notice and this permission notice shall be included in\n"
51053" * all copies or substantial portions of the Software.\n"
51054" *\n"
51055" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
51056" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
51057" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
51058" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
51059" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
51060" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
51061" * THE SOFTWARE.\n"
51062" *\n"
51063" *===-----------------------------------------------------------------------===\n"
51064" */\n"
51065"\n"
51066"#ifndef __TMMINTRIN_H\n"
51067"#define __TMMINTRIN_H\n"
51068"\n"
51069"#include <pmmintrin.h>\n"
51070"\n"
51071"/* Define the default attributes for the functions in this file. */\n"
51072"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"ssse3\"), __min_vector_width__(64)))\n"
51073"#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__(\"mmx,ssse3\"), __min_vector_width__(64)))\n"
51074"\n"
51075"/// Computes the absolute value of each of the packed 8-bit signed\n"
51076"/// integers in the source operand and stores the 8-bit unsigned integer\n"
51077"/// results in the destination.\n"
51078"///\n"
51079"/// \\headerfile <x86intrin.h>\n"
51080"///\n"
51081"/// This intrinsic corresponds to the \\c PABSB instruction.\n"
51082"///\n"
51083"/// \\param __a\n"
51084"/// A 64-bit vector of [8 x i8].\n"
51085"/// \\returns A 64-bit integer vector containing the absolute values of the\n"
51086"/// elements in the operand.\n"
51087"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51088"_mm_abs_pi8(__m64 __a)\n"
51089"{\n"
51090" return (__m64)__builtin_ia32_pabsb((__v8qi)__a);\n"
51091"}\n"
51092"\n"
51093"/// Computes the absolute value of each of the packed 8-bit signed\n"
51094"/// integers in the source operand and stores the 8-bit unsigned integer\n"
51095"/// results in the destination.\n"
51096"///\n"
51097"/// \\headerfile <x86intrin.h>\n"
51098"///\n"
51099"/// This intrinsic corresponds to the \\c VPABSB instruction.\n"
51100"///\n"
51101"/// \\param __a\n"
51102"/// A 128-bit vector of [16 x i8].\n"
51103"/// \\returns A 128-bit integer vector containing the absolute values of the\n"
51104"/// elements in the operand.\n"
51105"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51106"_mm_abs_epi8(__m128i __a)\n"
51107"{\n"
51108" return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);\n"
51109"}\n"
51110"\n"
51111"/// Computes the absolute value of each of the packed 16-bit signed\n"
51112"/// integers in the source operand and stores the 16-bit unsigned integer\n"
51113"/// results in the destination.\n"
51114"///\n"
51115"/// \\headerfile <x86intrin.h>\n"
51116"///\n"
51117"/// This intrinsic corresponds to the \\c PABSW instruction.\n"
51118"///\n"
51119"/// \\param __a\n"
51120"/// A 64-bit vector of [4 x i16].\n"
51121"/// \\returns A 64-bit integer vector containing the absolute values of the\n"
51122"/// elements in the operand.\n"
51123"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51124"_mm_abs_pi16(__m64 __a)\n"
51125"{\n"
51126" return (__m64)__builtin_ia32_pabsw((__v4hi)__a);\n"
51127"}\n"
51128"\n"
51129"/// Computes the absolute value of each of the packed 16-bit signed\n"
51130"/// integers in the source operand and stores the 16-bit unsigned integer\n"
51131"/// results in the destination.\n"
51132"///\n"
51133"/// \\headerfile <x86intrin.h>\n"
51134"///\n"
51135"/// This intrinsic corresponds to the \\c VPABSW instruction.\n"
51136"///\n"
51137"/// \\param __a\n"
51138"/// A 128-bit vector of [8 x i16].\n"
51139"/// \\returns A 128-bit integer vector containing the absolute values of the\n"
51140"/// elements in the operand.\n"
51141"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51142"_mm_abs_epi16(__m128i __a)\n"
51143"{\n"
51144" return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);\n"
51145"}\n"
51146"\n"
51147"/// Computes the absolute value of each of the packed 32-bit signed\n"
51148"/// integers in the source operand and stores the 32-bit unsigned integer\n"
51149"/// results in the destination.\n"
51150"///\n"
51151"/// \\headerfile <x86intrin.h>\n"
51152"///\n"
51153"/// This intrinsic corresponds to the \\c PABSD instruction.\n"
51154"///\n"
51155"/// \\param __a\n"
51156"/// A 64-bit vector of [2 x i32].\n"
51157"/// \\returns A 64-bit integer vector containing the absolute values of the\n"
51158"/// elements in the operand.\n"
51159"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51160"_mm_abs_pi32(__m64 __a)\n"
51161"{\n"
51162" return (__m64)__builtin_ia32_pabsd((__v2si)__a);\n"
51163"}\n"
51164"\n"
51165"/// Computes the absolute value of each of the packed 32-bit signed\n"
51166"/// integers in the source operand and stores the 32-bit unsigned integer\n"
51167"/// results in the destination.\n"
51168"///\n"
51169"/// \\headerfile <x86intrin.h>\n"
51170"///\n"
51171"/// This intrinsic corresponds to the \\c VPABSD instruction.\n"
51172"///\n"
51173"/// \\param __a\n"
51174"/// A 128-bit vector of [4 x i32].\n"
51175"/// \\returns A 128-bit integer vector containing the absolute values of the\n"
51176"/// elements in the operand.\n"
51177"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51178"_mm_abs_epi32(__m128i __a)\n"
51179"{\n"
51180" return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);\n"
51181"}\n"
51182"\n"
51183"/// Concatenates the two 128-bit integer vector operands, and\n"
51184"/// right-shifts the result by the number of bytes specified in the immediate\n"
51185"/// operand.\n"
51186"///\n"
51187"/// \\headerfile <x86intrin.h>\n"
51188"///\n"
51189"/// \\code\n"
51190"/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);\n"
51191"/// \\endcode\n"
51192"///\n"
51193"/// This intrinsic corresponds to the \\c PALIGNR instruction.\n"
51194"///\n"
51195"/// \\param a\n"
51196"/// A 128-bit vector of [16 x i8] containing one of the source operands.\n"
51197"/// \\param b\n"
51198"/// A 128-bit vector of [16 x i8] containing one of the source operands.\n"
51199"/// \\param n\n"
51200"/// An immediate operand specifying how many bytes to right-shift the result.\n"
51201"/// \\returns A 128-bit integer vector containing the concatenated right-shifted\n"
51202"/// value.\n"
51203"#define _mm_alignr_epi8(a, b, n) \\\n"
51204" (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \\\n"
51205" (__v16qi)(__m128i)(b), (n))\n"
51206"\n"
51207"/// Concatenates the two 64-bit integer vector operands, and right-shifts\n"
51208"/// the result by the number of bytes specified in the immediate operand.\n"
51209"///\n"
51210"/// \\headerfile <x86intrin.h>\n"
51211"///\n"
51212"/// \\code\n"
51213"/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);\n"
51214"/// \\endcode\n"
51215"///\n"
51216"/// This intrinsic corresponds to the \\c PALIGNR instruction.\n"
51217"///\n"
51218"/// \\param a\n"
51219"/// A 64-bit vector of [8 x i8] containing one of the source operands.\n"
51220"/// \\param b\n"
51221"/// A 64-bit vector of [8 x i8] containing one of the source operands.\n"
51222"/// \\param n\n"
51223"/// An immediate operand specifying how many bytes to right-shift the result.\n"
51224"/// \\returns A 64-bit integer vector containing the concatenated right-shifted\n"
51225"/// value.\n"
51226"#define _mm_alignr_pi8(a, b, n) \\\n"
51227" (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))\n"
51228"\n"
51229"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
51230"/// 128-bit vectors of [8 x i16].\n"
51231"///\n"
51232"/// \\headerfile <x86intrin.h>\n"
51233"///\n"
51234"/// This intrinsic corresponds to the \\c VPHADDW instruction.\n"
51235"///\n"
51236"/// \\param __a\n"
51237"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51238"/// horizontal sums of the values are stored in the lower bits of the\n"
51239"/// destination.\n"
51240"/// \\param __b\n"
51241"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51242"/// horizontal sums of the values are stored in the upper bits of the\n"
51243"/// destination.\n"
51244"/// \\returns A 128-bit vector of [8 x i16] containing the horizontal sums of\n"
51245"/// both operands.\n"
51246"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51247"_mm_hadd_epi16(__m128i __a, __m128i __b)\n"
51248"{\n"
51249" return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);\n"
51250"}\n"
51251"\n"
51252"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
51253"/// 128-bit vectors of [4 x i32].\n"
51254"///\n"
51255"/// \\headerfile <x86intrin.h>\n"
51256"///\n"
51257"/// This intrinsic corresponds to the \\c VPHADDD instruction.\n"
51258"///\n"
51259"/// \\param __a\n"
51260"/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n"
51261"/// horizontal sums of the values are stored in the lower bits of the\n"
51262"/// destination.\n"
51263"/// \\param __b\n"
51264"/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n"
51265"/// horizontal sums of the values are stored in the upper bits of the\n"
51266"/// destination.\n"
51267"/// \\returns A 128-bit vector of [4 x i32] containing the horizontal sums of\n"
51268"/// both operands.\n"
51269"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51270"_mm_hadd_epi32(__m128i __a, __m128i __b)\n"
51271"{\n"
51272" return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);\n"
51273"}\n"
51274"\n"
51275"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
51276"/// 64-bit vectors of [4 x i16].\n"
51277"///\n"
51278"/// \\headerfile <x86intrin.h>\n"
51279"///\n"
51280"/// This intrinsic corresponds to the \\c PHADDW instruction.\n"
51281"///\n"
51282"/// \\param __a\n"
51283"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51284"/// horizontal sums of the values are stored in the lower bits of the\n"
51285"/// destination.\n"
51286"/// \\param __b\n"
51287"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51288"/// horizontal sums of the values are stored in the upper bits of the\n"
51289"/// destination.\n"
51290"/// \\returns A 64-bit vector of [4 x i16] containing the horizontal sums of both\n"
51291"/// operands.\n"
51292"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51293"_mm_hadd_pi16(__m64 __a, __m64 __b)\n"
51294"{\n"
51295" return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);\n"
51296"}\n"
51297"\n"
51298"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
51299"/// 64-bit vectors of [2 x i32].\n"
51300"///\n"
51301"/// \\headerfile <x86intrin.h>\n"
51302"///\n"
51303"/// This intrinsic corresponds to the \\c PHADDD instruction.\n"
51304"///\n"
51305"/// \\param __a\n"
51306"/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n"
51307"/// horizontal sums of the values are stored in the lower bits of the\n"
51308"/// destination.\n"
51309"/// \\param __b\n"
51310"/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n"
51311"/// horizontal sums of the values are stored in the upper bits of the\n"
51312"/// destination.\n"
51313"/// \\returns A 64-bit vector of [2 x i32] containing the horizontal sums of both\n"
51314"/// operands.\n"
51315"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51316"_mm_hadd_pi32(__m64 __a, __m64 __b)\n"
51317"{\n"
51318" return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);\n"
51319"}\n"
51320"\n"
51321"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
51322"/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are\n"
51323"/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to\n"
51324"/// 0x8000.\n"
51325"///\n"
51326"/// \\headerfile <x86intrin.h>\n"
51327"///\n"
51328"/// This intrinsic corresponds to the \\c VPHADDSW instruction.\n"
51329"///\n"
51330"/// \\param __a\n"
51331"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51332"/// horizontal sums of the values are stored in the lower bits of the\n"
51333"/// destination.\n"
51334"/// \\param __b\n"
51335"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51336"/// horizontal sums of the values are stored in the upper bits of the\n"
51337"/// destination.\n"
51338"/// \\returns A 128-bit vector of [8 x i16] containing the horizontal saturated\n"
51339"/// sums of both operands.\n"
51340"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51341"_mm_hadds_epi16(__m128i __a, __m128i __b)\n"
51342"{\n"
51343" return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);\n"
51344"}\n"
51345"\n"
51346"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
51347"/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are\n"
51348"/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to\n"
51349"/// 0x8000.\n"
51350"///\n"
51351"/// \\headerfile <x86intrin.h>\n"
51352"///\n"
51353"/// This intrinsic corresponds to the \\c PHADDSW instruction.\n"
51354"///\n"
51355"/// \\param __a\n"
51356"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51357"/// horizontal sums of the values are stored in the lower bits of the\n"
51358"/// destination.\n"
51359"/// \\param __b\n"
51360"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51361"/// horizontal sums of the values are stored in the upper bits of the\n"
51362"/// destination.\n"
51363"/// \\returns A 64-bit vector of [4 x i16] containing the horizontal saturated\n"
51364"/// sums of both operands.\n"
51365"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51366"_mm_hadds_pi16(__m64 __a, __m64 __b)\n"
51367"{\n"
51368" return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);\n"
51369"}\n"
51370"\n"
51371"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
51372"/// packed 128-bit vectors of [8 x i16].\n"
51373"///\n"
51374"/// \\headerfile <x86intrin.h>\n"
51375"///\n"
51376"/// This intrinsic corresponds to the \\c VPHSUBW instruction.\n"
51377"///\n"
51378"/// \\param __a\n"
51379"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51380"/// horizontal differences between the values are stored in the lower bits of\n"
51381"/// the destination.\n"
51382"/// \\param __b\n"
51383"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51384"/// horizontal differences between the values are stored in the upper bits of\n"
51385"/// the destination.\n"
51386"/// \\returns A 128-bit vector of [8 x i16] containing the horizontal differences\n"
51387"/// of both operands.\n"
51388"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51389"_mm_hsub_epi16(__m128i __a, __m128i __b)\n"
51390"{\n"
51391" return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);\n"
51392"}\n"
51393"\n"
51394"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
51395"/// packed 128-bit vectors of [4 x i32].\n"
51396"///\n"
51397"/// \\headerfile <x86intrin.h>\n"
51398"///\n"
51399"/// This intrinsic corresponds to the \\c VPHSUBD instruction.\n"
51400"///\n"
51401"/// \\param __a\n"
51402"/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n"
51403"/// horizontal differences between the values are stored in the lower bits of\n"
51404"/// the destination.\n"
51405"/// \\param __b\n"
51406"/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n"
51407"/// horizontal differences between the values are stored in the upper bits of\n"
51408"/// the destination.\n"
51409"/// \\returns A 128-bit vector of [4 x i32] containing the horizontal differences\n"
51410"/// of both operands.\n"
51411"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51412"_mm_hsub_epi32(__m128i __a, __m128i __b)\n"
51413"{\n"
51414" return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);\n"
51415"}\n"
51416"\n"
51417"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
51418"/// packed 64-bit vectors of [4 x i16].\n"
51419"///\n"
51420"/// \\headerfile <x86intrin.h>\n"
51421"///\n"
51422"/// This intrinsic corresponds to the \\c PHSUBW instruction.\n"
51423"///\n"
51424"/// \\param __a\n"
51425"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51426"/// horizontal differences between the values are stored in the lower bits of\n"
51427"/// the destination.\n"
51428"/// \\param __b\n"
51429"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51430"/// horizontal differences between the values are stored in the upper bits of\n"
51431"/// the destination.\n"
51432"/// \\returns A 64-bit vector of [4 x i16] containing the horizontal differences\n"
51433"/// of both operands.\n"
51434"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51435"_mm_hsub_pi16(__m64 __a, __m64 __b)\n"
51436"{\n"
51437" return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);\n"
51438"}\n"
51439"\n"
51440"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
51441"/// packed 64-bit vectors of [2 x i32].\n"
51442"///\n"
51443"/// \\headerfile <x86intrin.h>\n"
51444"///\n"
51445"/// This intrinsic corresponds to the \\c PHSUBD instruction.\n"
51446"///\n"
51447"/// \\param __a\n"
51448"/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n"
51449"/// horizontal differences between the values are stored in the lower bits of\n"
51450"/// the destination.\n"
51451"/// \\param __b\n"
51452"/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n"
51453"/// horizontal differences between the values are stored in the upper bits of\n"
51454"/// the destination.\n"
51455"/// \\returns A 64-bit vector of [2 x i32] containing the horizontal differences\n"
51456"/// of both operands.\n"
51457"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51458"_mm_hsub_pi32(__m64 __a, __m64 __b)\n"
51459"{\n"
51460" return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);\n"
51461"}\n"
51462"\n"
51463"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
51464"/// packed 128-bit vectors of [8 x i16]. Positive differences greater than\n"
51465"/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are\n"
51466"/// saturated to 0x8000.\n"
51467"///\n"
51468"/// \\headerfile <x86intrin.h>\n"
51469"///\n"
51470"/// This intrinsic corresponds to the \\c VPHSUBSW instruction.\n"
51471"///\n"
51472"/// \\param __a\n"
51473"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51474"/// horizontal differences between the values are stored in the lower bits of\n"
51475"/// the destination.\n"
51476"/// \\param __b\n"
51477"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51478"/// horizontal differences between the values are stored in the upper bits of\n"
51479"/// the destination.\n"
51480"/// \\returns A 128-bit vector of [8 x i16] containing the horizontal saturated\n"
51481"/// differences of both operands.\n"
51482"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51483"_mm_hsubs_epi16(__m128i __a, __m128i __b)\n"
51484"{\n"
51485" return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);\n"
51486"}\n"
51487"\n"
51488"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
51489"/// packed 64-bit vectors of [4 x i16]. Positive differences greater than\n"
51490"/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are\n"
51491"/// saturated to 0x8000.\n"
51492"///\n"
51493"/// \\headerfile <x86intrin.h>\n"
51494"///\n"
51495"/// This intrinsic corresponds to the \\c PHSUBSW instruction.\n"
51496"///\n"
51497"/// \\param __a\n"
51498"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51499"/// horizontal differences between the values are stored in the lower bits of\n"
51500"/// the destination.\n"
51501"/// \\param __b\n"
51502"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51503"/// horizontal differences between the values are stored in the upper bits of\n"
51504"/// the destination.\n"
51505"/// \\returns A 64-bit vector of [4 x i16] containing the horizontal saturated\n"
51506"/// differences of both operands.\n"
51507"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51508"_mm_hsubs_pi16(__m64 __a, __m64 __b)\n"
51509"{\n"
51510" return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);\n"
51511"}\n"
51512"\n"
51513"/// Multiplies corresponding pairs of packed 8-bit unsigned integer\n"
51514"/// values contained in the first source operand and packed 8-bit signed\n"
51515"/// integer values contained in the second source operand, adds pairs of\n"
51516"/// contiguous products with signed saturation, and writes the 16-bit sums to\n"
51517"/// the corresponding bits in the destination.\n"
51518"///\n"
51519"/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of\n"
51520"/// both operands are multiplied, and the sum of both results is written to\n"
51521"/// bits [15:0] of the destination.\n"
51522"///\n"
51523"/// \\headerfile <x86intrin.h>\n"
51524"///\n"
51525"/// This intrinsic corresponds to the \\c VPMADDUBSW instruction.\n"
51526"///\n"
51527"/// \\param __a\n"
51528"/// A 128-bit integer vector containing the first source operand.\n"
51529"/// \\param __b\n"
51530"/// A 128-bit integer vector containing the second source operand.\n"
51531"/// \\returns A 128-bit integer vector containing the sums of products of both\n"
51532"/// operands: \\n\n"
51533"/// \\a R0 := (\\a __a0 * \\a __b0) + (\\a __a1 * \\a __b1) \\n\n"
51534"/// \\a R1 := (\\a __a2 * \\a __b2) + (\\a __a3 * \\a __b3) \\n\n"
51535"/// \\a R2 := (\\a __a4 * \\a __b4) + (\\a __a5 * \\a __b5) \\n\n"
51536"/// \\a R3 := (\\a __a6 * \\a __b6) + (\\a __a7 * \\a __b7) \\n\n"
51537"/// \\a R4 := (\\a __a8 * \\a __b8) + (\\a __a9 * \\a __b9) \\n\n"
51538"/// \\a R5 := (\\a __a10 * \\a __b10) + (\\a __a11 * \\a __b11) \\n\n"
51539"/// \\a R6 := (\\a __a12 * \\a __b12) + (\\a __a13 * \\a __b13) \\n\n"
51540"/// \\a R7 := (\\a __a14 * \\a __b14) + (\\a __a15 * \\a __b15)\n"
51541"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51542"_mm_maddubs_epi16(__m128i __a, __m128i __b)\n"
51543"{\n"
51544" return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);\n"
51545"}\n"
51546"\n"
51547"/// Multiplies corresponding pairs of packed 8-bit unsigned integer\n"
51548"/// values contained in the first source operand and packed 8-bit signed\n"
51549"/// integer values contained in the second source operand, adds pairs of\n"
51550"/// contiguous products with signed saturation, and writes the 16-bit sums to\n"
51551"/// the corresponding bits in the destination.\n"
51552"///\n"
51553"/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of\n"
51554"/// both operands are multiplied, and the sum of both results is written to\n"
51555"/// bits [15:0] of the destination.\n"
51556"///\n"
51557"/// \\headerfile <x86intrin.h>\n"
51558"///\n"
51559"/// This intrinsic corresponds to the \\c PMADDUBSW instruction.\n"
51560"///\n"
51561"/// \\param __a\n"
51562"/// A 64-bit integer vector containing the first source operand.\n"
51563"/// \\param __b\n"
51564"/// A 64-bit integer vector containing the second source operand.\n"
51565"/// \\returns A 64-bit integer vector containing the sums of products of both\n"
51566"/// operands: \\n\n"
51567"/// \\a R0 := (\\a __a0 * \\a __b0) + (\\a __a1 * \\a __b1) \\n\n"
51568"/// \\a R1 := (\\a __a2 * \\a __b2) + (\\a __a3 * \\a __b3) \\n\n"
51569"/// \\a R2 := (\\a __a4 * \\a __b4) + (\\a __a5 * \\a __b5) \\n\n"
51570"/// \\a R3 := (\\a __a6 * \\a __b6) + (\\a __a7 * \\a __b7)\n"
51571"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51572"_mm_maddubs_pi16(__m64 __a, __m64 __b)\n"
51573"{\n"
51574" return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);\n"
51575"}\n"
51576"\n"
51577"/// Multiplies packed 16-bit signed integer values, truncates the 32-bit\n"
51578"/// products to the 18 most significant bits by right-shifting, rounds the\n"
51579"/// truncated value by adding 1, and writes bits [16:1] to the destination.\n"
51580"///\n"
51581"/// \\headerfile <x86intrin.h>\n"
51582"///\n"
51583"/// This intrinsic corresponds to the \\c VPMULHRSW instruction.\n"
51584"///\n"
51585"/// \\param __a\n"
51586"/// A 128-bit vector of [8 x i16] containing one of the source operands.\n"
51587"/// \\param __b\n"
51588"/// A 128-bit vector of [8 x i16] containing one of the source operands.\n"
51589"/// \\returns A 128-bit vector of [8 x i16] containing the rounded and scaled\n"
51590"/// products of both operands.\n"
51591"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51592"_mm_mulhrs_epi16(__m128i __a, __m128i __b)\n"
51593"{\n"
51594" return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);\n"
51595"}\n"
51596"\n"
51597"/// Multiplies packed 16-bit signed integer values, truncates the 32-bit\n"
51598"/// products to the 18 most significant bits by right-shifting, rounds the\n"
51599"/// truncated value by adding 1, and writes bits [16:1] to the destination.\n"
51600"///\n"
51601"/// \\headerfile <x86intrin.h>\n"
51602"///\n"
51603"/// This intrinsic corresponds to the \\c PMULHRSW instruction.\n"
51604"///\n"
51605"/// \\param __a\n"
51606"/// A 64-bit vector of [4 x i16] containing one of the source operands.\n"
51607"/// \\param __b\n"
51608"/// A 64-bit vector of [4 x i16] containing one of the source operands.\n"
51609"/// \\returns A 64-bit vector of [4 x i16] containing the rounded and scaled\n"
51610"/// products of both operands.\n"
51611"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51612"_mm_mulhrs_pi16(__m64 __a, __m64 __b)\n"
51613"{\n"
51614" return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);\n"
51615"}\n"
51616"\n"
51617"/// Copies the 8-bit integers from a 128-bit integer vector to the\n"
51618"/// destination or clears 8-bit values in the destination, as specified by\n"
51619"/// the second source operand.\n"
51620"///\n"
51621"/// \\headerfile <x86intrin.h>\n"
51622"///\n"
51623"/// This intrinsic corresponds to the \\c VPSHUFB instruction.\n"
51624"///\n"
51625"/// \\param __a\n"
51626"/// A 128-bit integer vector containing the values to be copied.\n"
51627"/// \\param __b\n"
51628"/// A 128-bit integer vector containing control bytes corresponding to\n"
51629"/// positions in the destination:\n"
51630"/// Bit 7: \\n\n"
51631"/// 1: Clear the corresponding byte in the destination. \\n\n"
51632"/// 0: Copy the selected source byte to the corresponding byte in the\n"
51633"/// destination. \\n\n"
51634"/// Bits [6:4] Reserved. \\n\n"
51635"/// Bits [3:0] select the source byte to be copied.\n"
51636"/// \\returns A 128-bit integer vector containing the copied or cleared values.\n"
51637"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51638"_mm_shuffle_epi8(__m128i __a, __m128i __b)\n"
51639"{\n"
51640" return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);\n"
51641"}\n"
51642"\n"
51643"/// Copies the 8-bit integers from a 64-bit integer vector to the\n"
51644"/// destination or clears 8-bit values in the destination, as specified by\n"
51645"/// the second source operand.\n"
51646"///\n"
51647"/// \\headerfile <x86intrin.h>\n"
51648"///\n"
51649"/// This intrinsic corresponds to the \\c PSHUFB instruction.\n"
51650"///\n"
51651"/// \\param __a\n"
51652"/// A 64-bit integer vector containing the values to be copied.\n"
51653"/// \\param __b\n"
51654"/// A 64-bit integer vector containing control bytes corresponding to\n"
51655"/// positions in the destination:\n"
51656"/// Bit 7: \\n\n"
51657"/// 1: Clear the corresponding byte in the destination. \\n\n"
51658"/// 0: Copy the selected source byte to the corresponding byte in the\n"
51659"/// destination. \\n\n"
51660"/// Bits [3:0] select the source byte to be copied.\n"
51661"/// \\returns A 64-bit integer vector containing the copied or cleared values.\n"
51662"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51663"_mm_shuffle_pi8(__m64 __a, __m64 __b)\n"
51664"{\n"
51665" return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);\n"
51666"}\n"
51667"\n"
51668"/// For each 8-bit integer in the first source operand, perform one of\n"
51669"/// the following actions as specified by the second source operand.\n"
51670"///\n"
51671"/// If the byte in the second source is negative, calculate the two's\n"
51672"/// complement of the corresponding byte in the first source, and write that\n"
51673"/// value to the destination. If the byte in the second source is positive,\n"
51674"/// copy the corresponding byte from the first source to the destination. If\n"
51675"/// the byte in the second source is zero, clear the corresponding byte in\n"
51676"/// the destination.\n"
51677"///\n"
51678"/// \\headerfile <x86intrin.h>\n"
51679"///\n"
51680"/// This intrinsic corresponds to the \\c VPSIGNB instruction.\n"
51681"///\n"
51682"/// \\param __a\n"
51683"/// A 128-bit integer vector containing the values to be copied.\n"
51684"/// \\param __b\n"
51685"/// A 128-bit integer vector containing control bytes corresponding to\n"
51686"/// positions in the destination.\n"
51687"/// \\returns A 128-bit integer vector containing the resultant values.\n"
51688"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51689"_mm_sign_epi8(__m128i __a, __m128i __b)\n"
51690"{\n"
51691" return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);\n"
51692"}\n"
51693"\n"
51694"/// For each 16-bit integer in the first source operand, perform one of\n"
51695"/// the following actions as specified by the second source operand.\n"
51696"///\n"
51697"/// If the word in the second source is negative, calculate the two's\n"
51698"/// complement of the corresponding word in the first source, and write that\n"
51699"/// value to the destination. If the word in the second source is positive,\n"
51700"/// copy the corresponding word from the first source to the destination. If\n"
51701"/// the word in the second source is zero, clear the corresponding word in\n"
51702"/// the destination.\n"
51703"///\n"
51704"/// \\headerfile <x86intrin.h>\n"
51705"///\n"
51706"/// This intrinsic corresponds to the \\c VPSIGNW instruction.\n"
51707"///\n"
51708"/// \\param __a\n"
51709"/// A 128-bit integer vector containing the values to be copied.\n"
51710"/// \\param __b\n"
51711"/// A 128-bit integer vector containing control words corresponding to\n"
51712"/// positions in the destination.\n"
51713"/// \\returns A 128-bit integer vector containing the resultant values.\n"
51714"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51715"_mm_sign_epi16(__m128i __a, __m128i __b)\n"
51716"{\n"
51717" return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);\n"
51718"}\n"
51719"\n"
51720"/// For each 32-bit integer in the first source operand, perform one of\n"
51721"/// the following actions as specified by the second source operand.\n"
51722"///\n"
51723"/// If the doubleword in the second source is negative, calculate the two's\n"
51724"/// complement of the corresponding word in the first source, and write that\n"
51725"/// value to the destination. If the doubleword in the second source is\n"
51726"/// positive, copy the corresponding word from the first source to the\n"
51727"/// destination. If the doubleword in the second source is zero, clear the\n"
51728"/// corresponding word in the destination.\n"
51729"///\n"
51730"/// \\headerfile <x86intrin.h>\n"
51731"///\n"
51732"/// This intrinsic corresponds to the \\c VPSIGND instruction.\n"
51733"///\n"
51734"/// \\param __a\n"
51735"/// A 128-bit integer vector containing the values to be copied.\n"
51736"/// \\param __b\n"
51737"/// A 128-bit integer vector containing control doublewords corresponding to\n"
51738"/// positions in the destination.\n"
51739"/// \\returns A 128-bit integer vector containing the resultant values.\n"
51740"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51741"_mm_sign_epi32(__m128i __a, __m128i __b)\n"
51742"{\n"
51743" return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);\n"
51744"}\n"
51745"\n"
51746"/// For each 8-bit integer in the first source operand, perform one of\n"
51747"/// the following actions as specified by the second source operand.\n"
51748"///\n"
51749"/// If the byte in the second source is negative, calculate the two's\n"
51750"/// complement of the corresponding byte in the first source, and write that\n"
51751"/// value to the destination. If the byte in the second source is positive,\n"
51752"/// copy the corresponding byte from the first source to the destination. If\n"
51753"/// the byte in the second source is zero, clear the corresponding byte in\n"
51754"/// the destination.\n"
51755"///\n"
51756"/// \\headerfile <x86intrin.h>\n"
51757"///\n"
51758"/// This intrinsic corresponds to the \\c PSIGNB instruction.\n"
51759"///\n"
51760"/// \\param __a\n"
51761"/// A 64-bit integer vector containing the values to be copied.\n"
51762"/// \\param __b\n"
51763"/// A 64-bit integer vector containing control bytes corresponding to\n"
51764"/// positions in the destination.\n"
51765"/// \\returns A 64-bit integer vector containing the resultant values.\n"
51766"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51767"_mm_sign_pi8(__m64 __a, __m64 __b)\n"
51768"{\n"
51769" return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);\n"
51770"}\n"
51771"\n"
51772"/// For each 16-bit integer in the first source operand, perform one of\n"
51773"/// the following actions as specified by the second source operand.\n"
51774"///\n"
51775"/// If the word in the second source is negative, calculate the two's\n"
51776"/// complement of the corresponding word in the first source, and write that\n"
51777"/// value to the destination. If the word in the second source is positive,\n"
51778"/// copy the corresponding word from the first source to the destination. If\n"
51779"/// the word in the second source is zero, clear the corresponding word in\n"
51780"/// the destination.\n"
51781"///\n"
51782"/// \\headerfile <x86intrin.h>\n"
51783"///\n"
51784"/// This intrinsic corresponds to the \\c PSIGNW instruction.\n"
51785"///\n"
51786"/// \\param __a\n"
51787"/// A 64-bit integer vector containing the values to be copied.\n"
51788"/// \\param __b\n"
51789"/// A 64-bit integer vector containing control words corresponding to\n"
51790"/// positions in the destination.\n"
51791"/// \\returns A 64-bit integer vector containing the resultant values.\n"
51792"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51793"_mm_sign_pi16(__m64 __a, __m64 __b)\n"
51794"{\n"
51795" return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);\n"
51796"}\n"
51797"\n"
51798"/// For each 32-bit integer in the first source operand, perform one of\n"
51799"/// the following actions as specified by the second source operand.\n"
51800"///\n"
51801"/// If the doubleword in the second source is negative, calculate the two's\n"
51802"/// complement of the corresponding doubleword in the first source, and\n"
51803"/// write that value to the destination. If the doubleword in the second\n"
51804"/// source is positive, copy the corresponding doubleword from the first\n"
51805"/// source to the destination. If the doubleword in the second source is\n"
51806"/// zero, clear the corresponding doubleword in the destination.\n"
51807"///\n"
51808"/// \\headerfile <x86intrin.h>\n"
51809"///\n"
51810"/// This intrinsic corresponds to the \\c PSIGND instruction.\n"
51811"///\n"
51812"/// \\param __a\n"
51813"/// A 64-bit integer vector containing the values to be copied.\n"
51814"/// \\param __b\n"
51815"/// A 64-bit integer vector containing two control doublewords corresponding\n"
51816"/// to positions in the destination.\n"
51817"/// \\returns A 64-bit integer vector containing the resultant values.\n"
51818"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51819"_mm_sign_pi32(__m64 __a, __m64 __b)\n"
51820"{\n"
51821" return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);\n"
51822"}\n"
51823"\n"
51824"#undef __DEFAULT_FN_ATTRS\n"
51825"#undef __DEFAULT_FN_ATTRS_MMX\n"
51826"\n"
51827"#endif /* __TMMINTRIN_H */\n"
51828"" } ,
51829 { "/builtins/unwind.h" , "/*===---- unwind.h - Stack unwinding ----------------------------------------===\n"
51830" *\n"
51831" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
51832" * of this software and associated documentation files (the \"Software\"), to deal\n"
51833" * in the Software without restriction, including without limitation the rights\n"
51834" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
51835" * copies of the Software, and to permit persons to whom the Software is\n"
51836" * furnished to do so, subject to the following conditions:\n"
51837" *\n"
51838" * The above copyright notice and this permission notice shall be included in\n"
51839" * all copies or substantial portions of the Software.\n"
51840" *\n"
51841" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
51842" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
51843" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
51844" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
51845" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
51846" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
51847" * THE SOFTWARE.\n"
51848" *\n"
51849" *===-----------------------------------------------------------------------===\n"
51850" */\n"
51851"\n"
51852"/* See \"Data Definitions for libgcc_s\" in the Linux Standard Base.*/\n"
51853"\n"
51854"#ifndef __CLANG_UNWIND_H\n"
51855"#define __CLANG_UNWIND_H\n"
51856"\n"
51857"#if defined(__APPLE__) && __has_include_next(<unwind.h>)\n"
51858"/* Darwin (from 11.x on) provide an unwind.h. If that's available,\n"
51859" * use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,\n"
51860" * so define that around the include.*/\n"
51861"# ifndef _GNU_SOURCE\n"
51862"# define _SHOULD_UNDEFINE_GNU_SOURCE\n"
51863"# define _GNU_SOURCE\n"
51864"# endif\n"
51865"// libunwind's unwind.h reflects the current visibility. However, Mozilla\n"
51866"// builds with -fvisibility=hidden and relies on gcc's unwind.h to reset the\n"
51867"// visibility to default and export its contents. gcc also allows users to\n"
51868"// override its override by #defining HIDE_EXPORTS (but note, this only obeys\n"
51869"// the user's -fvisibility setting; it doesn't hide any exports on its own). We\n"
51870"// imitate gcc's header here:\n"
51871"# ifdef HIDE_EXPORTS\n"
51872"# include_next <unwind.h>\n"
51873"# else\n"
51874"# pragma GCC visibility push(default)\n"
51875"# include_next <unwind.h>\n"
51876"# pragma GCC visibility pop\n"
51877"# endif\n"
51878"# ifdef _SHOULD_UNDEFINE_GNU_SOURCE\n"
51879"# undef _GNU_SOURCE\n"
51880"# undef _SHOULD_UNDEFINE_GNU_SOURCE\n"
51881"# endif\n"
51882"#else\n"
51883"\n"
51884"#include <stdint.h>\n"
51885"\n"
51886"#ifdef __cplusplus\n"
51887"extern \"C\" {\n"
51888"#endif\n"
51889"\n"
51890"/* It is a bit strange for a header to play with the visibility of the\n"
51891" symbols it declares, but this matches gcc's behavior and some programs\n"
51892" depend on it */\n"
51893"#ifndef HIDE_EXPORTS\n"
51894"#pragma GCC visibility push(default)\n"
51895"#endif\n"
51896"\n"
51897"typedef uintptr_t _Unwind_Word;\n"
51898"typedef intptr_t _Unwind_Sword;\n"
51899"typedef uintptr_t _Unwind_Ptr;\n"
51900"typedef uintptr_t _Unwind_Internal_Ptr;\n"
51901"typedef uint64_t _Unwind_Exception_Class;\n"
51902"\n"
51903"typedef intptr_t _sleb128_t;\n"
51904"typedef uintptr_t _uleb128_t;\n"
51905"\n"
51906"struct _Unwind_Context;\n"
51907"#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))\n"
51908"struct _Unwind_Control_Block;\n"
51909"typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */\n"
51910"#else\n"
51911"struct _Unwind_Exception;\n"
51912"typedef struct _Unwind_Exception _Unwind_Exception;\n"
51913"#endif\n"
51914"typedef enum {\n"
51915" _URC_NO_REASON = 0,\n"
51916"#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \\\n"
51917" !defined(__ARM_DWARF_EH__)\n"
51918" _URC_OK = 0, /* used by ARM EHABI */\n"
51919"#endif\n"
51920" _URC_FOREIGN_EXCEPTION_CAUGHT = 1,\n"
51921"\n"
51922" _URC_FATAL_PHASE2_ERROR = 2,\n"
51923" _URC_FATAL_PHASE1_ERROR = 3,\n"
51924" _URC_NORMAL_STOP = 4,\n"
51925"\n"
51926" _URC_END_OF_STACK = 5,\n"
51927" _URC_HANDLER_FOUND = 6,\n"
51928" _URC_INSTALL_CONTEXT = 7,\n"
51929" _URC_CONTINUE_UNWIND = 8,\n"
51930"#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \\\n"
51931" !defined(__ARM_DWARF_EH__)\n"
51932" _URC_FAILURE = 9 /* used by ARM EHABI */\n"
51933"#endif\n"
51934"} _Unwind_Reason_Code;\n"
51935"\n"
51936"typedef enum {\n"
51937" _UA_SEARCH_PHASE = 1,\n"
51938" _UA_CLEANUP_PHASE = 2,\n"
51939"\n"
51940" _UA_HANDLER_FRAME = 4,\n"
51941" _UA_FORCE_UNWIND = 8,\n"
51942" _UA_END_OF_STACK = 16 /* gcc extension to C++ ABI */\n"
51943"} _Unwind_Action;\n"
51944"\n"
51945"typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,\n"
51946" _Unwind_Exception *);\n"
51947"\n"
51948"#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))\n"
51949"typedef struct _Unwind_Control_Block _Unwind_Control_Block;\n"
51950"typedef uint32_t _Unwind_EHT_Header;\n"
51951"\n"
51952"struct _Unwind_Control_Block {\n"
51953" uint64_t exception_class;\n"
51954" void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *);\n"
51955" /* unwinder cache (private fields for the unwinder's use) */\n"
51956" struct {\n"
51957" uint32_t reserved1; /* forced unwind stop function, 0 if not forced */\n"
51958" uint32_t reserved2; /* personality routine */\n"
51959" uint32_t reserved3; /* callsite */\n"
51960" uint32_t reserved4; /* forced unwind stop argument */\n"
51961" uint32_t reserved5;\n"
51962" } unwinder_cache;\n"
51963" /* propagation barrier cache (valid after phase 1) */\n"
51964" struct {\n"
51965" uint32_t sp;\n"
51966" uint32_t bitpattern[5];\n"
51967" } barrier_cache;\n"
51968" /* cleanup cache (preserved over cleanup) */\n"
51969" struct {\n"
51970" uint32_t bitpattern[4];\n"
51971" } cleanup_cache;\n"
51972" /* personality cache (for personality's benefit) */\n"
51973" struct {\n"
51974" uint32_t fnstart; /* function start address */\n"
51975" _Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */\n"
51976" uint32_t additional; /* additional data */\n"
51977" uint32_t reserved1;\n"
51978" } pr_cache;\n"
51979" long long int : 0; /* force alignment of next item to 8-byte boundary */\n"
51980"} __attribute__((__aligned__(8)));\n"
51981"#else\n"
51982"struct _Unwind_Exception {\n"
51983" _Unwind_Exception_Class exception_class;\n"
51984" _Unwind_Exception_Cleanup_Fn exception_cleanup;\n"
51985"#if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__)\n"
51986" _Unwind_Word private_[6];\n"
51987"#else\n"
51988" _Unwind_Word private_1;\n"
51989" _Unwind_Word private_2;\n"
51990"#endif\n"
51991" /* The Itanium ABI requires that _Unwind_Exception objects are \"double-word\n"
51992" * aligned\". GCC has interpreted this to mean \"use the maximum useful\n"
51993" * alignment for the target\"; so do we. */\n"
51994"} __attribute__((__aligned__));\n"
51995"#endif\n"
51996"\n"
51997"typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action,\n"
51998" _Unwind_Exception_Class,\n"
51999" _Unwind_Exception *,\n"
52000" struct _Unwind_Context *,\n"
52001" void *);\n"
52002"\n"
52003"typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(int, _Unwind_Action,\n"
52004" _Unwind_Exception_Class,\n"
52005" _Unwind_Exception *,\n"
52006" struct _Unwind_Context *);\n"
52007"typedef _Unwind_Personality_Fn __personality_routine;\n"
52008"\n"
52009"typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,\n"
52010" void *);\n"
52011"\n"
52012"#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))\n"
52013"typedef enum {\n"
52014" _UVRSC_CORE = 0, /* integer register */\n"
52015" _UVRSC_VFP = 1, /* vfp */\n"
52016" _UVRSC_WMMXD = 3, /* Intel WMMX data register */\n"
52017" _UVRSC_WMMXC = 4 /* Intel WMMX control register */\n"
52018"} _Unwind_VRS_RegClass;\n"
52019"\n"
52020"typedef enum {\n"
52021" _UVRSD_UINT32 = 0,\n"
52022" _UVRSD_VFPX = 1,\n"
52023" _UVRSD_UINT64 = 3,\n"
52024" _UVRSD_FLOAT = 4,\n"
52025" _UVRSD_DOUBLE = 5\n"
52026"} _Unwind_VRS_DataRepresentation;\n"
52027"\n"
52028"typedef enum {\n"
52029" _UVRSR_OK = 0,\n"
52030" _UVRSR_NOT_IMPLEMENTED = 1,\n"
52031" _UVRSR_FAILED = 2\n"
52032"} _Unwind_VRS_Result;\n"
52033"\n"
52034"typedef uint32_t _Unwind_State;\n"
52035"#define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0)\n"
52036"#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1)\n"
52037"#define _US_UNWIND_FRAME_RESUME ((_Unwind_State)2)\n"
52038"#define _US_ACTION_MASK ((_Unwind_State)3)\n"
52039"#define _US_FORCE_UNWIND ((_Unwind_State)8)\n"
52040"\n"
52041"_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,\n"
52042" _Unwind_VRS_RegClass __regclass,\n"
52043" uint32_t __regno,\n"
52044" _Unwind_VRS_DataRepresentation __representation,\n"
52045" void *__valuep);\n"
52046"\n"
52047"_Unwind_VRS_Result _Unwind_VRS_Set(struct _Unwind_Context *__context,\n"
52048" _Unwind_VRS_RegClass __regclass,\n"
52049" uint32_t __regno,\n"
52050" _Unwind_VRS_DataRepresentation __representation,\n"
52051" void *__valuep);\n"
52052"\n"
52053"static __inline__\n"
52054"_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *__context, int __index) {\n"
52055" _Unwind_Word __value;\n"
52056" _Unwind_VRS_Get(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);\n"
52057" return __value;\n"
52058"}\n"
52059"\n"
52060"static __inline__\n"
52061"void _Unwind_SetGR(struct _Unwind_Context *__context, int __index,\n"
52062" _Unwind_Word __value) {\n"
52063" _Unwind_VRS_Set(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);\n"
52064"}\n"
52065"\n"
52066"static __inline__\n"
52067"_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *__context) {\n"
52068" _Unwind_Word __ip = _Unwind_GetGR(__context, 15);\n"
52069" return __ip & ~(_Unwind_Word)(0x1); /* Remove thumb mode bit. */\n"
52070"}\n"
52071"\n"
52072"static __inline__\n"
52073"void _Unwind_SetIP(struct _Unwind_Context *__context, _Unwind_Word __value) {\n"
52074" _Unwind_Word __thumb_mode_bit = _Unwind_GetGR(__context, 15) & 0x1;\n"
52075" _Unwind_SetGR(__context, 15, __value | __thumb_mode_bit);\n"
52076"}\n"
52077"#else\n"
52078"_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *, int);\n"
52079"void _Unwind_SetGR(struct _Unwind_Context *, int, _Unwind_Word);\n"
52080"\n"
52081"_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *);\n"
52082"void _Unwind_SetIP(struct _Unwind_Context *, _Unwind_Word);\n"
52083"#endif\n"
52084"\n"
52085"\n"
52086"_Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *);\n"
52087"\n"
52088"_Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *);\n"
52089"\n"
52090"_Unwind_Word _Unwind_GetBSP(struct _Unwind_Context *);\n"
52091"\n"
52092"void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *);\n"
52093"\n"
52094"_Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *);\n"
52095"\n"
52096"/* DWARF EH functions; currently not available on Darwin/ARM */\n"
52097"#if !defined(__APPLE__) || !defined(__arm__)\n"
52098"_Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *);\n"
52099"_Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn,\n"
52100" void *);\n"
52101"void _Unwind_DeleteException(_Unwind_Exception *);\n"
52102"void _Unwind_Resume(_Unwind_Exception *);\n"
52103"_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(_Unwind_Exception *);\n"
52104"\n"
52105"#endif\n"
52106"\n"
52107"_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *);\n"
52108"\n"
52109"/* setjmp(3)/longjmp(3) stuff */\n"
52110"typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t;\n"
52111"\n"
52112"void _Unwind_SjLj_Register(_Unwind_FunctionContext_t);\n"
52113"void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t);\n"
52114"_Unwind_Reason_Code _Unwind_SjLj_RaiseException(_Unwind_Exception *);\n"
52115"_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *,\n"
52116" _Unwind_Stop_Fn, void *);\n"
52117"void _Unwind_SjLj_Resume(_Unwind_Exception *);\n"
52118"_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *);\n"
52119"\n"
52120"void *_Unwind_FindEnclosingFunction(void *);\n"
52121"\n"
52122"#ifdef __APPLE__\n"
52123"\n"
52124"_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *)\n"
52125" __attribute__((__unavailable__));\n"
52126"_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *)\n"
52127" __attribute__((__unavailable__));\n"
52128"\n"
52129"/* Darwin-specific functions */\n"
52130"void __register_frame(const void *);\n"
52131"void __deregister_frame(const void *);\n"
52132"\n"
52133"struct dwarf_eh_bases {\n"
52134" uintptr_t tbase;\n"
52135" uintptr_t dbase;\n"
52136" uintptr_t func;\n"
52137"};\n"
52138"void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *);\n"
52139"\n"
52140"void __register_frame_info_bases(const void *, void *, void *, void *)\n"
52141" __attribute__((__unavailable__));\n"
52142"void __register_frame_info(const void *, void *) __attribute__((__unavailable__));\n"
52143"void __register_frame_info_table_bases(const void *, void*, void *, void *)\n"
52144" __attribute__((__unavailable__));\n"
52145"void __register_frame_info_table(const void *, void *)\n"
52146" __attribute__((__unavailable__));\n"
52147"void __register_frame_table(const void *) __attribute__((__unavailable__));\n"
52148"void __deregister_frame_info(const void *) __attribute__((__unavailable__));\n"
52149"void __deregister_frame_info_bases(const void *)__attribute__((__unavailable__));\n"
52150"\n"
52151"#else\n"
52152"\n"
52153"_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *);\n"
52154"_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *);\n"
52155"\n"
52156"#endif\n"
52157"\n"
52158"\n"
52159"#ifndef HIDE_EXPORTS\n"
52160"#pragma GCC visibility pop\n"
52161"#endif\n"
52162"\n"
52163"#ifdef __cplusplus\n"
52164"}\n"
52165"#endif\n"
52166"\n"
52167"#endif\n"
52168"\n"
52169"#endif /* __CLANG_UNWIND_H */\n"
52170"" } ,
52171 { "/builtins/vadefs.h" , "/* ===-------- vadefs.h ---------------------------------------------------===\n"
52172" *\n"
52173" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52174" * of this software and associated documentation files (the \"Software\"), to deal\n"
52175" * in the Software without restriction, including without limitation the rights\n"
52176" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52177" * copies of the Software, and to permit persons to whom the Software is\n"
52178" * furnished to do so, subject to the following conditions:\n"
52179" *\n"
52180" * The above copyright notice and this permission notice shall be included in\n"
52181" * all copies or substantial portions of the Software.\n"
52182" *\n"
52183" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52184" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52185" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52186" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52187" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52188" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52189" * THE SOFTWARE.\n"
52190" *\n"
52191" *===-----------------------------------------------------------------------===\n"
52192" */\n"
52193"\n"
52194"/* Only include this if we are aiming for MSVC compatibility. */\n"
52195"#ifndef _MSC_VER\n"
52196"#include_next <vadefs.h>\n"
52197"#else\n"
52198"\n"
52199"#ifndef __clang_vadefs_h\n"
52200"#define __clang_vadefs_h\n"
52201"\n"
52202"#include_next <vadefs.h>\n"
52203"\n"
52204"/* Override macros from vadefs.h with definitions that work with Clang. */\n"
52205"#ifdef _crt_va_start\n"
52206"#undef _crt_va_start\n"
52207"#define _crt_va_start(ap, param) __builtin_va_start(ap, param)\n"
52208"#endif\n"
52209"#ifdef _crt_va_end\n"
52210"#undef _crt_va_end\n"
52211"#define _crt_va_end(ap) __builtin_va_end(ap)\n"
52212"#endif\n"
52213"#ifdef _crt_va_arg\n"
52214"#undef _crt_va_arg\n"
52215"#define _crt_va_arg(ap, type) __builtin_va_arg(ap, type)\n"
52216"#endif\n"
52217"\n"
52218"/* VS 2015 switched to double underscore names, which is an improvement, but now\n"
52219" * we have to intercept those names too.\n"
52220" */\n"
52221"#ifdef __crt_va_start\n"
52222"#undef __crt_va_start\n"
52223"#define __crt_va_start(ap, param) __builtin_va_start(ap, param)\n"
52224"#endif\n"
52225"#ifdef __crt_va_end\n"
52226"#undef __crt_va_end\n"
52227"#define __crt_va_end(ap) __builtin_va_end(ap)\n"
52228"#endif\n"
52229"#ifdef __crt_va_arg\n"
52230"#undef __crt_va_arg\n"
52231"#define __crt_va_arg(ap, type) __builtin_va_arg(ap, type)\n"
52232"#endif\n"
52233"\n"
52234"#endif\n"
52235"#endif\n"
52236"" } ,
52237 { "/builtins/vaesintrin.h" , "/*===------------------ vaesintrin.h - VAES intrinsics ---------------------===\n"
52238" *\n"
52239" *\n"
52240" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52241" * of this software and associated documentation files (the \"Software\"), to deal\n"
52242" * in the Software without restriction, including without limitation the rights\n"
52243" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52244" * copies of the Software, and to permit persons to whom the Software is\n"
52245" * furnished to do so, subject to the following conditions:\n"
52246" *\n"
52247" * The above copyright notice and this permission notice shall be included in\n"
52248" * all copies or substantial portions of the Software.\n"
52249" *\n"
52250" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52251" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52252" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52253" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52254" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52255" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52256" * THE SOFTWARE.\n"
52257" *\n"
52258" *===-----------------------------------------------------------------------===\n"
52259" */\n"
52260"#ifndef __IMMINTRIN_H\n"
52261"#error \"Never use <vaesintrin.h> directly; include <immintrin.h> instead.\"\n"
52262"#endif\n"
52263"\n"
52264"#ifndef __VAESINTRIN_H\n"
52265"#define __VAESINTRIN_H\n"
52266"\n"
52267"/* Default attributes for YMM forms. */\n"
52268"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"vaes\"), __min_vector_width__(256)))\n"
52269"\n"
52270"/* Default attributes for ZMM forms. */\n"
52271"#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__(\"avx512f,vaes\"), __min_vector_width__(512)))\n"
52272"\n"
52273"\n"
52274"static __inline__ __m256i __DEFAULT_FN_ATTRS\n"
52275" _mm256_aesenc_epi128(__m256i __A, __m256i __B)\n"
52276"{\n"
52277" return (__m256i) __builtin_ia32_aesenc256((__v4di) __A,\n"
52278" (__v4di) __B);\n"
52279"}\n"
52280"\n"
52281"static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n"
52282" _mm512_aesenc_epi128(__m512i __A, __m512i __B)\n"
52283"{\n"
52284" return (__m512i) __builtin_ia32_aesenc512((__v8di) __A,\n"
52285" (__v8di) __B);\n"
52286"}\n"
52287"\n"
52288"static __inline__ __m256i __DEFAULT_FN_ATTRS\n"
52289" _mm256_aesdec_epi128(__m256i __A, __m256i __B)\n"
52290"{\n"
52291" return (__m256i) __builtin_ia32_aesdec256((__v4di) __A,\n"
52292" (__v4di) __B);\n"
52293"}\n"
52294"\n"
52295"static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n"
52296" _mm512_aesdec_epi128(__m512i __A, __m512i __B)\n"
52297"{\n"
52298" return (__m512i) __builtin_ia32_aesdec512((__v8di) __A,\n"
52299" (__v8di) __B);\n"
52300"}\n"
52301"\n"
52302"static __inline__ __m256i __DEFAULT_FN_ATTRS\n"
52303" _mm256_aesenclast_epi128(__m256i __A, __m256i __B)\n"
52304"{\n"
52305" return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A,\n"
52306" (__v4di) __B);\n"
52307"}\n"
52308"\n"
52309"static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n"
52310" _mm512_aesenclast_epi128(__m512i __A, __m512i __B)\n"
52311"{\n"
52312" return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A,\n"
52313" (__v8di) __B);\n"
52314"}\n"
52315"\n"
52316"static __inline__ __m256i __DEFAULT_FN_ATTRS\n"
52317" _mm256_aesdeclast_epi128(__m256i __A, __m256i __B)\n"
52318"{\n"
52319" return (__m256i) __builtin_ia32_aesdeclast256((__v4di) __A,\n"
52320" (__v4di) __B);\n"
52321"}\n"
52322"\n"
52323"static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n"
52324" _mm512_aesdeclast_epi128(__m512i __A, __m512i __B)\n"
52325"{\n"
52326" return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A,\n"
52327" (__v8di) __B);\n"
52328"}\n"
52329"\n"
52330"\n"
52331"#undef __DEFAULT_FN_ATTRS\n"
52332"#undef __DEFAULT_FN_ATTRS_F\n"
52333"\n"
52334"#endif\n"
52335"" } ,
52336 { "/builtins/varargs.h" , "/*===---- varargs.h - Variable argument handling -------------------------------------===\n"
52337"*\n"
52338"* Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52339"* of this software and associated documentation files (the \"Software\"), to deal\n"
52340"* in the Software without restriction, including without limitation the rights\n"
52341"* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52342"* copies of the Software, and to permit persons to whom the Software is\n"
52343"* furnished to do so, subject to the following conditions:\n"
52344"*\n"
52345"* The above copyright notice and this permission notice shall be included in\n"
52346"* all copies or substantial portions of the Software.\n"
52347"*\n"
52348"* THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52349"* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52350"* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52351"* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52352"* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52353"* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52354"* THE SOFTWARE.\n"
52355"*\n"
52356"*===-----------------------------------------------------------------------===\n"
52357"*/\n"
52358"#ifndef __VARARGS_H\n"
52359"#define __VARARGS_H\n"
52360" #error \"Please use <stdarg.h> instead of <varargs.h>\"\n"
52361"#endif\n"
52362"" } ,
52363 { "/builtins/vpclmulqdqintrin.h" , "/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------===\n"
52364" *\n"
52365" *\n"
52366" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52367" * of this software and associated documentation files (the \"Software\"), to deal\n"
52368" * in the Software without restriction, including without limitation the rights\n"
52369" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52370" * copies of the Software, and to permit persons to whom the Software is\n"
52371" * furnished to do so, subject to the following conditions:\n"
52372" *\n"
52373" * The above copyright notice and this permission notice shall be included in\n"
52374" * all copies or substantial portions of the Software.\n"
52375" *\n"
52376" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52377" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52378" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52379" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52380" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52381" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52382" * THE SOFTWARE.\n"
52383" *\n"
52384" *===-----------------------------------------------------------------------===\n"
52385" */\n"
52386"#ifndef __IMMINTRIN_H\n"
52387"#error \"Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead.\"\n"
52388"#endif\n"
52389"\n"
52390"#ifndef __VPCLMULQDQINTRIN_H\n"
52391"#define __VPCLMULQDQINTRIN_H\n"
52392"\n"
52393"#define _mm256_clmulepi64_epi128(A, B, I) \\\n"
52394" (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \\\n"
52395" (__v4di)(__m256i)(B), \\\n"
52396" (char)(I))\n"
52397"\n"
52398"#define _mm512_clmulepi64_epi128(A, B, I) \\\n"
52399" (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \\\n"
52400" (__v8di)(__m512i)(B), \\\n"
52401" (char)(I))\n"
52402"\n"
52403"#endif /* __VPCLMULQDQINTRIN_H */\n"
52404"\n"
52405"" } ,
52406 { "/builtins/waitpkgintrin.h" , "/*===----------------------- waitpkgintrin.h - WAITPKG --------------------===\n"
52407" *\n"
52408" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52409" * of this software and associated documentation files (the \"Software\"), to deal\n"
52410" * in the Software without restriction, including without limitation the rights\n"
52411" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52412" * copies of the Software, and to permit persons to whom the Software is\n"
52413" * furnished to do so, subject to the following conditions:\n"
52414" *\n"
52415" * The above copyright notice and this permission notice shall be included in\n"
52416" * all copies or substantial portions of the Software.\n"
52417" *\n"
52418" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52419" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52420" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52421" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52422" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52423" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52424" * THE SOFTWARE.\n"
52425" *\n"
52426" *===-----------------------------------------------------------------------===\n"
52427" */\n"
52428"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
52429"#error \"Never use <waitpkgintrin.h> directly; include <x86intrin.h> instead.\"\n"
52430"#endif\n"
52431"\n"
52432"#ifndef __WAITPKGINTRIN_H\n"
52433"#define __WAITPKGINTRIN_H\n"
52434"\n"
52435"/* Define the default attributes for the functions in this file. */\n"
52436"#define __DEFAULT_FN_ATTRS \\\n"
52437" __attribute__((__always_inline__, __nodebug__, __target__(\"waitpkg\")))\n"
52438"\n"
52439"static __inline__ void __DEFAULT_FN_ATTRS\n"
52440"_umonitor (void * __address)\n"
52441"{\n"
52442" __builtin_ia32_umonitor (__address);\n"
52443"}\n"
52444"\n"
52445"static __inline__ unsigned char __DEFAULT_FN_ATTRS\n"
52446"_umwait (unsigned int __control, unsigned long long __counter)\n"
52447"{\n"
52448" return __builtin_ia32_umwait (__control,\n"
52449" (unsigned int)(__counter >> 32), (unsigned int)__counter);\n"
52450"}\n"
52451"\n"
52452"static __inline__ unsigned char __DEFAULT_FN_ATTRS\n"
52453"_tpause (unsigned int __control, unsigned long long __counter)\n"
52454"{\n"
52455" return __builtin_ia32_tpause (__control,\n"
52456" (unsigned int)(__counter >> 32), (unsigned int)__counter);\n"
52457"}\n"
52458"\n"
52459"#undef __DEFAULT_FN_ATTRS\n"
52460"\n"
52461"#endif /* __WAITPKGINTRIN_H */\n"
52462"" } ,
52463 { "/builtins/wbnoinvdintrin.h" , "/*===-------------- wbnoinvdintrin.h - wbnoinvd intrinsic-------------------===\n"
52464" *\n"
52465" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52466" * of this software and associated documentation files (the \"Software\"), to deal\n"
52467" * in the Software without restriction, including without limitation the rights\n"
52468" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52469" * copies of the Software, and to permit persons to whom the Software is\n"
52470" * furnished to do so, subject to the following conditions:\n"
52471" *\n"
52472" * The above copyright notice and this permission notice shall be included in\n"
52473" * all copies or substantial portions of the Software.\n"
52474" *\n"
52475" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52476" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52477" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52478" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52479" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52480" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52481" * THE SOFTWARE.\n"
52482" *\n"
52483" *===-----------------------------------------------------------------------===\n"
52484" */\n"
52485"\n"
52486"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
52487"#error \"Never use <wbnoinvdintrin.h> directly; include <x86intrin.h> instead.\"\n"
52488"#endif\n"
52489"\n"
52490"#ifndef __WBNOINVDINTRIN_H\n"
52491"#define __WBNOINVDINTRIN_H\n"
52492"\n"
52493"static __inline__ void\n"
52494" __attribute__((__always_inline__, __nodebug__, __target__(\"wbnoinvd\")))\n"
52495"_wbnoinvd (void)\n"
52496"{\n"
52497" __builtin_ia32_wbnoinvd ();\n"
52498"}\n"
52499"\n"
52500"#endif /* __WBNOINVDINTRIN_H */\n"
52501"" } ,
52502 { "/builtins/wmmintrin.h" , "/*===---- wmmintrin.h - AES intrinsics ------------------------------------===\n"
52503" *\n"
52504" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52505" * of this software and associated documentation files (the \"Software\"), to deal\n"
52506" * in the Software without restriction, including without limitation the rights\n"
52507" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52508" * copies of the Software, and to permit persons to whom the Software is\n"
52509" * furnished to do so, subject to the following conditions:\n"
52510" *\n"
52511" * The above copyright notice and this permission notice shall be included in\n"
52512" * all copies or substantial portions of the Software.\n"
52513" *\n"
52514" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52515" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52516" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52517" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52518" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52519" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52520" * THE SOFTWARE.\n"
52521" *\n"
52522" *===-----------------------------------------------------------------------===\n"
52523" */\n"
52524"\n"
52525"#ifndef __WMMINTRIN_H\n"
52526"#define __WMMINTRIN_H\n"
52527"\n"
52528"#include <emmintrin.h>\n"
52529"\n"
52530"#include <__wmmintrin_aes.h>\n"
52531"\n"
52532"#include <__wmmintrin_pclmul.h>\n"
52533"\n"
52534"#endif /* __WMMINTRIN_H */\n"
52535"" } ,
52536 { "/builtins/x86intrin.h" , "/*===---- x86intrin.h - X86 intrinsics -------------------------------------===\n"
52537" *\n"
52538" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52539" * of this software and associated documentation files (the \"Software\"), to deal\n"
52540" * in the Software without restriction, including without limitation the rights\n"
52541" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52542" * copies of the Software, and to permit persons to whom the Software is\n"
52543" * furnished to do so, subject to the following conditions:\n"
52544" *\n"
52545" * The above copyright notice and this permission notice shall be included in\n"
52546" * all copies or substantial portions of the Software.\n"
52547" *\n"
52548" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52549" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52550" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52551" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52552" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52553" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52554" * THE SOFTWARE.\n"
52555" *\n"
52556" *===-----------------------------------------------------------------------===\n"
52557" */\n"
52558"\n"
52559"#ifndef __X86INTRIN_H\n"
52560"#define __X86INTRIN_H\n"
52561"\n"
52562"#include <ia32intrin.h>\n"
52563"\n"
52564"#include <immintrin.h>\n"
52565"\n"
52566"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__3dNOW__)\n"
52567"#include <mm3dnow.h>\n"
52568"#endif\n"
52569"\n"
52570"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PRFCHW__)\n"
52571"#include <prfchwintrin.h>\n"
52572"#endif\n"
52573"\n"
52574"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE4A__)\n"
52575"#include <ammintrin.h>\n"
52576"#endif\n"
52577"\n"
52578"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA4__)\n"
52579"#include <fma4intrin.h>\n"
52580"#endif\n"
52581"\n"
52582"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XOP__)\n"
52583"#include <xopintrin.h>\n"
52584"#endif\n"
52585"\n"
52586"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__TBM__)\n"
52587"#include <tbmintrin.h>\n"
52588"#endif\n"
52589"\n"
52590"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LWP__)\n"
52591"#include <lwpintrin.h>\n"
52592"#endif\n"
52593"\n"
52594"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MWAITX__)\n"
52595"#include <mwaitxintrin.h>\n"
52596"#endif\n"
52597"\n"
52598"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__)\n"
52599"#include <clzerointrin.h>\n"
52600"#endif\n"
52601"\n"
52602"\n"
52603"#endif /* __X86INTRIN_H */\n"
52604"" } ,
52605 { "/builtins/xmmintrin.h" , "/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===\n"
52606" *\n"
52607" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52608" * of this software and associated documentation files (the \"Software\"), to deal\n"
52609" * in the Software without restriction, including without limitation the rights\n"
52610" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52611" * copies of the Software, and to permit persons to whom the Software is\n"
52612" * furnished to do so, subject to the following conditions:\n"
52613" *\n"
52614" * The above copyright notice and this permission notice shall be included in\n"
52615" * all copies or substantial portions of the Software.\n"
52616" *\n"
52617" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52618" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52619" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52620" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52621" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52622" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52623" * THE SOFTWARE.\n"
52624" *\n"
52625" *===-----------------------------------------------------------------------===\n"
52626" */\n"
52627"\n"
52628"#ifndef __XMMINTRIN_H\n"
52629"#define __XMMINTRIN_H\n"
52630"\n"
52631"#include <mmintrin.h>\n"
52632"\n"
52633"typedef int __v4si __attribute__((__vector_size__(16)));\n"
52634"typedef float __v4sf __attribute__((__vector_size__(16)));\n"
52635"typedef float __m128 __attribute__((__vector_size__(16)));\n"
52636"\n"
52637"/* Unsigned types */\n"
52638"typedef unsigned int __v4su __attribute__((__vector_size__(16)));\n"
52639"\n"
52640"/* This header should only be included in a hosted environment as it depends on\n"
52641" * a standard library to provide allocation routines. */\n"
52642"#if __STDC_HOSTED__\n"
52643"#include <mm_malloc.h>\n"
52644"#endif\n"
52645"\n"
52646"/* Define the default attributes for the functions in this file. */\n"
52647"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse\"), __min_vector_width__(128)))\n"
52648"#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__(\"mmx,sse\"), __min_vector_width__(64)))\n"
52649"\n"
52650"/// Adds the 32-bit float values in the low-order bits of the operands.\n"
52651"///\n"
52652"/// \\headerfile <x86intrin.h>\n"
52653"///\n"
52654"/// This intrinsic corresponds to the <c> VADDSS / ADDSS </c> instructions.\n"
52655"///\n"
52656"/// \\param __a\n"
52657"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52658"/// The lower 32 bits of this operand are used in the calculation.\n"
52659"/// \\param __b\n"
52660"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52661"/// The lower 32 bits of this operand are used in the calculation.\n"
52662"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the sum\n"
52663"/// of the lower 32 bits of both operands. The upper 96 bits are copied from\n"
52664"/// the upper 96 bits of the first source operand.\n"
52665"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52666"_mm_add_ss(__m128 __a, __m128 __b)\n"
52667"{\n"
52668" __a[0] += __b[0];\n"
52669" return __a;\n"
52670"}\n"
52671"\n"
52672"/// Adds two 128-bit vectors of [4 x float], and returns the results of\n"
52673"/// the addition.\n"
52674"///\n"
52675"/// \\headerfile <x86intrin.h>\n"
52676"///\n"
52677"/// This intrinsic corresponds to the <c> VADDPS / ADDPS </c> instructions.\n"
52678"///\n"
52679"/// \\param __a\n"
52680"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52681"/// \\param __b\n"
52682"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52683"/// \\returns A 128-bit vector of [4 x float] containing the sums of both\n"
52684"/// operands.\n"
52685"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52686"_mm_add_ps(__m128 __a, __m128 __b)\n"
52687"{\n"
52688" return (__m128)((__v4sf)__a + (__v4sf)__b);\n"
52689"}\n"
52690"\n"
52691"/// Subtracts the 32-bit float value in the low-order bits of the second\n"
52692"/// operand from the corresponding value in the first operand.\n"
52693"///\n"
52694"/// \\headerfile <x86intrin.h>\n"
52695"///\n"
52696"/// This intrinsic corresponds to the <c> VSUBSS / SUBSS </c> instructions.\n"
52697"///\n"
52698"/// \\param __a\n"
52699"/// A 128-bit vector of [4 x float] containing the minuend. The lower 32 bits\n"
52700"/// of this operand are used in the calculation.\n"
52701"/// \\param __b\n"
52702"/// A 128-bit vector of [4 x float] containing the subtrahend. The lower 32\n"
52703"/// bits of this operand are used in the calculation.\n"
52704"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
52705"/// difference of the lower 32 bits of both operands. The upper 96 bits are\n"
52706"/// copied from the upper 96 bits of the first source operand.\n"
52707"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52708"_mm_sub_ss(__m128 __a, __m128 __b)\n"
52709"{\n"
52710" __a[0] -= __b[0];\n"
52711" return __a;\n"
52712"}\n"
52713"\n"
52714"/// Subtracts each of the values of the second operand from the first\n"
52715"/// operand, both of which are 128-bit vectors of [4 x float] and returns\n"
52716"/// the results of the subtraction.\n"
52717"///\n"
52718"/// \\headerfile <x86intrin.h>\n"
52719"///\n"
52720"/// This intrinsic corresponds to the <c> VSUBPS / SUBPS </c> instructions.\n"
52721"///\n"
52722"/// \\param __a\n"
52723"/// A 128-bit vector of [4 x float] containing the minuend.\n"
52724"/// \\param __b\n"
52725"/// A 128-bit vector of [4 x float] containing the subtrahend.\n"
52726"/// \\returns A 128-bit vector of [4 x float] containing the differences between\n"
52727"/// both operands.\n"
52728"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52729"_mm_sub_ps(__m128 __a, __m128 __b)\n"
52730"{\n"
52731" return (__m128)((__v4sf)__a - (__v4sf)__b);\n"
52732"}\n"
52733"\n"
52734"/// Multiplies two 32-bit float values in the low-order bits of the\n"
52735"/// operands.\n"
52736"///\n"
52737"/// \\headerfile <x86intrin.h>\n"
52738"///\n"
52739"/// This intrinsic corresponds to the <c> VMULSS / MULSS </c> instructions.\n"
52740"///\n"
52741"/// \\param __a\n"
52742"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52743"/// The lower 32 bits of this operand are used in the calculation.\n"
52744"/// \\param __b\n"
52745"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52746"/// The lower 32 bits of this operand are used in the calculation.\n"
52747"/// \\returns A 128-bit vector of [4 x float] containing the product of the lower\n"
52748"/// 32 bits of both operands. The upper 96 bits are copied from the upper 96\n"
52749"/// bits of the first source operand.\n"
52750"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52751"_mm_mul_ss(__m128 __a, __m128 __b)\n"
52752"{\n"
52753" __a[0] *= __b[0];\n"
52754" return __a;\n"
52755"}\n"
52756"\n"
52757"/// Multiplies two 128-bit vectors of [4 x float] and returns the\n"
52758"/// results of the multiplication.\n"
52759"///\n"
52760"/// \\headerfile <x86intrin.h>\n"
52761"///\n"
52762"/// This intrinsic corresponds to the <c> VMULPS / MULPS </c> instructions.\n"
52763"///\n"
52764"/// \\param __a\n"
52765"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52766"/// \\param __b\n"
52767"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52768"/// \\returns A 128-bit vector of [4 x float] containing the products of both\n"
52769"/// operands.\n"
52770"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52771"_mm_mul_ps(__m128 __a, __m128 __b)\n"
52772"{\n"
52773" return (__m128)((__v4sf)__a * (__v4sf)__b);\n"
52774"}\n"
52775"\n"
52776"/// Divides the value in the low-order 32 bits of the first operand by\n"
52777"/// the corresponding value in the second operand.\n"
52778"///\n"
52779"/// \\headerfile <x86intrin.h>\n"
52780"///\n"
52781"/// This intrinsic corresponds to the <c> VDIVSS / DIVSS </c> instructions.\n"
52782"///\n"
52783"/// \\param __a\n"
52784"/// A 128-bit vector of [4 x float] containing the dividend. The lower 32\n"
52785"/// bits of this operand are used in the calculation.\n"
52786"/// \\param __b\n"
52787"/// A 128-bit vector of [4 x float] containing the divisor. The lower 32 bits\n"
52788"/// of this operand are used in the calculation.\n"
52789"/// \\returns A 128-bit vector of [4 x float] containing the quotients of the\n"
52790"/// lower 32 bits of both operands. The upper 96 bits are copied from the\n"
52791"/// upper 96 bits of the first source operand.\n"
52792"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52793"_mm_div_ss(__m128 __a, __m128 __b)\n"
52794"{\n"
52795" __a[0] /= __b[0];\n"
52796" return __a;\n"
52797"}\n"
52798"\n"
52799"/// Divides two 128-bit vectors of [4 x float].\n"
52800"///\n"
52801"/// \\headerfile <x86intrin.h>\n"
52802"///\n"
52803"/// This intrinsic corresponds to the <c> VDIVPS / DIVPS </c> instructions.\n"
52804"///\n"
52805"/// \\param __a\n"
52806"/// A 128-bit vector of [4 x float] containing the dividend.\n"
52807"/// \\param __b\n"
52808"/// A 128-bit vector of [4 x float] containing the divisor.\n"
52809"/// \\returns A 128-bit vector of [4 x float] containing the quotients of both\n"
52810"/// operands.\n"
52811"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52812"_mm_div_ps(__m128 __a, __m128 __b)\n"
52813"{\n"
52814" return (__m128)((__v4sf)__a / (__v4sf)__b);\n"
52815"}\n"
52816"\n"
52817"/// Calculates the square root of the value stored in the low-order bits\n"
52818"/// of a 128-bit vector of [4 x float].\n"
52819"///\n"
52820"/// \\headerfile <x86intrin.h>\n"
52821"///\n"
52822"/// This intrinsic corresponds to the <c> VSQRTSS / SQRTSS </c> instructions.\n"
52823"///\n"
52824"/// \\param __a\n"
52825"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
52826"/// used in the calculation.\n"
52827"/// \\returns A 128-bit vector of [4 x float] containing the square root of the\n"
52828"/// value in the low-order bits of the operand.\n"
52829"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52830"_mm_sqrt_ss(__m128 __a)\n"
52831"{\n"
52832" return (__m128)__builtin_ia32_sqrtss((__v4sf)__a);\n"
52833"}\n"
52834"\n"
52835"/// Calculates the square roots of the values stored in a 128-bit vector\n"
52836"/// of [4 x float].\n"
52837"///\n"
52838"/// \\headerfile <x86intrin.h>\n"
52839"///\n"
52840"/// This intrinsic corresponds to the <c> VSQRTPS / SQRTPS </c> instructions.\n"
52841"///\n"
52842"/// \\param __a\n"
52843"/// A 128-bit vector of [4 x float].\n"
52844"/// \\returns A 128-bit vector of [4 x float] containing the square roots of the\n"
52845"/// values in the operand.\n"
52846"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52847"_mm_sqrt_ps(__m128 __a)\n"
52848"{\n"
52849" return __builtin_ia32_sqrtps((__v4sf)__a);\n"
52850"}\n"
52851"\n"
52852"/// Calculates the approximate reciprocal of the value stored in the\n"
52853"/// low-order bits of a 128-bit vector of [4 x float].\n"
52854"///\n"
52855"/// \\headerfile <x86intrin.h>\n"
52856"///\n"
52857"/// This intrinsic corresponds to the <c> VRCPSS / RCPSS </c> instructions.\n"
52858"///\n"
52859"/// \\param __a\n"
52860"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
52861"/// used in the calculation.\n"
52862"/// \\returns A 128-bit vector of [4 x float] containing the approximate\n"
52863"/// reciprocal of the value in the low-order bits of the operand.\n"
52864"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52865"_mm_rcp_ss(__m128 __a)\n"
52866"{\n"
52867" return (__m128)__builtin_ia32_rcpss((__v4sf)__a);\n"
52868"}\n"
52869"\n"
52870"/// Calculates the approximate reciprocals of the values stored in a\n"
52871"/// 128-bit vector of [4 x float].\n"
52872"///\n"
52873"/// \\headerfile <x86intrin.h>\n"
52874"///\n"
52875"/// This intrinsic corresponds to the <c> VRCPPS / RCPPS </c> instructions.\n"
52876"///\n"
52877"/// \\param __a\n"
52878"/// A 128-bit vector of [4 x float].\n"
52879"/// \\returns A 128-bit vector of [4 x float] containing the approximate\n"
52880"/// reciprocals of the values in the operand.\n"
52881"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52882"_mm_rcp_ps(__m128 __a)\n"
52883"{\n"
52884" return (__m128)__builtin_ia32_rcpps((__v4sf)__a);\n"
52885"}\n"
52886"\n"
52887"/// Calculates the approximate reciprocal of the square root of the value\n"
52888"/// stored in the low-order bits of a 128-bit vector of [4 x float].\n"
52889"///\n"
52890"/// \\headerfile <x86intrin.h>\n"
52891"///\n"
52892"/// This intrinsic corresponds to the <c> VRSQRTSS / RSQRTSS </c> instructions.\n"
52893"///\n"
52894"/// \\param __a\n"
52895"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
52896"/// used in the calculation.\n"
52897"/// \\returns A 128-bit vector of [4 x float] containing the approximate\n"
52898"/// reciprocal of the square root of the value in the low-order bits of the\n"
52899"/// operand.\n"
52900"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52901"_mm_rsqrt_ss(__m128 __a)\n"
52902"{\n"
52903" return __builtin_ia32_rsqrtss((__v4sf)__a);\n"
52904"}\n"
52905"\n"
52906"/// Calculates the approximate reciprocals of the square roots of the\n"
52907"/// values stored in a 128-bit vector of [4 x float].\n"
52908"///\n"
52909"/// \\headerfile <x86intrin.h>\n"
52910"///\n"
52911"/// This intrinsic corresponds to the <c> VRSQRTPS / RSQRTPS </c> instructions.\n"
52912"///\n"
52913"/// \\param __a\n"
52914"/// A 128-bit vector of [4 x float].\n"
52915"/// \\returns A 128-bit vector of [4 x float] containing the approximate\n"
52916"/// reciprocals of the square roots of the values in the operand.\n"
52917"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52918"_mm_rsqrt_ps(__m128 __a)\n"
52919"{\n"
52920" return __builtin_ia32_rsqrtps((__v4sf)__a);\n"
52921"}\n"
52922"\n"
52923"/// Compares two 32-bit float values in the low-order bits of both\n"
52924"/// operands and returns the lesser value in the low-order bits of the\n"
52925"/// vector of [4 x float].\n"
52926"///\n"
52927"/// \\headerfile <x86intrin.h>\n"
52928"///\n"
52929"/// This intrinsic corresponds to the <c> VMINSS / MINSS </c> instructions.\n"
52930"///\n"
52931"/// \\param __a\n"
52932"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
52933"/// 32 bits of this operand are used in the comparison.\n"
52934"/// \\param __b\n"
52935"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
52936"/// 32 bits of this operand are used in the comparison.\n"
52937"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
52938"/// minimum value between both operands. The upper 96 bits are copied from\n"
52939"/// the upper 96 bits of the first source operand.\n"
52940"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52941"_mm_min_ss(__m128 __a, __m128 __b)\n"
52942"{\n"
52943" return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b);\n"
52944"}\n"
52945"\n"
52946"/// Compares two 128-bit vectors of [4 x float] and returns the lesser\n"
52947"/// of each pair of values.\n"
52948"///\n"
52949"/// \\headerfile <x86intrin.h>\n"
52950"///\n"
52951"/// This intrinsic corresponds to the <c> VMINPS / MINPS </c> instructions.\n"
52952"///\n"
52953"/// \\param __a\n"
52954"/// A 128-bit vector of [4 x float] containing one of the operands.\n"
52955"/// \\param __b\n"
52956"/// A 128-bit vector of [4 x float] containing one of the operands.\n"
52957"/// \\returns A 128-bit vector of [4 x float] containing the minimum values\n"
52958"/// between both operands.\n"
52959"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52960"_mm_min_ps(__m128 __a, __m128 __b)\n"
52961"{\n"
52962" return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b);\n"
52963"}\n"
52964"\n"
52965"/// Compares two 32-bit float values in the low-order bits of both\n"
52966"/// operands and returns the greater value in the low-order bits of a 128-bit\n"
52967"/// vector of [4 x float].\n"
52968"///\n"
52969"/// \\headerfile <x86intrin.h>\n"
52970"///\n"
52971"/// This intrinsic corresponds to the <c> VMAXSS / MAXSS </c> instructions.\n"
52972"///\n"
52973"/// \\param __a\n"
52974"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
52975"/// 32 bits of this operand are used in the comparison.\n"
52976"/// \\param __b\n"
52977"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
52978"/// 32 bits of this operand are used in the comparison.\n"
52979"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
52980"/// maximum value between both operands. The upper 96 bits are copied from\n"
52981"/// the upper 96 bits of the first source operand.\n"
52982"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52983"_mm_max_ss(__m128 __a, __m128 __b)\n"
52984"{\n"
52985" return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b);\n"
52986"}\n"
52987"\n"
52988"/// Compares two 128-bit vectors of [4 x float] and returns the greater\n"
52989"/// of each pair of values.\n"
52990"///\n"
52991"/// \\headerfile <x86intrin.h>\n"
52992"///\n"
52993"/// This intrinsic corresponds to the <c> VMAXPS / MAXPS </c> instructions.\n"
52994"///\n"
52995"/// \\param __a\n"
52996"/// A 128-bit vector of [4 x float] containing one of the operands.\n"
52997"/// \\param __b\n"
52998"/// A 128-bit vector of [4 x float] containing one of the operands.\n"
52999"/// \\returns A 128-bit vector of [4 x float] containing the maximum values\n"
53000"/// between both operands.\n"
53001"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53002"_mm_max_ps(__m128 __a, __m128 __b)\n"
53003"{\n"
53004" return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b);\n"
53005"}\n"
53006"\n"
53007"/// Performs a bitwise AND of two 128-bit vectors of [4 x float].\n"
53008"///\n"
53009"/// \\headerfile <x86intrin.h>\n"
53010"///\n"
53011"/// This intrinsic corresponds to the <c> VANDPS / ANDPS </c> instructions.\n"
53012"///\n"
53013"/// \\param __a\n"
53014"/// A 128-bit vector containing one of the source operands.\n"
53015"/// \\param __b\n"
53016"/// A 128-bit vector containing one of the source operands.\n"
53017"/// \\returns A 128-bit vector of [4 x float] containing the bitwise AND of the\n"
53018"/// values between both operands.\n"
53019"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53020"_mm_and_ps(__m128 __a, __m128 __b)\n"
53021"{\n"
53022" return (__m128)((__v4su)__a & (__v4su)__b);\n"
53023"}\n"
53024"\n"
53025"/// Performs a bitwise AND of two 128-bit vectors of [4 x float], using\n"
53026"/// the one's complement of the values contained in the first source\n"
53027"/// operand.\n"
53028"///\n"
53029"/// \\headerfile <x86intrin.h>\n"
53030"///\n"
53031"/// This intrinsic corresponds to the <c> VANDNPS / ANDNPS </c> instructions.\n"
53032"///\n"
53033"/// \\param __a\n"
53034"/// A 128-bit vector of [4 x float] containing the first source operand. The\n"
53035"/// one's complement of this value is used in the bitwise AND.\n"
53036"/// \\param __b\n"
53037"/// A 128-bit vector of [4 x float] containing the second source operand.\n"
53038"/// \\returns A 128-bit vector of [4 x float] containing the bitwise AND of the\n"
53039"/// one's complement of the first operand and the values in the second\n"
53040"/// operand.\n"
53041"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53042"_mm_andnot_ps(__m128 __a, __m128 __b)\n"
53043"{\n"
53044" return (__m128)(~(__v4su)__a & (__v4su)__b);\n"
53045"}\n"
53046"\n"
53047"/// Performs a bitwise OR of two 128-bit vectors of [4 x float].\n"
53048"///\n"
53049"/// \\headerfile <x86intrin.h>\n"
53050"///\n"
53051"/// This intrinsic corresponds to the <c> VORPS / ORPS </c> instructions.\n"
53052"///\n"
53053"/// \\param __a\n"
53054"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
53055"/// \\param __b\n"
53056"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
53057"/// \\returns A 128-bit vector of [4 x float] containing the bitwise OR of the\n"
53058"/// values between both operands.\n"
53059"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53060"_mm_or_ps(__m128 __a, __m128 __b)\n"
53061"{\n"
53062" return (__m128)((__v4su)__a | (__v4su)__b);\n"
53063"}\n"
53064"\n"
53065"/// Performs a bitwise exclusive OR of two 128-bit vectors of\n"
53066"/// [4 x float].\n"
53067"///\n"
53068"/// \\headerfile <x86intrin.h>\n"
53069"///\n"
53070"/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instructions.\n"
53071"///\n"
53072"/// \\param __a\n"
53073"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
53074"/// \\param __b\n"
53075"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
53076"/// \\returns A 128-bit vector of [4 x float] containing the bitwise exclusive OR\n"
53077"/// of the values between both operands.\n"
53078"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53079"_mm_xor_ps(__m128 __a, __m128 __b)\n"
53080"{\n"
53081" return (__m128)((__v4su)__a ^ (__v4su)__b);\n"
53082"}\n"
53083"\n"
53084"/// Compares two 32-bit float values in the low-order bits of both\n"
53085"/// operands for equality and returns the result of the comparison in the\n"
53086"/// low-order bits of a vector [4 x float].\n"
53087"///\n"
53088"/// \\headerfile <x86intrin.h>\n"
53089"///\n"
53090"/// This intrinsic corresponds to the <c> VCMPEQSS / CMPEQSS </c> instructions.\n"
53091"///\n"
53092"/// \\param __a\n"
53093"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53094"/// 32 bits of this operand are used in the comparison.\n"
53095"/// \\param __b\n"
53096"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53097"/// 32 bits of this operand are used in the comparison.\n"
53098"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53099"/// in the low-order bits.\n"
53100"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53101"_mm_cmpeq_ss(__m128 __a, __m128 __b)\n"
53102"{\n"
53103" return (__m128)__builtin_ia32_cmpeqss((__v4sf)__a, (__v4sf)__b);\n"
53104"}\n"
53105"\n"
53106"/// Compares each of the corresponding 32-bit float values of the\n"
53107"/// 128-bit vectors of [4 x float] for equality.\n"
53108"///\n"
53109"/// \\headerfile <x86intrin.h>\n"
53110"///\n"
53111"/// This intrinsic corresponds to the <c> VCMPEQPS / CMPEQPS </c> instructions.\n"
53112"///\n"
53113"/// \\param __a\n"
53114"/// A 128-bit vector of [4 x float].\n"
53115"/// \\param __b\n"
53116"/// A 128-bit vector of [4 x float].\n"
53117"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53118"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53119"_mm_cmpeq_ps(__m128 __a, __m128 __b)\n"
53120"{\n"
53121" return (__m128)__builtin_ia32_cmpeqps((__v4sf)__a, (__v4sf)__b);\n"
53122"}\n"
53123"\n"
53124"/// Compares two 32-bit float values in the low-order bits of both\n"
53125"/// operands to determine if the value in the first operand is less than the\n"
53126"/// corresponding value in the second operand and returns the result of the\n"
53127"/// comparison in the low-order bits of a vector of [4 x float].\n"
53128"///\n"
53129"/// \\headerfile <x86intrin.h>\n"
53130"///\n"
53131"/// This intrinsic corresponds to the <c> VCMPLTSS / CMPLTSS </c> instructions.\n"
53132"///\n"
53133"/// \\param __a\n"
53134"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53135"/// 32 bits of this operand are used in the comparison.\n"
53136"/// \\param __b\n"
53137"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53138"/// 32 bits of this operand are used in the comparison.\n"
53139"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53140"/// in the low-order bits.\n"
53141"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53142"_mm_cmplt_ss(__m128 __a, __m128 __b)\n"
53143"{\n"
53144" return (__m128)__builtin_ia32_cmpltss((__v4sf)__a, (__v4sf)__b);\n"
53145"}\n"
53146"\n"
53147"/// Compares each of the corresponding 32-bit float values of the\n"
53148"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53149"/// operand are less than those in the second operand.\n"
53150"///\n"
53151"/// \\headerfile <x86intrin.h>\n"
53152"///\n"
53153"/// This intrinsic corresponds to the <c> VCMPLTPS / CMPLTPS </c> instructions.\n"
53154"///\n"
53155"/// \\param __a\n"
53156"/// A 128-bit vector of [4 x float].\n"
53157"/// \\param __b\n"
53158"/// A 128-bit vector of [4 x float].\n"
53159"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53160"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53161"_mm_cmplt_ps(__m128 __a, __m128 __b)\n"
53162"{\n"
53163" return (__m128)__builtin_ia32_cmpltps((__v4sf)__a, (__v4sf)__b);\n"
53164"}\n"
53165"\n"
53166"/// Compares two 32-bit float values in the low-order bits of both\n"
53167"/// operands to determine if the value in the first operand is less than or\n"
53168"/// equal to the corresponding value in the second operand and returns the\n"
53169"/// result of the comparison in the low-order bits of a vector of\n"
53170"/// [4 x float].\n"
53171"///\n"
53172"/// \\headerfile <x86intrin.h>\n"
53173"///\n"
53174"/// This intrinsic corresponds to the <c> VCMPLESS / CMPLESS </c> instructions.\n"
53175"///\n"
53176"/// \\param __a\n"
53177"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53178"/// 32 bits of this operand are used in the comparison.\n"
53179"/// \\param __b\n"
53180"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53181"/// 32 bits of this operand are used in the comparison.\n"
53182"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53183"/// in the low-order bits.\n"
53184"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53185"_mm_cmple_ss(__m128 __a, __m128 __b)\n"
53186"{\n"
53187" return (__m128)__builtin_ia32_cmpless((__v4sf)__a, (__v4sf)__b);\n"
53188"}\n"
53189"\n"
53190"/// Compares each of the corresponding 32-bit float values of the\n"
53191"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53192"/// operand are less than or equal to those in the second operand.\n"
53193"///\n"
53194"/// \\headerfile <x86intrin.h>\n"
53195"///\n"
53196"/// This intrinsic corresponds to the <c> VCMPLEPS / CMPLEPS </c> instructions.\n"
53197"///\n"
53198"/// \\param __a\n"
53199"/// A 128-bit vector of [4 x float].\n"
53200"/// \\param __b\n"
53201"/// A 128-bit vector of [4 x float].\n"
53202"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53203"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53204"_mm_cmple_ps(__m128 __a, __m128 __b)\n"
53205"{\n"
53206" return (__m128)__builtin_ia32_cmpleps((__v4sf)__a, (__v4sf)__b);\n"
53207"}\n"
53208"\n"
53209"/// Compares two 32-bit float values in the low-order bits of both\n"
53210"/// operands to determine if the value in the first operand is greater than\n"
53211"/// the corresponding value in the second operand and returns the result of\n"
53212"/// the comparison in the low-order bits of a vector of [4 x float].\n"
53213"///\n"
53214"/// \\headerfile <x86intrin.h>\n"
53215"///\n"
53216"/// This intrinsic corresponds to the <c> VCMPLTSS / CMPLTSS </c> instructions.\n"
53217"///\n"
53218"/// \\param __a\n"
53219"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53220"/// 32 bits of this operand are used in the comparison.\n"
53221"/// \\param __b\n"
53222"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53223"/// 32 bits of this operand are used in the comparison.\n"
53224"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53225"/// in the low-order bits.\n"
53226"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53227"_mm_cmpgt_ss(__m128 __a, __m128 __b)\n"
53228"{\n"
53229" return (__m128)__builtin_shufflevector((__v4sf)__a,\n"
53230" (__v4sf)__builtin_ia32_cmpltss((__v4sf)__b, (__v4sf)__a),\n"
53231" 4, 1, 2, 3);\n"
53232"}\n"
53233"\n"
53234"/// Compares each of the corresponding 32-bit float values of the\n"
53235"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53236"/// operand are greater than those in the second operand.\n"
53237"///\n"
53238"/// \\headerfile <x86intrin.h>\n"
53239"///\n"
53240"/// This intrinsic corresponds to the <c> VCMPLTPS / CMPLTPS </c> instructions.\n"
53241"///\n"
53242"/// \\param __a\n"
53243"/// A 128-bit vector of [4 x float].\n"
53244"/// \\param __b\n"
53245"/// A 128-bit vector of [4 x float].\n"
53246"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53247"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53248"_mm_cmpgt_ps(__m128 __a, __m128 __b)\n"
53249"{\n"
53250" return (__m128)__builtin_ia32_cmpltps((__v4sf)__b, (__v4sf)__a);\n"
53251"}\n"
53252"\n"
53253"/// Compares two 32-bit float values in the low-order bits of both\n"
53254"/// operands to determine if the value in the first operand is greater than\n"
53255"/// or equal to the corresponding value in the second operand and returns\n"
53256"/// the result of the comparison in the low-order bits of a vector of\n"
53257"/// [4 x float].\n"
53258"///\n"
53259"/// \\headerfile <x86intrin.h>\n"
53260"///\n"
53261"/// This intrinsic corresponds to the <c> VCMPLESS / CMPLESS </c> instructions.\n"
53262"///\n"
53263"/// \\param __a\n"
53264"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53265"/// 32 bits of this operand are used in the comparison.\n"
53266"/// \\param __b\n"
53267"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53268"/// 32 bits of this operand are used in the comparison.\n"
53269"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53270"/// in the low-order bits.\n"
53271"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53272"_mm_cmpge_ss(__m128 __a, __m128 __b)\n"
53273"{\n"
53274" return (__m128)__builtin_shufflevector((__v4sf)__a,\n"
53275" (__v4sf)__builtin_ia32_cmpless((__v4sf)__b, (__v4sf)__a),\n"
53276" 4, 1, 2, 3);\n"
53277"}\n"
53278"\n"
53279"/// Compares each of the corresponding 32-bit float values of the\n"
53280"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53281"/// operand are greater than or equal to those in the second operand.\n"
53282"///\n"
53283"/// \\headerfile <x86intrin.h>\n"
53284"///\n"
53285"/// This intrinsic corresponds to the <c> VCMPLEPS / CMPLEPS </c> instructions.\n"
53286"///\n"
53287"/// \\param __a\n"
53288"/// A 128-bit vector of [4 x float].\n"
53289"/// \\param __b\n"
53290"/// A 128-bit vector of [4 x float].\n"
53291"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53292"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53293"_mm_cmpge_ps(__m128 __a, __m128 __b)\n"
53294"{\n"
53295" return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a);\n"
53296"}\n"
53297"\n"
53298"/// Compares two 32-bit float values in the low-order bits of both\n"
53299"/// operands for inequality and returns the result of the comparison in the\n"
53300"/// low-order bits of a vector of [4 x float].\n"
53301"///\n"
53302"/// \\headerfile <x86intrin.h>\n"
53303"///\n"
53304"/// This intrinsic corresponds to the <c> VCMPNEQSS / CMPNEQSS </c>\n"
53305"/// instructions.\n"
53306"///\n"
53307"/// \\param __a\n"
53308"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53309"/// 32 bits of this operand are used in the comparison.\n"
53310"/// \\param __b\n"
53311"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53312"/// 32 bits of this operand are used in the comparison.\n"
53313"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53314"/// in the low-order bits.\n"
53315"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53316"_mm_cmpneq_ss(__m128 __a, __m128 __b)\n"
53317"{\n"
53318" return (__m128)__builtin_ia32_cmpneqss((__v4sf)__a, (__v4sf)__b);\n"
53319"}\n"
53320"\n"
53321"/// Compares each of the corresponding 32-bit float values of the\n"
53322"/// 128-bit vectors of [4 x float] for inequality.\n"
53323"///\n"
53324"/// \\headerfile <x86intrin.h>\n"
53325"///\n"
53326"/// This intrinsic corresponds to the <c> VCMPNEQPS / CMPNEQPS </c>\n"
53327"/// instructions.\n"
53328"///\n"
53329"/// \\param __a\n"
53330"/// A 128-bit vector of [4 x float].\n"
53331"/// \\param __b\n"
53332"/// A 128-bit vector of [4 x float].\n"
53333"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53334"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53335"_mm_cmpneq_ps(__m128 __a, __m128 __b)\n"
53336"{\n"
53337" return (__m128)__builtin_ia32_cmpneqps((__v4sf)__a, (__v4sf)__b);\n"
53338"}\n"
53339"\n"
53340"/// Compares two 32-bit float values in the low-order bits of both\n"
53341"/// operands to determine if the value in the first operand is not less than\n"
53342"/// the corresponding value in the second operand and returns the result of\n"
53343"/// the comparison in the low-order bits of a vector of [4 x float].\n"
53344"///\n"
53345"/// \\headerfile <x86intrin.h>\n"
53346"///\n"
53347"/// This intrinsic corresponds to the <c> VCMPNLTSS / CMPNLTSS </c>\n"
53348"/// instructions.\n"
53349"///\n"
53350"/// \\param __a\n"
53351"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53352"/// 32 bits of this operand are used in the comparison.\n"
53353"/// \\param __b\n"
53354"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53355"/// 32 bits of this operand are used in the comparison.\n"
53356"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53357"/// in the low-order bits.\n"
53358"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53359"_mm_cmpnlt_ss(__m128 __a, __m128 __b)\n"
53360"{\n"
53361" return (__m128)__builtin_ia32_cmpnltss((__v4sf)__a, (__v4sf)__b);\n"
53362"}\n"
53363"\n"
53364"/// Compares each of the corresponding 32-bit float values of the\n"
53365"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53366"/// operand are not less than those in the second operand.\n"
53367"///\n"
53368"/// \\headerfile <x86intrin.h>\n"
53369"///\n"
53370"/// This intrinsic corresponds to the <c> VCMPNLTPS / CMPNLTPS </c>\n"
53371"/// instructions.\n"
53372"///\n"
53373"/// \\param __a\n"
53374"/// A 128-bit vector of [4 x float].\n"
53375"/// \\param __b\n"
53376"/// A 128-bit vector of [4 x float].\n"
53377"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53378"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53379"_mm_cmpnlt_ps(__m128 __a, __m128 __b)\n"
53380"{\n"
53381" return (__m128)__builtin_ia32_cmpnltps((__v4sf)__a, (__v4sf)__b);\n"
53382"}\n"
53383"\n"
53384"/// Compares two 32-bit float values in the low-order bits of both\n"
53385"/// operands to determine if the value in the first operand is not less than\n"
53386"/// or equal to the corresponding value in the second operand and returns\n"
53387"/// the result of the comparison in the low-order bits of a vector of\n"
53388"/// [4 x float].\n"
53389"///\n"
53390"/// \\headerfile <x86intrin.h>\n"
53391"///\n"
53392"/// This intrinsic corresponds to the <c> VCMPNLESS / CMPNLESS </c>\n"
53393"/// instructions.\n"
53394"///\n"
53395"/// \\param __a\n"
53396"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53397"/// 32 bits of this operand are used in the comparison.\n"
53398"/// \\param __b\n"
53399"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53400"/// 32 bits of this operand are used in the comparison.\n"
53401"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53402"/// in the low-order bits.\n"
53403"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53404"_mm_cmpnle_ss(__m128 __a, __m128 __b)\n"
53405"{\n"
53406" return (__m128)__builtin_ia32_cmpnless((__v4sf)__a, (__v4sf)__b);\n"
53407"}\n"
53408"\n"
53409"/// Compares each of the corresponding 32-bit float values of the\n"
53410"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53411"/// operand are not less than or equal to those in the second operand.\n"
53412"///\n"
53413"/// \\headerfile <x86intrin.h>\n"
53414"///\n"
53415"/// This intrinsic corresponds to the <c> VCMPNLEPS / CMPNLEPS </c>\n"
53416"/// instructions.\n"
53417"///\n"
53418"/// \\param __a\n"
53419"/// A 128-bit vector of [4 x float].\n"
53420"/// \\param __b\n"
53421"/// A 128-bit vector of [4 x float].\n"
53422"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53423"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53424"_mm_cmpnle_ps(__m128 __a, __m128 __b)\n"
53425"{\n"
53426" return (__m128)__builtin_ia32_cmpnleps((__v4sf)__a, (__v4sf)__b);\n"
53427"}\n"
53428"\n"
53429"/// Compares two 32-bit float values in the low-order bits of both\n"
53430"/// operands to determine if the value in the first operand is not greater\n"
53431"/// than the corresponding value in the second operand and returns the\n"
53432"/// result of the comparison in the low-order bits of a vector of\n"
53433"/// [4 x float].\n"
53434"///\n"
53435"/// \\headerfile <x86intrin.h>\n"
53436"///\n"
53437"/// This intrinsic corresponds to the <c> VCMPNLTSS / CMPNLTSS </c>\n"
53438"/// instructions.\n"
53439"///\n"
53440"/// \\param __a\n"
53441"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53442"/// 32 bits of this operand are used in the comparison.\n"
53443"/// \\param __b\n"
53444"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53445"/// 32 bits of this operand are used in the comparison.\n"
53446"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53447"/// in the low-order bits.\n"
53448"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53449"_mm_cmpngt_ss(__m128 __a, __m128 __b)\n"
53450"{\n"
53451" return (__m128)__builtin_shufflevector((__v4sf)__a,\n"
53452" (__v4sf)__builtin_ia32_cmpnltss((__v4sf)__b, (__v4sf)__a),\n"
53453" 4, 1, 2, 3);\n"
53454"}\n"
53455"\n"
53456"/// Compares each of the corresponding 32-bit float values of the\n"
53457"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53458"/// operand are not greater than those in the second operand.\n"
53459"///\n"
53460"/// \\headerfile <x86intrin.h>\n"
53461"///\n"
53462"/// This intrinsic corresponds to the <c> VCMPNLTPS / CMPNLTPS </c>\n"
53463"/// instructions.\n"
53464"///\n"
53465"/// \\param __a\n"
53466"/// A 128-bit vector of [4 x float].\n"
53467"/// \\param __b\n"
53468"/// A 128-bit vector of [4 x float].\n"
53469"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53470"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53471"_mm_cmpngt_ps(__m128 __a, __m128 __b)\n"
53472"{\n"
53473" return (__m128)__builtin_ia32_cmpnltps((__v4sf)__b, (__v4sf)__a);\n"
53474"}\n"
53475"\n"
53476"/// Compares two 32-bit float values in the low-order bits of both\n"
53477"/// operands to determine if the value in the first operand is not greater\n"
53478"/// than or equal to the corresponding value in the second operand and\n"
53479"/// returns the result of the comparison in the low-order bits of a vector\n"
53480"/// of [4 x float].\n"
53481"///\n"
53482"/// \\headerfile <x86intrin.h>\n"
53483"///\n"
53484"/// This intrinsic corresponds to the <c> VCMPNLESS / CMPNLESS </c>\n"
53485"/// instructions.\n"
53486"///\n"
53487"/// \\param __a\n"
53488"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53489"/// 32 bits of this operand are used in the comparison.\n"
53490"/// \\param __b\n"
53491"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53492"/// 32 bits of this operand are used in the comparison.\n"
53493"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53494"/// in the low-order bits.\n"
53495"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53496"_mm_cmpnge_ss(__m128 __a, __m128 __b)\n"
53497"{\n"
53498" return (__m128)__builtin_shufflevector((__v4sf)__a,\n"
53499" (__v4sf)__builtin_ia32_cmpnless((__v4sf)__b, (__v4sf)__a),\n"
53500" 4, 1, 2, 3);\n"
53501"}\n"
53502"\n"
53503"/// Compares each of the corresponding 32-bit float values of the\n"
53504"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53505"/// operand are not greater than or equal to those in the second operand.\n"
53506"///\n"
53507"/// \\headerfile <x86intrin.h>\n"
53508"///\n"
53509"/// This intrinsic corresponds to the <c> VCMPNLEPS / CMPNLEPS </c>\n"
53510"/// instructions.\n"
53511"///\n"
53512"/// \\param __a\n"
53513"/// A 128-bit vector of [4 x float].\n"
53514"/// \\param __b\n"
53515"/// A 128-bit vector of [4 x float].\n"
53516"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53517"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53518"_mm_cmpnge_ps(__m128 __a, __m128 __b)\n"
53519"{\n"
53520" return (__m128)__builtin_ia32_cmpnleps((__v4sf)__b, (__v4sf)__a);\n"
53521"}\n"
53522"\n"
53523"/// Compares two 32-bit float values in the low-order bits of both\n"
53524"/// operands to determine if the value in the first operand is ordered with\n"
53525"/// respect to the corresponding value in the second operand and returns the\n"
53526"/// result of the comparison in the low-order bits of a vector of\n"
53527"/// [4 x float].\n"
53528"///\n"
53529"/// \\headerfile <x86intrin.h>\n"
53530"///\n"
53531"/// This intrinsic corresponds to the <c> VCMPORDSS / CMPORDSS </c>\n"
53532"/// instructions.\n"
53533"///\n"
53534"/// \\param __a\n"
53535"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53536"/// 32 bits of this operand are used in the comparison.\n"
53537"/// \\param __b\n"
53538"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53539"/// 32 bits of this operand are used in the comparison.\n"
53540"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53541"/// in the low-order bits.\n"
53542"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53543"_mm_cmpord_ss(__m128 __a, __m128 __b)\n"
53544"{\n"
53545" return (__m128)__builtin_ia32_cmpordss((__v4sf)__a, (__v4sf)__b);\n"
53546"}\n"
53547"\n"
53548"/// Compares each of the corresponding 32-bit float values of the\n"
53549"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53550"/// operand are ordered with respect to those in the second operand.\n"
53551"///\n"
53552"/// \\headerfile <x86intrin.h>\n"
53553"///\n"
53554"/// This intrinsic corresponds to the <c> VCMPORDPS / CMPORDPS </c>\n"
53555"/// instructions.\n"
53556"///\n"
53557"/// \\param __a\n"
53558"/// A 128-bit vector of [4 x float].\n"
53559"/// \\param __b\n"
53560"/// A 128-bit vector of [4 x float].\n"
53561"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53562"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53563"_mm_cmpord_ps(__m128 __a, __m128 __b)\n"
53564"{\n"
53565" return (__m128)__builtin_ia32_cmpordps((__v4sf)__a, (__v4sf)__b);\n"
53566"}\n"
53567"\n"
53568"/// Compares two 32-bit float values in the low-order bits of both\n"
53569"/// operands to determine if the value in the first operand is unordered\n"
53570"/// with respect to the corresponding value in the second operand and\n"
53571"/// returns the result of the comparison in the low-order bits of a vector\n"
53572"/// of [4 x float].\n"
53573"///\n"
53574"/// \\headerfile <x86intrin.h>\n"
53575"///\n"
53576"/// This intrinsic corresponds to the <c> VCMPUNORDSS / CMPUNORDSS </c>\n"
53577"/// instructions.\n"
53578"///\n"
53579"/// \\param __a\n"
53580"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53581"/// 32 bits of this operand are used in the comparison.\n"
53582"/// \\param __b\n"
53583"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53584"/// 32 bits of this operand are used in the comparison.\n"
53585"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53586"/// in the low-order bits.\n"
53587"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53588"_mm_cmpunord_ss(__m128 __a, __m128 __b)\n"
53589"{\n"
53590" return (__m128)__builtin_ia32_cmpunordss((__v4sf)__a, (__v4sf)__b);\n"
53591"}\n"
53592"\n"
53593"/// Compares each of the corresponding 32-bit float values of the\n"
53594"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53595"/// operand are unordered with respect to those in the second operand.\n"
53596"///\n"
53597"/// \\headerfile <x86intrin.h>\n"
53598"///\n"
53599"/// This intrinsic corresponds to the <c> VCMPUNORDPS / CMPUNORDPS </c>\n"
53600"/// instructions.\n"
53601"///\n"
53602"/// \\param __a\n"
53603"/// A 128-bit vector of [4 x float].\n"
53604"/// \\param __b\n"
53605"/// A 128-bit vector of [4 x float].\n"
53606"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53607"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53608"_mm_cmpunord_ps(__m128 __a, __m128 __b)\n"
53609"{\n"
53610" return (__m128)__builtin_ia32_cmpunordps((__v4sf)__a, (__v4sf)__b);\n"
53611"}\n"
53612"\n"
53613"/// Compares two 32-bit float values in the low-order bits of both\n"
53614"/// operands for equality and returns the result of the comparison.\n"
53615"///\n"
53616"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53617"///\n"
53618"/// \\headerfile <x86intrin.h>\n"
53619"///\n"
53620"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c>\n"
53621"/// instructions.\n"
53622"///\n"
53623"/// \\param __a\n"
53624"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53625"/// used in the comparison.\n"
53626"/// \\param __b\n"
53627"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53628"/// used in the comparison.\n"
53629"/// \\returns An integer containing the comparison results. If either of the\n"
53630"/// two lower 32-bit values is NaN, 0 is returned.\n"
53631"static __inline__ int __DEFAULT_FN_ATTRS\n"
53632"_mm_comieq_ss(__m128 __a, __m128 __b)\n"
53633"{\n"
53634" return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b);\n"
53635"}\n"
53636"\n"
53637"/// Compares two 32-bit float values in the low-order bits of both\n"
53638"/// operands to determine if the first operand is less than the second\n"
53639"/// operand and returns the result of the comparison.\n"
53640"///\n"
53641"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53642"///\n"
53643"/// \\headerfile <x86intrin.h>\n"
53644"///\n"
53645"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c>\n"
53646"/// instructions.\n"
53647"///\n"
53648"/// \\param __a\n"
53649"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53650"/// used in the comparison.\n"
53651"/// \\param __b\n"
53652"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53653"/// used in the comparison.\n"
53654"/// \\returns An integer containing the comparison results. If either of the two\n"
53655"/// lower 32-bit values is NaN, 0 is returned.\n"
53656"static __inline__ int __DEFAULT_FN_ATTRS\n"
53657"_mm_comilt_ss(__m128 __a, __m128 __b)\n"
53658"{\n"
53659" return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b);\n"
53660"}\n"
53661"\n"
53662"/// Compares two 32-bit float values in the low-order bits of both\n"
53663"/// operands to determine if the first operand is less than or equal to the\n"
53664"/// second operand and returns the result of the comparison.\n"
53665"///\n"
53666"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53667"///\n"
53668"/// \\headerfile <x86intrin.h>\n"
53669"///\n"
53670"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n"
53671"///\n"
53672"/// \\param __a\n"
53673"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53674"/// used in the comparison.\n"
53675"/// \\param __b\n"
53676"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53677"/// used in the comparison.\n"
53678"/// \\returns An integer containing the comparison results. If either of the two\n"
53679"/// lower 32-bit values is NaN, 0 is returned.\n"
53680"static __inline__ int __DEFAULT_FN_ATTRS\n"
53681"_mm_comile_ss(__m128 __a, __m128 __b)\n"
53682"{\n"
53683" return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b);\n"
53684"}\n"
53685"\n"
53686"/// Compares two 32-bit float values in the low-order bits of both\n"
53687"/// operands to determine if the first operand is greater than the second\n"
53688"/// operand and returns the result of the comparison.\n"
53689"///\n"
53690"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53691"///\n"
53692"/// \\headerfile <x86intrin.h>\n"
53693"///\n"
53694"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n"
53695"///\n"
53696"/// \\param __a\n"
53697"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53698"/// used in the comparison.\n"
53699"/// \\param __b\n"
53700"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53701"/// used in the comparison.\n"
53702"/// \\returns An integer containing the comparison results. If either of the\n"
53703"/// two lower 32-bit values is NaN, 0 is returned.\n"
53704"static __inline__ int __DEFAULT_FN_ATTRS\n"
53705"_mm_comigt_ss(__m128 __a, __m128 __b)\n"
53706"{\n"
53707" return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b);\n"
53708"}\n"
53709"\n"
53710"/// Compares two 32-bit float values in the low-order bits of both\n"
53711"/// operands to determine if the first operand is greater than or equal to\n"
53712"/// the second operand and returns the result of the comparison.\n"
53713"///\n"
53714"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53715"///\n"
53716"/// \\headerfile <x86intrin.h>\n"
53717"///\n"
53718"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n"
53719"///\n"
53720"/// \\param __a\n"
53721"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53722"/// used in the comparison.\n"
53723"/// \\param __b\n"
53724"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53725"/// used in the comparison.\n"
53726"/// \\returns An integer containing the comparison results. If either of the two\n"
53727"/// lower 32-bit values is NaN, 0 is returned.\n"
53728"static __inline__ int __DEFAULT_FN_ATTRS\n"
53729"_mm_comige_ss(__m128 __a, __m128 __b)\n"
53730"{\n"
53731" return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b);\n"
53732"}\n"
53733"\n"
53734"/// Compares two 32-bit float values in the low-order bits of both\n"
53735"/// operands to determine if the first operand is not equal to the second\n"
53736"/// operand and returns the result of the comparison.\n"
53737"///\n"
53738"/// If either of the two lower 32-bit values is NaN, 1 is returned.\n"
53739"///\n"
53740"/// \\headerfile <x86intrin.h>\n"
53741"///\n"
53742"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n"
53743"///\n"
53744"/// \\param __a\n"
53745"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53746"/// used in the comparison.\n"
53747"/// \\param __b\n"
53748"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53749"/// used in the comparison.\n"
53750"/// \\returns An integer containing the comparison results. If either of the\n"
53751"/// two lower 32-bit values is NaN, 1 is returned.\n"
53752"static __inline__ int __DEFAULT_FN_ATTRS\n"
53753"_mm_comineq_ss(__m128 __a, __m128 __b)\n"
53754"{\n"
53755" return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b);\n"
53756"}\n"
53757"\n"
53758"/// Performs an unordered comparison of two 32-bit float values using\n"
53759"/// the low-order bits of both operands to determine equality and returns\n"
53760"/// the result of the comparison.\n"
53761"///\n"
53762"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53763"///\n"
53764"/// \\headerfile <x86intrin.h>\n"
53765"///\n"
53766"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
53767"///\n"
53768"/// \\param __a\n"
53769"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53770"/// used in the comparison.\n"
53771"/// \\param __b\n"
53772"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53773"/// used in the comparison.\n"
53774"/// \\returns An integer containing the comparison results. If either of the two\n"
53775"/// lower 32-bit values is NaN, 0 is returned.\n"
53776"static __inline__ int __DEFAULT_FN_ATTRS\n"
53777"_mm_ucomieq_ss(__m128 __a, __m128 __b)\n"
53778"{\n"
53779" return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b);\n"
53780"}\n"
53781"\n"
53782"/// Performs an unordered comparison of two 32-bit float values using\n"
53783"/// the low-order bits of both operands to determine if the first operand is\n"
53784"/// less than the second operand and returns the result of the comparison.\n"
53785"///\n"
53786"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53787"///\n"
53788"/// \\headerfile <x86intrin.h>\n"
53789"///\n"
53790"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
53791"///\n"
53792"/// \\param __a\n"
53793"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53794"/// used in the comparison.\n"
53795"/// \\param __b\n"
53796"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53797"/// used in the comparison.\n"
53798"/// \\returns An integer containing the comparison results. If either of the two\n"
53799"/// lower 32-bit values is NaN, 0 is returned.\n"
53800"static __inline__ int __DEFAULT_FN_ATTRS\n"
53801"_mm_ucomilt_ss(__m128 __a, __m128 __b)\n"
53802"{\n"
53803" return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b);\n"
53804"}\n"
53805"\n"
53806"/// Performs an unordered comparison of two 32-bit float values using\n"
53807"/// the low-order bits of both operands to determine if the first operand is\n"
53808"/// less than or equal to the second operand and returns the result of the\n"
53809"/// comparison.\n"
53810"///\n"
53811"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53812"///\n"
53813"/// \\headerfile <x86intrin.h>\n"
53814"///\n"
53815"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
53816"///\n"
53817"/// \\param __a\n"
53818"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53819"/// used in the comparison.\n"
53820"/// \\param __b\n"
53821"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53822"/// used in the comparison.\n"
53823"/// \\returns An integer containing the comparison results. If either of the two\n"
53824"/// lower 32-bit values is NaN, 0 is returned.\n"
53825"static __inline__ int __DEFAULT_FN_ATTRS\n"
53826"_mm_ucomile_ss(__m128 __a, __m128 __b)\n"
53827"{\n"
53828" return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b);\n"
53829"}\n"
53830"\n"
53831"/// Performs an unordered comparison of two 32-bit float values using\n"
53832"/// the low-order bits of both operands to determine if the first operand is\n"
53833"/// greater than the second operand and returns the result of the\n"
53834"/// comparison.\n"
53835"///\n"
53836"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53837"///\n"
53838"/// \\headerfile <x86intrin.h>\n"
53839"///\n"
53840"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
53841"///\n"
53842"/// \\param __a\n"
53843"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53844"/// used in the comparison.\n"
53845"/// \\param __b\n"
53846"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53847"/// used in the comparison.\n"
53848"/// \\returns An integer containing the comparison results. If either of the two\n"
53849"/// lower 32-bit values is NaN, 0 is returned.\n"
53850"static __inline__ int __DEFAULT_FN_ATTRS\n"
53851"_mm_ucomigt_ss(__m128 __a, __m128 __b)\n"
53852"{\n"
53853" return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b);\n"
53854"}\n"
53855"\n"
53856"/// Performs an unordered comparison of two 32-bit float values using\n"
53857"/// the low-order bits of both operands to determine if the first operand is\n"
53858"/// greater than or equal to the second operand and returns the result of\n"
53859"/// the comparison.\n"
53860"///\n"
53861"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53862"///\n"
53863"/// \\headerfile <x86intrin.h>\n"
53864"///\n"
53865"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
53866"///\n"
53867"/// \\param __a\n"
53868"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53869"/// used in the comparison.\n"
53870"/// \\param __b\n"
53871"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53872"/// used in the comparison.\n"
53873"/// \\returns An integer containing the comparison results. If either of the two\n"
53874"/// lower 32-bit values is NaN, 0 is returned.\n"
53875"static __inline__ int __DEFAULT_FN_ATTRS\n"
53876"_mm_ucomige_ss(__m128 __a, __m128 __b)\n"
53877"{\n"
53878" return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b);\n"
53879"}\n"
53880"\n"
53881"/// Performs an unordered comparison of two 32-bit float values using\n"
53882"/// the low-order bits of both operands to determine inequality and returns\n"
53883"/// the result of the comparison.\n"
53884"///\n"
53885"/// If either of the two lower 32-bit values is NaN, 1 is returned.\n"
53886"///\n"
53887"/// \\headerfile <x86intrin.h>\n"
53888"///\n"
53889"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
53890"///\n"
53891"/// \\param __a\n"
53892"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53893"/// used in the comparison.\n"
53894"/// \\param __b\n"
53895"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53896"/// used in the comparison.\n"
53897"/// \\returns An integer containing the comparison results. If either of the two\n"
53898"/// lower 32-bit values is NaN, 1 is returned.\n"
53899"static __inline__ int __DEFAULT_FN_ATTRS\n"
53900"_mm_ucomineq_ss(__m128 __a, __m128 __b)\n"
53901"{\n"
53902" return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b);\n"
53903"}\n"
53904"\n"
53905"/// Converts a float value contained in the lower 32 bits of a vector of\n"
53906"/// [4 x float] into a 32-bit integer.\n"
53907"///\n"
53908"/// \\headerfile <x86intrin.h>\n"
53909"///\n"
53910"/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>\n"
53911"/// instructions.\n"
53912"///\n"
53913"/// \\param __a\n"
53914"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53915"/// used in the conversion.\n"
53916"/// \\returns A 32-bit integer containing the converted value.\n"
53917"static __inline__ int __DEFAULT_FN_ATTRS\n"
53918"_mm_cvtss_si32(__m128 __a)\n"
53919"{\n"
53920" return __builtin_ia32_cvtss2si((__v4sf)__a);\n"
53921"}\n"
53922"\n"
53923"/// Converts a float value contained in the lower 32 bits of a vector of\n"
53924"/// [4 x float] into a 32-bit integer.\n"
53925"///\n"
53926"/// \\headerfile <x86intrin.h>\n"
53927"///\n"
53928"/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>\n"
53929"/// instructions.\n"
53930"///\n"
53931"/// \\param __a\n"
53932"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53933"/// used in the conversion.\n"
53934"/// \\returns A 32-bit integer containing the converted value.\n"
53935"static __inline__ int __DEFAULT_FN_ATTRS\n"
53936"_mm_cvt_ss2si(__m128 __a)\n"
53937"{\n"
53938" return _mm_cvtss_si32(__a);\n"
53939"}\n"
53940"\n"
53941"#ifdef __x86_64__\n"
53942"\n"
53943"/// Converts a float value contained in the lower 32 bits of a vector of\n"
53944"/// [4 x float] into a 64-bit integer.\n"
53945"///\n"
53946"/// \\headerfile <x86intrin.h>\n"
53947"///\n"
53948"/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>\n"
53949"/// instructions.\n"
53950"///\n"
53951"/// \\param __a\n"
53952"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53953"/// used in the conversion.\n"
53954"/// \\returns A 64-bit integer containing the converted value.\n"
53955"static __inline__ long long __DEFAULT_FN_ATTRS\n"
53956"_mm_cvtss_si64(__m128 __a)\n"
53957"{\n"
53958" return __builtin_ia32_cvtss2si64((__v4sf)__a);\n"
53959"}\n"
53960"\n"
53961"#endif\n"
53962"\n"
53963"/// Converts two low-order float values in a 128-bit vector of\n"
53964"/// [4 x float] into a 64-bit vector of [2 x i32].\n"
53965"///\n"
53966"/// \\headerfile <x86intrin.h>\n"
53967"///\n"
53968"/// This intrinsic corresponds to the <c> CVTPS2PI </c> instruction.\n"
53969"///\n"
53970"/// \\param __a\n"
53971"/// A 128-bit vector of [4 x float].\n"
53972"/// \\returns A 64-bit integer vector containing the converted values.\n"
53973"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
53974"_mm_cvtps_pi32(__m128 __a)\n"
53975"{\n"
53976" return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__a);\n"
53977"}\n"
53978"\n"
53979"/// Converts two low-order float values in a 128-bit vector of\n"
53980"/// [4 x float] into a 64-bit vector of [2 x i32].\n"
53981"///\n"
53982"/// \\headerfile <x86intrin.h>\n"
53983"///\n"
53984"/// This intrinsic corresponds to the <c> CVTPS2PI </c> instruction.\n"
53985"///\n"
53986"/// \\param __a\n"
53987"/// A 128-bit vector of [4 x float].\n"
53988"/// \\returns A 64-bit integer vector containing the converted values.\n"
53989"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
53990"_mm_cvt_ps2pi(__m128 __a)\n"
53991"{\n"
53992" return _mm_cvtps_pi32(__a);\n"
53993"}\n"
53994"\n"
53995"/// Converts a float value contained in the lower 32 bits of a vector of\n"
53996"/// [4 x float] into a 32-bit integer, truncating the result when it is\n"
53997"/// inexact.\n"
53998"///\n"
53999"/// \\headerfile <x86intrin.h>\n"
54000"///\n"
54001"/// This intrinsic corresponds to the <c> VCVTTSS2SI / CVTTSS2SI </c>\n"
54002"/// instructions.\n"
54003"///\n"
54004"/// \\param __a\n"
54005"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
54006"/// used in the conversion.\n"
54007"/// \\returns A 32-bit integer containing the converted value.\n"
54008"static __inline__ int __DEFAULT_FN_ATTRS\n"
54009"_mm_cvttss_si32(__m128 __a)\n"
54010"{\n"
54011" return __builtin_ia32_cvttss2si((__v4sf)__a);\n"
54012"}\n"
54013"\n"
54014"/// Converts a float value contained in the lower 32 bits of a vector of\n"
54015"/// [4 x float] into a 32-bit integer, truncating the result when it is\n"
54016"/// inexact.\n"
54017"///\n"
54018"/// \\headerfile <x86intrin.h>\n"
54019"///\n"
54020"/// This intrinsic corresponds to the <c> VCVTTSS2SI / CVTTSS2SI </c>\n"
54021"/// instructions.\n"
54022"///\n"
54023"/// \\param __a\n"
54024"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
54025"/// used in the conversion.\n"
54026"/// \\returns A 32-bit integer containing the converted value.\n"
54027"static __inline__ int __DEFAULT_FN_ATTRS\n"
54028"_mm_cvtt_ss2si(__m128 __a)\n"
54029"{\n"
54030" return _mm_cvttss_si32(__a);\n"
54031"}\n"
54032"\n"
54033"#ifdef __x86_64__\n"
54034"/// Converts a float value contained in the lower 32 bits of a vector of\n"
54035"/// [4 x float] into a 64-bit integer, truncating the result when it is\n"
54036"/// inexact.\n"
54037"///\n"
54038"/// \\headerfile <x86intrin.h>\n"
54039"///\n"
54040"/// This intrinsic corresponds to the <c> VCVTTSS2SI / CVTTSS2SI </c>\n"
54041"/// instructions.\n"
54042"///\n"
54043"/// \\param __a\n"
54044"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
54045"/// used in the conversion.\n"
54046"/// \\returns A 64-bit integer containing the converted value.\n"
54047"static __inline__ long long __DEFAULT_FN_ATTRS\n"
54048"_mm_cvttss_si64(__m128 __a)\n"
54049"{\n"
54050" return __builtin_ia32_cvttss2si64((__v4sf)__a);\n"
54051"}\n"
54052"#endif\n"
54053"\n"
54054"/// Converts two low-order float values in a 128-bit vector of\n"
54055"/// [4 x float] into a 64-bit vector of [2 x i32], truncating the result\n"
54056"/// when it is inexact.\n"
54057"///\n"
54058"/// \\headerfile <x86intrin.h>\n"
54059"///\n"
54060"/// This intrinsic corresponds to the <c> CVTTPS2PI / VTTPS2PI </c>\n"
54061"/// instructions.\n"
54062"///\n"
54063"/// \\param __a\n"
54064"/// A 128-bit vector of [4 x float].\n"
54065"/// \\returns A 64-bit integer vector containing the converted values.\n"
54066"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
54067"_mm_cvttps_pi32(__m128 __a)\n"
54068"{\n"
54069" return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a);\n"
54070"}\n"
54071"\n"
54072"/// Converts two low-order float values in a 128-bit vector of [4 x\n"
54073"/// float] into a 64-bit vector of [2 x i32], truncating the result when it\n"
54074"/// is inexact.\n"
54075"///\n"
54076"/// \\headerfile <x86intrin.h>\n"
54077"///\n"
54078"/// This intrinsic corresponds to the <c> CVTTPS2PI </c> instruction.\n"
54079"///\n"
54080"/// \\param __a\n"
54081"/// A 128-bit vector of [4 x float].\n"
54082"/// \\returns A 64-bit integer vector containing the converted values.\n"
54083"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
54084"_mm_cvtt_ps2pi(__m128 __a)\n"
54085"{\n"
54086" return _mm_cvttps_pi32(__a);\n"
54087"}\n"
54088"\n"
54089"/// Converts a 32-bit signed integer value into a floating point value\n"
54090"/// and writes it to the lower 32 bits of the destination. The remaining\n"
54091"/// higher order elements of the destination vector are copied from the\n"
54092"/// corresponding elements in the first operand.\n"
54093"///\n"
54094"/// \\headerfile <x86intrin.h>\n"
54095"///\n"
54096"/// This intrinsic corresponds to the <c> VCVTSI2SS / CVTSI2SS </c> instruction.\n"
54097"///\n"
54098"/// \\param __a\n"
54099"/// A 128-bit vector of [4 x float].\n"
54100"/// \\param __b\n"
54101"/// A 32-bit signed integer operand containing the value to be converted.\n"
54102"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
54103"/// converted value of the second operand. The upper 96 bits are copied from\n"
54104"/// the upper 96 bits of the first operand.\n"
54105"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54106"_mm_cvtsi32_ss(__m128 __a, int __b)\n"
54107"{\n"
54108" __a[0] = __b;\n"
54109" return __a;\n"
54110"}\n"
54111"\n"
54112"/// Converts a 32-bit signed integer value into a floating point value\n"
54113"/// and writes it to the lower 32 bits of the destination. The remaining\n"
54114"/// higher order elements of the destination are copied from the\n"
54115"/// corresponding elements in the first operand.\n"
54116"///\n"
54117"/// \\headerfile <x86intrin.h>\n"
54118"///\n"
54119"/// This intrinsic corresponds to the <c> VCVTSI2SS / CVTSI2SS </c> instruction.\n"
54120"///\n"
54121"/// \\param __a\n"
54122"/// A 128-bit vector of [4 x float].\n"
54123"/// \\param __b\n"
54124"/// A 32-bit signed integer operand containing the value to be converted.\n"
54125"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
54126"/// converted value of the second operand. The upper 96 bits are copied from\n"
54127"/// the upper 96 bits of the first operand.\n"
54128"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54129"_mm_cvt_si2ss(__m128 __a, int __b)\n"
54130"{\n"
54131" return _mm_cvtsi32_ss(__a, __b);\n"
54132"}\n"
54133"\n"
54134"#ifdef __x86_64__\n"
54135"\n"
54136"/// Converts a 64-bit signed integer value into a floating point value\n"
54137"/// and writes it to the lower 32 bits of the destination. The remaining\n"
54138"/// higher order elements of the destination are copied from the\n"
54139"/// corresponding elements in the first operand.\n"
54140"///\n"
54141"/// \\headerfile <x86intrin.h>\n"
54142"///\n"
54143"/// This intrinsic corresponds to the <c> VCVTSI2SS / CVTSI2SS </c> instruction.\n"
54144"///\n"
54145"/// \\param __a\n"
54146"/// A 128-bit vector of [4 x float].\n"
54147"/// \\param __b\n"
54148"/// A 64-bit signed integer operand containing the value to be converted.\n"
54149"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
54150"/// converted value of the second operand. The upper 96 bits are copied from\n"
54151"/// the upper 96 bits of the first operand.\n"
54152"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54153"_mm_cvtsi64_ss(__m128 __a, long long __b)\n"
54154"{\n"
54155" __a[0] = __b;\n"
54156" return __a;\n"
54157"}\n"
54158"\n"
54159"#endif\n"
54160"\n"
54161"/// Converts two elements of a 64-bit vector of [2 x i32] into two\n"
54162"/// floating point values and writes them to the lower 64-bits of the\n"
54163"/// destination. The remaining higher order elements of the destination are\n"
54164"/// copied from the corresponding elements in the first operand.\n"
54165"///\n"
54166"/// \\headerfile <x86intrin.h>\n"
54167"///\n"
54168"/// This intrinsic corresponds to the <c> CVTPI2PS </c> instruction.\n"
54169"///\n"
54170"/// \\param __a\n"
54171"/// A 128-bit vector of [4 x float].\n"
54172"/// \\param __b\n"
54173"/// A 64-bit vector of [2 x i32]. The elements in this vector are converted\n"
54174"/// and written to the corresponding low-order elements in the destination.\n"
54175"/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n"
54176"/// converted value of the second operand. The upper 64 bits are copied from\n"
54177"/// the upper 64 bits of the first operand.\n"
54178"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
54179"_mm_cvtpi32_ps(__m128 __a, __m64 __b)\n"
54180"{\n"
54181" return __builtin_ia32_cvtpi2ps((__v4sf)__a, (__v2si)__b);\n"
54182"}\n"
54183"\n"
54184"/// Converts two elements of a 64-bit vector of [2 x i32] into two\n"
54185"/// floating point values and writes them to the lower 64-bits of the\n"
54186"/// destination. The remaining higher order elements of the destination are\n"
54187"/// copied from the corresponding elements in the first operand.\n"
54188"///\n"
54189"/// \\headerfile <x86intrin.h>\n"
54190"///\n"
54191"/// This intrinsic corresponds to the <c> CVTPI2PS </c> instruction.\n"
54192"///\n"
54193"/// \\param __a\n"
54194"/// A 128-bit vector of [4 x float].\n"
54195"/// \\param __b\n"
54196"/// A 64-bit vector of [2 x i32]. The elements in this vector are converted\n"
54197"/// and written to the corresponding low-order elements in the destination.\n"
54198"/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n"
54199"/// converted value from the second operand. The upper 64 bits are copied\n"
54200"/// from the upper 64 bits of the first operand.\n"
54201"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
54202"_mm_cvt_pi2ps(__m128 __a, __m64 __b)\n"
54203"{\n"
54204" return _mm_cvtpi32_ps(__a, __b);\n"
54205"}\n"
54206"\n"
54207"/// Extracts a float value contained in the lower 32 bits of a vector of\n"
54208"/// [4 x float].\n"
54209"///\n"
54210"/// \\headerfile <x86intrin.h>\n"
54211"///\n"
54212"/// This intrinsic has no corresponding instruction.\n"
54213"///\n"
54214"/// \\param __a\n"
54215"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
54216"/// used in the extraction.\n"
54217"/// \\returns A 32-bit float containing the extracted value.\n"
54218"static __inline__ float __DEFAULT_FN_ATTRS\n"
54219"_mm_cvtss_f32(__m128 __a)\n"
54220"{\n"
54221" return __a[0];\n"
54222"}\n"
54223"\n"
54224"/// Loads two packed float values from the address \\a __p into the\n"
54225"/// high-order bits of a 128-bit vector of [4 x float]. The low-order bits\n"
54226"/// are copied from the low-order bits of the first operand.\n"
54227"///\n"
54228"/// \\headerfile <x86intrin.h>\n"
54229"///\n"
54230"/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.\n"
54231"///\n"
54232"/// \\param __a\n"
54233"/// A 128-bit vector of [4 x float]. Bits [63:0] are written to bits [63:0]\n"
54234"/// of the destination.\n"
54235"/// \\param __p\n"
54236"/// A pointer to two packed float values. Bits [63:0] are written to bits\n"
54237"/// [127:64] of the destination.\n"
54238"/// \\returns A 128-bit vector of [4 x float] containing the moved values.\n"
54239"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54240"_mm_loadh_pi(__m128 __a, const __m64 *__p)\n"
54241"{\n"
54242" typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8)));\n"
54243" struct __mm_loadh_pi_struct {\n"
54244" __mm_loadh_pi_v2f32 __u;\n"
54245" } __attribute__((__packed__, __may_alias__));\n"
54246" __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u;\n"
54247" __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);\n"
54248" return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);\n"
54249"}\n"
54250"\n"
54251"/// Loads two packed float values from the address \\a __p into the\n"
54252"/// low-order bits of a 128-bit vector of [4 x float]. The high-order bits\n"
54253"/// are copied from the high-order bits of the first operand.\n"
54254"///\n"
54255"/// \\headerfile <x86intrin.h>\n"
54256"///\n"
54257"/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.\n"
54258"///\n"
54259"/// \\param __a\n"
54260"/// A 128-bit vector of [4 x float]. Bits [127:64] are written to bits\n"
54261"/// [127:64] of the destination.\n"
54262"/// \\param __p\n"
54263"/// A pointer to two packed float values. Bits [63:0] are written to bits\n"
54264"/// [63:0] of the destination.\n"
54265"/// \\returns A 128-bit vector of [4 x float] containing the moved values.\n"
54266"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54267"_mm_loadl_pi(__m128 __a, const __m64 *__p)\n"
54268"{\n"
54269" typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8)));\n"
54270" struct __mm_loadl_pi_struct {\n"
54271" __mm_loadl_pi_v2f32 __u;\n"
54272" } __attribute__((__packed__, __may_alias__));\n"
54273" __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u;\n"
54274" __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);\n"
54275" return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);\n"
54276"}\n"
54277"\n"
54278"/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n"
54279"/// 32 bits of the vector are initialized with the single-precision\n"
54280"/// floating-point value loaded from a specified memory location. The upper\n"
54281"/// 96 bits are set to zero.\n"
54282"///\n"
54283"/// \\headerfile <x86intrin.h>\n"
54284"///\n"
54285"/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.\n"
54286"///\n"
54287"/// \\param __p\n"
54288"/// A pointer to a 32-bit memory location containing a single-precision\n"
54289"/// floating-point value.\n"
54290"/// \\returns An initialized 128-bit floating-point vector of [4 x float]. The\n"
54291"/// lower 32 bits contain the value loaded from the memory location. The\n"
54292"/// upper 96 bits are set to zero.\n"
54293"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54294"_mm_load_ss(const float *__p)\n"
54295"{\n"
54296" struct __mm_load_ss_struct {\n"
54297" float __u;\n"
54298" } __attribute__((__packed__, __may_alias__));\n"
54299" float __u = ((struct __mm_load_ss_struct*)__p)->__u;\n"
54300" return __extension__ (__m128){ __u, 0, 0, 0 };\n"
54301"}\n"
54302"\n"
54303"/// Loads a 32-bit float value and duplicates it to all four vector\n"
54304"/// elements of a 128-bit vector of [4 x float].\n"
54305"///\n"
54306"/// \\headerfile <x86intrin.h>\n"
54307"///\n"
54308"/// This intrinsic corresponds to the <c> VBROADCASTSS / MOVSS + shuffling </c>\n"
54309"/// instruction.\n"
54310"///\n"
54311"/// \\param __p\n"
54312"/// A pointer to a float value to be loaded and duplicated.\n"
54313"/// \\returns A 128-bit vector of [4 x float] containing the loaded and\n"
54314"/// duplicated values.\n"
54315"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54316"_mm_load1_ps(const float *__p)\n"
54317"{\n"
54318" struct __mm_load1_ps_struct {\n"
54319" float __u;\n"
54320" } __attribute__((__packed__, __may_alias__));\n"
54321" float __u = ((struct __mm_load1_ps_struct*)__p)->__u;\n"
54322" return __extension__ (__m128){ __u, __u, __u, __u };\n"
54323"}\n"
54324"\n"
54325"#define _mm_load_ps1(p) _mm_load1_ps(p)\n"
54326"\n"
54327"/// Loads a 128-bit floating-point vector of [4 x float] from an aligned\n"
54328"/// memory location.\n"
54329"///\n"
54330"/// \\headerfile <x86intrin.h>\n"
54331"///\n"
54332"/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.\n"
54333"///\n"
54334"/// \\param __p\n"
54335"/// A pointer to a 128-bit memory location. The address of the memory\n"
54336"/// location has to be 128-bit aligned.\n"
54337"/// \\returns A 128-bit vector of [4 x float] containing the loaded values.\n"
54338"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54339"_mm_load_ps(const float *__p)\n"
54340"{\n"
54341" return *(__m128*)__p;\n"
54342"}\n"
54343"\n"
54344"/// Loads a 128-bit floating-point vector of [4 x float] from an\n"
54345"/// unaligned memory location.\n"
54346"///\n"
54347"/// \\headerfile <x86intrin.h>\n"
54348"///\n"
54349"/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.\n"
54350"///\n"
54351"/// \\param __p\n"
54352"/// A pointer to a 128-bit memory location. The address of the memory\n"
54353"/// location does not have to be aligned.\n"
54354"/// \\returns A 128-bit vector of [4 x float] containing the loaded values.\n"
54355"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54356"_mm_loadu_ps(const float *__p)\n"
54357"{\n"
54358" struct __loadu_ps {\n"
54359" __m128 __v;\n"
54360" } __attribute__((__packed__, __may_alias__));\n"
54361" return ((struct __loadu_ps*)__p)->__v;\n"
54362"}\n"
54363"\n"
54364"/// Loads four packed float values, in reverse order, from an aligned\n"
54365"/// memory location to 32-bit elements in a 128-bit vector of [4 x float].\n"
54366"///\n"
54367"/// \\headerfile <x86intrin.h>\n"
54368"///\n"
54369"/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS + shuffling </c>\n"
54370"/// instruction.\n"
54371"///\n"
54372"/// \\param __p\n"
54373"/// A pointer to a 128-bit memory location. The address of the memory\n"
54374"/// location has to be 128-bit aligned.\n"
54375"/// \\returns A 128-bit vector of [4 x float] containing the moved values, loaded\n"
54376"/// in reverse order.\n"
54377"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54378"_mm_loadr_ps(const float *__p)\n"
54379"{\n"
54380" __m128 __a = _mm_load_ps(__p);\n"
54381" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);\n"
54382"}\n"
54383"\n"
54384"/// Create a 128-bit vector of [4 x float] with undefined values.\n"
54385"///\n"
54386"/// \\headerfile <x86intrin.h>\n"
54387"///\n"
54388"/// This intrinsic has no corresponding instruction.\n"
54389"///\n"
54390"/// \\returns A 128-bit vector of [4 x float] containing undefined values.\n"
54391"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54392"_mm_undefined_ps(void)\n"
54393"{\n"
54394" return (__m128)__builtin_ia32_undef128();\n"
54395"}\n"
54396"\n"
54397"/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n"
54398"/// 32 bits of the vector are initialized with the specified single-precision\n"
54399"/// floating-point value. The upper 96 bits are set to zero.\n"
54400"///\n"
54401"/// \\headerfile <x86intrin.h>\n"
54402"///\n"
54403"/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.\n"
54404"///\n"
54405"/// \\param __w\n"
54406"/// A single-precision floating-point value used to initialize the lower 32\n"
54407"/// bits of the result.\n"
54408"/// \\returns An initialized 128-bit floating-point vector of [4 x float]. The\n"
54409"/// lower 32 bits contain the value provided in the source operand. The\n"
54410"/// upper 96 bits are set to zero.\n"
54411"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54412"_mm_set_ss(float __w)\n"
54413"{\n"
54414" return __extension__ (__m128){ __w, 0, 0, 0 };\n"
54415"}\n"
54416"\n"
54417"/// Constructs a 128-bit floating-point vector of [4 x float], with each\n"
54418"/// of the four single-precision floating-point vector elements set to the\n"
54419"/// specified single-precision floating-point value.\n"
54420"///\n"
54421"/// \\headerfile <x86intrin.h>\n"
54422"///\n"
54423"/// This intrinsic corresponds to the <c> VPERMILPS / PERMILPS </c> instruction.\n"
54424"///\n"
54425"/// \\param __w\n"
54426"/// A single-precision floating-point value used to initialize each vector\n"
54427"/// element of the result.\n"
54428"/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n"
54429"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54430"_mm_set1_ps(float __w)\n"
54431"{\n"
54432" return __extension__ (__m128){ __w, __w, __w, __w };\n"
54433"}\n"
54434"\n"
54435"/* Microsoft specific. */\n"
54436"/// Constructs a 128-bit floating-point vector of [4 x float], with each\n"
54437"/// of the four single-precision floating-point vector elements set to the\n"
54438"/// specified single-precision floating-point value.\n"
54439"///\n"
54440"/// \\headerfile <x86intrin.h>\n"
54441"///\n"
54442"/// This intrinsic corresponds to the <c> VPERMILPS / PERMILPS </c> instruction.\n"
54443"///\n"
54444"/// \\param __w\n"
54445"/// A single-precision floating-point value used to initialize each vector\n"
54446"/// element of the result.\n"
54447"/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n"
54448"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54449"_mm_set_ps1(float __w)\n"
54450"{\n"
54451" return _mm_set1_ps(__w);\n"
54452"}\n"
54453"\n"
54454"/// Constructs a 128-bit floating-point vector of [4 x float]\n"
54455"/// initialized with the specified single-precision floating-point values.\n"
54456"///\n"
54457"/// \\headerfile <x86intrin.h>\n"
54458"///\n"
54459"/// This intrinsic is a utility function and does not correspond to a specific\n"
54460"/// instruction.\n"
54461"///\n"
54462"/// \\param __z\n"
54463"/// A single-precision floating-point value used to initialize bits [127:96]\n"
54464"/// of the result.\n"
54465"/// \\param __y\n"
54466"/// A single-precision floating-point value used to initialize bits [95:64]\n"
54467"/// of the result.\n"
54468"/// \\param __x\n"
54469"/// A single-precision floating-point value used to initialize bits [63:32]\n"
54470"/// of the result.\n"
54471"/// \\param __w\n"
54472"/// A single-precision floating-point value used to initialize bits [31:0]\n"
54473"/// of the result.\n"
54474"/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n"
54475"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54476"_mm_set_ps(float __z, float __y, float __x, float __w)\n"
54477"{\n"
54478" return __extension__ (__m128){ __w, __x, __y, __z };\n"
54479"}\n"
54480"\n"
54481"/// Constructs a 128-bit floating-point vector of [4 x float],\n"
54482"/// initialized in reverse order with the specified 32-bit single-precision\n"
54483"/// float-point values.\n"
54484"///\n"
54485"/// \\headerfile <x86intrin.h>\n"
54486"///\n"
54487"/// This intrinsic is a utility function and does not correspond to a specific\n"
54488"/// instruction.\n"
54489"///\n"
54490"/// \\param __z\n"
54491"/// A single-precision floating-point value used to initialize bits [31:0]\n"
54492"/// of the result.\n"
54493"/// \\param __y\n"
54494"/// A single-precision floating-point value used to initialize bits [63:32]\n"
54495"/// of the result.\n"
54496"/// \\param __x\n"
54497"/// A single-precision floating-point value used to initialize bits [95:64]\n"
54498"/// of the result.\n"
54499"/// \\param __w\n"
54500"/// A single-precision floating-point value used to initialize bits [127:96]\n"
54501"/// of the result.\n"
54502"/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n"
54503"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54504"_mm_setr_ps(float __z, float __y, float __x, float __w)\n"
54505"{\n"
54506" return __extension__ (__m128){ __z, __y, __x, __w };\n"
54507"}\n"
54508"\n"
54509"/// Constructs a 128-bit floating-point vector of [4 x float] initialized\n"
54510"/// to zero.\n"
54511"///\n"
54512"/// \\headerfile <x86intrin.h>\n"
54513"///\n"
54514"/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.\n"
54515"///\n"
54516"/// \\returns An initialized 128-bit floating-point vector of [4 x float] with\n"
54517"/// all elements set to zero.\n"
54518"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54519"_mm_setzero_ps(void)\n"
54520"{\n"
54521" return __extension__ (__m128){ 0, 0, 0, 0 };\n"
54522"}\n"
54523"\n"
54524"/// Stores the upper 64 bits of a 128-bit vector of [4 x float] to a\n"
54525"/// memory location.\n"
54526"///\n"
54527"/// \\headerfile <x86intrin.h>\n"
54528"///\n"
54529"/// This intrinsic corresponds to the <c> VPEXTRQ / PEXTRQ </c> instruction.\n"
54530"///\n"
54531"/// \\param __p\n"
54532"/// A pointer to a 64-bit memory location.\n"
54533"/// \\param __a\n"
54534"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
54535"static __inline__ void __DEFAULT_FN_ATTRS\n"
54536"_mm_storeh_pi(__m64 *__p, __m128 __a)\n"
54537"{\n"
54538" __builtin_ia32_storehps((__v2si *)__p, (__v4sf)__a);\n"
54539"}\n"
54540"\n"
54541"/// Stores the lower 64 bits of a 128-bit vector of [4 x float] to a\n"
54542"/// memory location.\n"
54543"///\n"
54544"/// \\headerfile <x86intrin.h>\n"
54545"///\n"
54546"/// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction.\n"
54547"///\n"
54548"/// \\param __p\n"
54549"/// A pointer to a memory location that will receive the float values.\n"
54550"/// \\param __a\n"
54551"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
54552"static __inline__ void __DEFAULT_FN_ATTRS\n"
54553"_mm_storel_pi(__m64 *__p, __m128 __a)\n"
54554"{\n"
54555" __builtin_ia32_storelps((__v2si *)__p, (__v4sf)__a);\n"
54556"}\n"
54557"\n"
54558"/// Stores the lower 32 bits of a 128-bit vector of [4 x float] to a\n"
54559"/// memory location.\n"
54560"///\n"
54561"/// \\headerfile <x86intrin.h>\n"
54562"///\n"
54563"/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.\n"
54564"///\n"
54565"/// \\param __p\n"
54566"/// A pointer to a 32-bit memory location.\n"
54567"/// \\param __a\n"
54568"/// A 128-bit vector of [4 x float] containing the value to be stored.\n"
54569"static __inline__ void __DEFAULT_FN_ATTRS\n"
54570"_mm_store_ss(float *__p, __m128 __a)\n"
54571"{\n"
54572" struct __mm_store_ss_struct {\n"
54573" float __u;\n"
54574" } __attribute__((__packed__, __may_alias__));\n"
54575" ((struct __mm_store_ss_struct*)__p)->__u = __a[0];\n"
54576"}\n"
54577"\n"
54578"/// Stores a 128-bit vector of [4 x float] to an unaligned memory\n"
54579"/// location.\n"
54580"///\n"
54581"/// \\headerfile <x86intrin.h>\n"
54582"///\n"
54583"/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.\n"
54584"///\n"
54585"/// \\param __p\n"
54586"/// A pointer to a 128-bit memory location. The address of the memory\n"
54587"/// location does not have to be aligned.\n"
54588"/// \\param __a\n"
54589"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
54590"static __inline__ void __DEFAULT_FN_ATTRS\n"
54591"_mm_storeu_ps(float *__p, __m128 __a)\n"
54592"{\n"
54593" struct __storeu_ps {\n"
54594" __m128 __v;\n"
54595" } __attribute__((__packed__, __may_alias__));\n"
54596" ((struct __storeu_ps*)__p)->__v = __a;\n"
54597"}\n"
54598"\n"
54599"/// Stores a 128-bit vector of [4 x float] into an aligned memory\n"
54600"/// location.\n"
54601"///\n"
54602"/// \\headerfile <x86intrin.h>\n"
54603"///\n"
54604"/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.\n"
54605"///\n"
54606"/// \\param __p\n"
54607"/// A pointer to a 128-bit memory location. The address of the memory\n"
54608"/// location has to be 16-byte aligned.\n"
54609"/// \\param __a\n"
54610"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
54611"static __inline__ void __DEFAULT_FN_ATTRS\n"
54612"_mm_store_ps(float *__p, __m128 __a)\n"
54613"{\n"
54614" *(__m128*)__p = __a;\n"
54615"}\n"
54616"\n"
54617"/// Stores the lower 32 bits of a 128-bit vector of [4 x float] into\n"
54618"/// four contiguous elements in an aligned memory location.\n"
54619"///\n"
54620"/// \\headerfile <x86intrin.h>\n"
54621"///\n"
54622"/// This intrinsic corresponds to <c> VMOVAPS / MOVAPS + shuffling </c>\n"
54623"/// instruction.\n"
54624"///\n"
54625"/// \\param __p\n"
54626"/// A pointer to a 128-bit memory location.\n"
54627"/// \\param __a\n"
54628"/// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each\n"
54629"/// of the four contiguous elements pointed by \\a __p.\n"
54630"static __inline__ void __DEFAULT_FN_ATTRS\n"
54631"_mm_store1_ps(float *__p, __m128 __a)\n"
54632"{\n"
54633" __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);\n"
54634" _mm_store_ps(__p, __a);\n"
54635"}\n"
54636"\n"
54637"/// Stores the lower 32 bits of a 128-bit vector of [4 x float] into\n"
54638"/// four contiguous elements in an aligned memory location.\n"
54639"///\n"
54640"/// \\headerfile <x86intrin.h>\n"
54641"///\n"
54642"/// This intrinsic corresponds to <c> VMOVAPS / MOVAPS + shuffling </c>\n"
54643"/// instruction.\n"
54644"///\n"
54645"/// \\param __p\n"
54646"/// A pointer to a 128-bit memory location.\n"
54647"/// \\param __a\n"
54648"/// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each\n"
54649"/// of the four contiguous elements pointed by \\a __p.\n"
54650"static __inline__ void __DEFAULT_FN_ATTRS\n"
54651"_mm_store_ps1(float *__p, __m128 __a)\n"
54652"{\n"
54653" _mm_store1_ps(__p, __a);\n"
54654"}\n"
54655"\n"
54656"/// Stores float values from a 128-bit vector of [4 x float] to an\n"
54657"/// aligned memory location in reverse order.\n"
54658"///\n"
54659"/// \\headerfile <x86intrin.h>\n"
54660"///\n"
54661"/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS + shuffling </c>\n"
54662"/// instruction.\n"
54663"///\n"
54664"/// \\param __p\n"
54665"/// A pointer to a 128-bit memory location. The address of the memory\n"
54666"/// location has to be 128-bit aligned.\n"
54667"/// \\param __a\n"
54668"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
54669"static __inline__ void __DEFAULT_FN_ATTRS\n"
54670"_mm_storer_ps(float *__p, __m128 __a)\n"
54671"{\n"
54672" __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);\n"
54673" _mm_store_ps(__p, __a);\n"
54674"}\n"
54675"\n"
54676"#define _MM_HINT_ET0 7\n"
54677"#define _MM_HINT_ET1 6\n"
54678"#define _MM_HINT_T0 3\n"
54679"#define _MM_HINT_T1 2\n"
54680"#define _MM_HINT_T2 1\n"
54681"#define _MM_HINT_NTA 0\n"
54682"\n"
54683"#ifndef _MSC_VER\n"
54684"/* FIXME: We have to #define this because \"sel\" must be a constant integer, and\n"
54685" Sema doesn't do any form of constant propagation yet. */\n"
54686"\n"
54687"/// Loads one cache line of data from the specified address to a location\n"
54688"/// closer to the processor.\n"
54689"///\n"
54690"/// \\headerfile <x86intrin.h>\n"
54691"///\n"
54692"/// \\code\n"
54693"/// void _mm_prefetch(const void * a, const int sel);\n"
54694"/// \\endcode\n"
54695"///\n"
54696"/// This intrinsic corresponds to the <c> PREFETCHNTA </c> instruction.\n"
54697"///\n"
54698"/// \\param a\n"
54699"/// A pointer to a memory location containing a cache line of data.\n"
54700"/// \\param sel\n"
54701"/// A predefined integer constant specifying the type of prefetch\n"
54702"/// operation: \\n\n"
54703"/// _MM_HINT_NTA: Move data using the non-temporal access (NTA) hint. The\n"
54704"/// PREFETCHNTA instruction will be generated. \\n\n"
54705"/// _MM_HINT_T0: Move data using the T0 hint. The PREFETCHT0 instruction will\n"
54706"/// be generated. \\n\n"
54707"/// _MM_HINT_T1: Move data using the T1 hint. The PREFETCHT1 instruction will\n"
54708"/// be generated. \\n\n"
54709"/// _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will\n"
54710"/// be generated.\n"
54711"#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), \\\n"
54712" ((sel) >> 2) & 1, (sel) & 0x3))\n"
54713"#endif\n"
54714"\n"
54715"/// Stores a 64-bit integer in the specified aligned memory location. To\n"
54716"/// minimize caching, the data is flagged as non-temporal (unlikely to be\n"
54717"/// used again soon).\n"
54718"///\n"
54719"/// \\headerfile <x86intrin.h>\n"
54720"///\n"
54721"/// This intrinsic corresponds to the <c> MOVNTQ </c> instruction.\n"
54722"///\n"
54723"/// \\param __p\n"
54724"/// A pointer to an aligned memory location used to store the register value.\n"
54725"/// \\param __a\n"
54726"/// A 64-bit integer containing the value to be stored.\n"
54727"static __inline__ void __DEFAULT_FN_ATTRS_MMX\n"
54728"_mm_stream_pi(__m64 *__p, __m64 __a)\n"
54729"{\n"
54730" __builtin_ia32_movntq(__p, __a);\n"
54731"}\n"
54732"\n"
54733"/// Moves packed float values from a 128-bit vector of [4 x float] to a\n"
54734"/// 128-bit aligned memory location. To minimize caching, the data is flagged\n"
54735"/// as non-temporal (unlikely to be used again soon).\n"
54736"///\n"
54737"/// \\headerfile <x86intrin.h>\n"
54738"///\n"
54739"/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.\n"
54740"///\n"
54741"/// \\param __p\n"
54742"/// A pointer to a 128-bit aligned memory location that will receive the\n"
54743"/// single-precision floating-point values.\n"
54744"/// \\param __a\n"
54745"/// A 128-bit vector of [4 x float] containing the values to be moved.\n"
54746"static __inline__ void __DEFAULT_FN_ATTRS\n"
54747"_mm_stream_ps(float *__p, __m128 __a)\n"
54748"{\n"
54749" __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);\n"
54750"}\n"
54751"\n"
54752"#if defined(__cplusplus)\n"
54753"extern \"C\" {\n"
54754"#endif\n"
54755"\n"
54756"/// Forces strong memory ordering (serialization) between store\n"
54757"/// instructions preceding this instruction and store instructions following\n"
54758"/// this instruction, ensuring the system completes all previous stores\n"
54759"/// before executing subsequent stores.\n"
54760"///\n"
54761"/// \\headerfile <x86intrin.h>\n"
54762"///\n"
54763"/// This intrinsic corresponds to the <c> SFENCE </c> instruction.\n"
54764"///\n"
54765"void _mm_sfence(void);\n"
54766"\n"
54767"#if defined(__cplusplus)\n"
54768"} // extern \"C\"\n"
54769"#endif\n"
54770"\n"
54771"/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and\n"
54772"/// returns it, as specified by the immediate integer operand.\n"
54773"///\n"
54774"/// \\headerfile <x86intrin.h>\n"
54775"///\n"
54776"/// \\code\n"
54777"/// int _mm_extract_pi16(__m64 a, int n);\n"
54778"/// \\endcode\n"
54779"///\n"
54780"/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.\n"
54781"///\n"
54782"/// \\param a\n"
54783"/// A 64-bit vector of [4 x i16].\n"
54784"/// \\param n\n"
54785"/// An immediate integer operand that determines which bits are extracted: \\n\n"
54786"/// 0: Bits [15:0] are copied to the destination. \\n\n"
54787"/// 1: Bits [31:16] are copied to the destination. \\n\n"
54788"/// 2: Bits [47:32] are copied to the destination. \\n\n"
54789"/// 3: Bits [63:48] are copied to the destination.\n"
54790"/// \\returns A 16-bit integer containing the extracted 16 bits of packed data.\n"
54791"#define _mm_extract_pi16(a, n) \\\n"
54792" (int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n)\n"
54793"\n"
54794"/// Copies data from the 64-bit vector of [4 x i16] to the destination,\n"
54795"/// and inserts the lower 16-bits of an integer operand at the 16-bit offset\n"
54796"/// specified by the immediate operand \\a n.\n"
54797"///\n"
54798"/// \\headerfile <x86intrin.h>\n"
54799"///\n"
54800"/// \\code\n"
54801"/// __m64 _mm_insert_pi16(__m64 a, int d, int n);\n"
54802"/// \\endcode\n"
54803"///\n"
54804"/// This intrinsic corresponds to the <c> PINSRW </c> instruction.\n"
54805"///\n"
54806"/// \\param a\n"
54807"/// A 64-bit vector of [4 x i16].\n"
54808"/// \\param d\n"
54809"/// An integer. The lower 16-bit value from this operand is written to the\n"
54810"/// destination at the offset specified by operand \\a n.\n"
54811"/// \\param n\n"
54812"/// An immediate integer operant that determines which the bits to be used\n"
54813"/// in the destination. \\n\n"
54814"/// 0: Bits [15:0] are copied to the destination. \\n\n"
54815"/// 1: Bits [31:16] are copied to the destination. \\n\n"
54816"/// 2: Bits [47:32] are copied to the destination. \\n\n"
54817"/// 3: Bits [63:48] are copied to the destination. \\n\n"
54818"/// The remaining bits in the destination are copied from the corresponding\n"
54819"/// bits in operand \\a a.\n"
54820"/// \\returns A 64-bit integer vector containing the copied packed data from the\n"
54821"/// operands.\n"
54822"#define _mm_insert_pi16(a, d, n) \\\n"
54823" (__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n)\n"
54824"\n"
54825"/// Compares each of the corresponding packed 16-bit integer values of\n"
54826"/// the 64-bit integer vectors, and writes the greater value to the\n"
54827"/// corresponding bits in the destination.\n"
54828"///\n"
54829"/// \\headerfile <x86intrin.h>\n"
54830"///\n"
54831"/// This intrinsic corresponds to the <c> PMAXSW </c> instruction.\n"
54832"///\n"
54833"/// \\param __a\n"
54834"/// A 64-bit integer vector containing one of the source operands.\n"
54835"/// \\param __b\n"
54836"/// A 64-bit integer vector containing one of the source operands.\n"
54837"/// \\returns A 64-bit integer vector containing the comparison results.\n"
54838"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
54839"_mm_max_pi16(__m64 __a, __m64 __b)\n"
54840"{\n"
54841" return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);\n"
54842"}\n"
54843"\n"
54844"/// Compares each of the corresponding packed 8-bit unsigned integer\n"
54845"/// values of the 64-bit integer vectors, and writes the greater value to the\n"
54846"/// corresponding bits in the destination.\n"
54847"///\n"
54848"/// \\headerfile <x86intrin.h>\n"
54849"///\n"
54850"/// This intrinsic corresponds to the <c> PMAXUB </c> instruction.\n"
54851"///\n"
54852"/// \\param __a\n"
54853"/// A 64-bit integer vector containing one of the source operands.\n"
54854"/// \\param __b\n"
54855"/// A 64-bit integer vector containing one of the source operands.\n"
54856"/// \\returns A 64-bit integer vector containing the comparison results.\n"
54857"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
54858"_mm_max_pu8(__m64 __a, __m64 __b)\n"
54859"{\n"
54860" return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);\n"
54861"}\n"
54862"\n"
54863"/// Compares each of the corresponding packed 16-bit integer values of\n"
54864"/// the 64-bit integer vectors, and writes the lesser value to the\n"
54865"/// corresponding bits in the destination.\n"
54866"///\n"
54867"/// \\headerfile <x86intrin.h>\n"
54868"///\n"
54869"/// This intrinsic corresponds to the <c> PMINSW </c> instruction.\n"
54870"///\n"
54871"/// \\param __a\n"
54872"/// A 64-bit integer vector containing one of the source operands.\n"
54873"/// \\param __b\n"
54874"/// A 64-bit integer vector containing one of the source operands.\n"
54875"/// \\returns A 64-bit integer vector containing the comparison results.\n"
54876"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
54877"_mm_min_pi16(__m64 __a, __m64 __b)\n"
54878"{\n"
54879" return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);\n"
54880"}\n"
54881"\n"
54882"/// Compares each of the corresponding packed 8-bit unsigned integer\n"
54883"/// values of the 64-bit integer vectors, and writes the lesser value to the\n"
54884"/// corresponding bits in the destination.\n"
54885"///\n"
54886"/// \\headerfile <x86intrin.h>\n"
54887"///\n"
54888"/// This intrinsic corresponds to the <c> PMINUB </c> instruction.\n"
54889"///\n"
54890"/// \\param __a\n"
54891"/// A 64-bit integer vector containing one of the source operands.\n"
54892"/// \\param __b\n"
54893"/// A 64-bit integer vector containing one of the source operands.\n"
54894"/// \\returns A 64-bit integer vector containing the comparison results.\n"
54895"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
54896"_mm_min_pu8(__m64 __a, __m64 __b)\n"
54897"{\n"
54898" return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);\n"
54899"}\n"
54900"\n"
54901"/// Takes the most significant bit from each 8-bit element in a 64-bit\n"
54902"/// integer vector to create an 8-bit mask value. Zero-extends the value to\n"
54903"/// 32-bit integer and writes it to the destination.\n"
54904"///\n"
54905"/// \\headerfile <x86intrin.h>\n"
54906"///\n"
54907"/// This intrinsic corresponds to the <c> PMOVMSKB </c> instruction.\n"
54908"///\n"
54909"/// \\param __a\n"
54910"/// A 64-bit integer vector containing the values with bits to be extracted.\n"
54911"/// \\returns The most significant bit from each 8-bit element in \\a __a,\n"
54912"/// written to bits [7:0].\n"
54913"static __inline__ int __DEFAULT_FN_ATTRS_MMX\n"
54914"_mm_movemask_pi8(__m64 __a)\n"
54915"{\n"
54916" return __builtin_ia32_pmovmskb((__v8qi)__a);\n"
54917"}\n"
54918"\n"
54919"/// Multiplies packed 16-bit unsigned integer values and writes the\n"
54920"/// high-order 16 bits of each 32-bit product to the corresponding bits in\n"
54921"/// the destination.\n"
54922"///\n"
54923"/// \\headerfile <x86intrin.h>\n"
54924"///\n"
54925"/// This intrinsic corresponds to the <c> PMULHUW </c> instruction.\n"
54926"///\n"
54927"/// \\param __a\n"
54928"/// A 64-bit integer vector containing one of the source operands.\n"
54929"/// \\param __b\n"
54930"/// A 64-bit integer vector containing one of the source operands.\n"
54931"/// \\returns A 64-bit integer vector containing the products of both operands.\n"
54932"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
54933"_mm_mulhi_pu16(__m64 __a, __m64 __b)\n"
54934"{\n"
54935" return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);\n"
54936"}\n"
54937"\n"
54938"/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the\n"
54939"/// destination, as specified by the immediate value operand.\n"
54940"///\n"
54941"/// \\headerfile <x86intrin.h>\n"
54942"///\n"
54943"/// \\code\n"
54944"/// __m64 _mm_shuffle_pi16(__m64 a, const int n);\n"
54945"/// \\endcode\n"
54946"///\n"
54947"/// This intrinsic corresponds to the <c> PSHUFW </c> instruction.\n"
54948"///\n"
54949"/// \\param a\n"
54950"/// A 64-bit integer vector containing the values to be shuffled.\n"
54951"/// \\param n\n"
54952"/// An immediate value containing an 8-bit value specifying which elements to\n"
54953"/// copy from \\a a. The destinations within the 64-bit destination are\n"
54954"/// assigned values as follows: \\n\n"
54955"/// Bits [1:0] are used to assign values to bits [15:0] in the\n"
54956"/// destination. \\n\n"
54957"/// Bits [3:2] are used to assign values to bits [31:16] in the\n"
54958"/// destination. \\n\n"
54959"/// Bits [5:4] are used to assign values to bits [47:32] in the\n"
54960"/// destination. \\n\n"
54961"/// Bits [7:6] are used to assign values to bits [63:48] in the\n"
54962"/// destination. \\n\n"
54963"/// Bit value assignments: \\n\n"
54964"/// 00: assigned from bits [15:0] of \\a a. \\n\n"
54965"/// 01: assigned from bits [31:16] of \\a a. \\n\n"
54966"/// 10: assigned from bits [47:32] of \\a a. \\n\n"
54967"/// 11: assigned from bits [63:48] of \\a a.\n"
54968"/// \\returns A 64-bit integer vector containing the shuffled values.\n"
54969"#define _mm_shuffle_pi16(a, n) \\\n"
54970" (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))\n"
54971"\n"
54972"/// Conditionally copies the values from each 8-bit element in the first\n"
54973"/// 64-bit integer vector operand to the specified memory location, as\n"
54974"/// specified by the most significant bit in the corresponding element in the\n"
54975"/// second 64-bit integer vector operand.\n"
54976"///\n"
54977"/// To minimize caching, the data is flagged as non-temporal\n"
54978"/// (unlikely to be used again soon).\n"
54979"///\n"
54980"/// \\headerfile <x86intrin.h>\n"
54981"///\n"
54982"/// This intrinsic corresponds to the <c> MASKMOVQ </c> instruction.\n"
54983"///\n"
54984"/// \\param __d\n"
54985"/// A 64-bit integer vector containing the values with elements to be copied.\n"
54986"/// \\param __n\n"
54987"/// A 64-bit integer vector operand. The most significant bit from each 8-bit\n"
54988"/// element determines whether the corresponding element in operand \\a __d\n"
54989"/// is copied. If the most significant bit of a given element is 1, the\n"
54990"/// corresponding element in operand \\a __d is copied.\n"
54991"/// \\param __p\n"
54992"/// A pointer to a 64-bit memory location that will receive the conditionally\n"
54993"/// copied integer values. The address of the memory location does not have\n"
54994"/// to be aligned.\n"
54995"static __inline__ void __DEFAULT_FN_ATTRS_MMX\n"
54996"_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)\n"
54997"{\n"
54998" __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p);\n"
54999"}\n"
55000"\n"
55001"/// Computes the rounded averages of the packed unsigned 8-bit integer\n"
55002"/// values and writes the averages to the corresponding bits in the\n"
55003"/// destination.\n"
55004"///\n"
55005"/// \\headerfile <x86intrin.h>\n"
55006"///\n"
55007"/// This intrinsic corresponds to the <c> PAVGB </c> instruction.\n"
55008"///\n"
55009"/// \\param __a\n"
55010"/// A 64-bit integer vector containing one of the source operands.\n"
55011"/// \\param __b\n"
55012"/// A 64-bit integer vector containing one of the source operands.\n"
55013"/// \\returns A 64-bit integer vector containing the averages of both operands.\n"
55014"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
55015"_mm_avg_pu8(__m64 __a, __m64 __b)\n"
55016"{\n"
55017" return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);\n"
55018"}\n"
55019"\n"
55020"/// Computes the rounded averages of the packed unsigned 16-bit integer\n"
55021"/// values and writes the averages to the corresponding bits in the\n"
55022"/// destination.\n"
55023"///\n"
55024"/// \\headerfile <x86intrin.h>\n"
55025"///\n"
55026"/// This intrinsic corresponds to the <c> PAVGW </c> instruction.\n"
55027"///\n"
55028"/// \\param __a\n"
55029"/// A 64-bit integer vector containing one of the source operands.\n"
55030"/// \\param __b\n"
55031"/// A 64-bit integer vector containing one of the source operands.\n"
55032"/// \\returns A 64-bit integer vector containing the averages of both operands.\n"
55033"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
55034"_mm_avg_pu16(__m64 __a, __m64 __b)\n"
55035"{\n"
55036" return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);\n"
55037"}\n"
55038"\n"
55039"/// Subtracts the corresponding 8-bit unsigned integer values of the two\n"
55040"/// 64-bit vector operands and computes the absolute value for each of the\n"
55041"/// difference. Then sum of the 8 absolute differences is written to the\n"
55042"/// bits [15:0] of the destination; the remaining bits [63:16] are cleared.\n"
55043"///\n"
55044"/// \\headerfile <x86intrin.h>\n"
55045"///\n"
55046"/// This intrinsic corresponds to the <c> PSADBW </c> instruction.\n"
55047"///\n"
55048"/// \\param __a\n"
55049"/// A 64-bit integer vector containing one of the source operands.\n"
55050"/// \\param __b\n"
55051"/// A 64-bit integer vector containing one of the source operands.\n"
55052"/// \\returns A 64-bit integer vector whose lower 16 bits contain the sums of the\n"
55053"/// sets of absolute differences between both operands. The upper bits are\n"
55054"/// cleared.\n"
55055"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
55056"_mm_sad_pu8(__m64 __a, __m64 __b)\n"
55057"{\n"
55058" return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);\n"
55059"}\n"
55060"\n"
55061"#if defined(__cplusplus)\n"
55062"extern \"C\" {\n"
55063"#endif\n"
55064"\n"
55065"/// Returns the contents of the MXCSR register as a 32-bit unsigned\n"
55066"/// integer value.\n"
55067"///\n"
55068"/// There are several groups of macros associated with this\n"
55069"/// intrinsic, including:\n"
55070"/// <ul>\n"
55071"/// <li>\n"
55072"/// For checking exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO,\n"
55073"/// _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW,\n"
55074"/// _MM_EXCEPT_INEXACT. There is a convenience wrapper\n"
55075"/// _MM_GET_EXCEPTION_STATE().\n"
55076"/// </li>\n"
55077"/// <li>\n"
55078"/// For checking exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW,\n"
55079"/// _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT.\n"
55080"/// There is a convenience wrapper _MM_GET_EXCEPTION_MASK().\n"
55081"/// </li>\n"
55082"/// <li>\n"
55083"/// For checking rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN,\n"
55084"/// _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper\n"
55085"/// _MM_GET_ROUNDING_MODE().\n"
55086"/// </li>\n"
55087"/// <li>\n"
55088"/// For checking flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.\n"
55089"/// There is a convenience wrapper _MM_GET_FLUSH_ZERO_MODE().\n"
55090"/// </li>\n"
55091"/// <li>\n"
55092"/// For checking denormals-are-zero mode: _MM_DENORMALS_ZERO_ON,\n"
55093"/// _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper\n"
55094"/// _MM_GET_DENORMALS_ZERO_MODE().\n"
55095"/// </li>\n"
55096"/// </ul>\n"
55097"///\n"
55098"/// For example, the following expression checks if an overflow exception has\n"
55099"/// occurred:\n"
55100"/// \\code\n"
55101"/// ( _mm_getcsr() & _MM_EXCEPT_OVERFLOW )\n"
55102"/// \\endcode\n"
55103"///\n"
55104"/// The following expression gets the current rounding mode:\n"
55105"/// \\code\n"
55106"/// _MM_GET_ROUNDING_MODE()\n"
55107"/// \\endcode\n"
55108"///\n"
55109"/// \\headerfile <x86intrin.h>\n"
55110"///\n"
55111"/// This intrinsic corresponds to the <c> VSTMXCSR / STMXCSR </c> instruction.\n"
55112"///\n"
55113"/// \\returns A 32-bit unsigned integer containing the contents of the MXCSR\n"
55114"/// register.\n"
55115"unsigned int _mm_getcsr(void);\n"
55116"\n"
55117"/// Sets the MXCSR register with the 32-bit unsigned integer value.\n"
55118"///\n"
55119"/// There are several groups of macros associated with this intrinsic,\n"
55120"/// including:\n"
55121"/// <ul>\n"
55122"/// <li>\n"
55123"/// For setting exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO,\n"
55124"/// _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW,\n"
55125"/// _MM_EXCEPT_INEXACT. There is a convenience wrapper\n"
55126"/// _MM_SET_EXCEPTION_STATE(x) where x is one of these macros.\n"
55127"/// </li>\n"
55128"/// <li>\n"
55129"/// For setting exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW,\n"
55130"/// _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT.\n"
55131"/// There is a convenience wrapper _MM_SET_EXCEPTION_MASK(x) where x is one\n"
55132"/// of these macros.\n"
55133"/// </li>\n"
55134"/// <li>\n"
55135"/// For setting rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN,\n"
55136"/// _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper\n"
55137"/// _MM_SET_ROUNDING_MODE(x) where x is one of these macros.\n"
55138"/// </li>\n"
55139"/// <li>\n"
55140"/// For setting flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.\n"
55141"/// There is a convenience wrapper _MM_SET_FLUSH_ZERO_MODE(x) where x is\n"
55142"/// one of these macros.\n"
55143"/// </li>\n"
55144"/// <li>\n"
55145"/// For setting denormals-are-zero mode: _MM_DENORMALS_ZERO_ON,\n"
55146"/// _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper\n"
55147"/// _MM_SET_DENORMALS_ZERO_MODE(x) where x is one of these macros.\n"
55148"/// </li>\n"
55149"/// </ul>\n"
55150"///\n"
55151"/// For example, the following expression causes subsequent floating-point\n"
55152"/// operations to round up:\n"
55153"/// _mm_setcsr(_mm_getcsr() | _MM_ROUND_UP)\n"
55154"///\n"
55155"/// The following example sets the DAZ and FTZ flags:\n"
55156"/// \\code\n"
55157"/// void setFlags() {\n"
55158"/// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);\n"
55159"/// _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);\n"
55160"/// }\n"
55161"/// \\endcode\n"
55162"///\n"
55163"/// \\headerfile <x86intrin.h>\n"
55164"///\n"
55165"/// This intrinsic corresponds to the <c> VLDMXCSR / LDMXCSR </c> instruction.\n"
55166"///\n"
55167"/// \\param __i\n"
55168"/// A 32-bit unsigned integer value to be written to the MXCSR register.\n"
55169"void _mm_setcsr(unsigned int __i);\n"
55170"\n"
55171"#if defined(__cplusplus)\n"
55172"} // extern \"C\"\n"
55173"#endif\n"
55174"\n"
55175"/// Selects 4 float values from the 128-bit operands of [4 x float], as\n"
55176"/// specified by the immediate value operand.\n"
55177"///\n"
55178"/// \\headerfile <x86intrin.h>\n"
55179"///\n"
55180"/// \\code\n"
55181"/// __m128 _mm_shuffle_ps(__m128 a, __m128 b, const int mask);\n"
55182"/// \\endcode\n"
55183"///\n"
55184"/// This intrinsic corresponds to the <c> VSHUFPS / SHUFPS </c> instruction.\n"
55185"///\n"
55186"/// \\param a\n"
55187"/// A 128-bit vector of [4 x float].\n"
55188"/// \\param b\n"
55189"/// A 128-bit vector of [4 x float].\n"
55190"/// \\param mask\n"
55191"/// An immediate value containing an 8-bit value specifying which elements to\n"
55192"/// copy from \\a a and \\a b. \\n\n"
55193"/// Bits [3:0] specify the values copied from operand \\a a. \\n\n"
55194"/// Bits [7:4] specify the values copied from operand \\a b. \\n\n"
55195"/// The destinations within the 128-bit destination are assigned values as\n"
55196"/// follows: \\n\n"
55197"/// Bits [1:0] are used to assign values to bits [31:0] in the\n"
55198"/// destination. \\n\n"
55199"/// Bits [3:2] are used to assign values to bits [63:32] in the\n"
55200"/// destination. \\n\n"
55201"/// Bits [5:4] are used to assign values to bits [95:64] in the\n"
55202"/// destination. \\n\n"
55203"/// Bits [7:6] are used to assign values to bits [127:96] in the\n"
55204"/// destination. \\n\n"
55205"/// Bit value assignments: \\n\n"
55206"/// 00: Bits [31:0] copied from the specified operand. \\n\n"
55207"/// 01: Bits [63:32] copied from the specified operand. \\n\n"
55208"/// 10: Bits [95:64] copied from the specified operand. \\n\n"
55209"/// 11: Bits [127:96] copied from the specified operand.\n"
55210"/// \\returns A 128-bit vector of [4 x float] containing the shuffled values.\n"
55211"#define _mm_shuffle_ps(a, b, mask) \\\n"
55212" (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \\\n"
55213" (int)(mask))\n"
55214"\n"
55215"/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of\n"
55216"/// [4 x float] and interleaves them into a 128-bit vector of [4 x float].\n"
55217"///\n"
55218"/// \\headerfile <x86intrin.h>\n"
55219"///\n"
55220"/// This intrinsic corresponds to the <c> VUNPCKHPS / UNPCKHPS </c> instruction.\n"
55221"///\n"
55222"/// \\param __a\n"
55223"/// A 128-bit vector of [4 x float]. \\n\n"
55224"/// Bits [95:64] are written to bits [31:0] of the destination. \\n\n"
55225"/// Bits [127:96] are written to bits [95:64] of the destination.\n"
55226"/// \\param __b\n"
55227"/// A 128-bit vector of [4 x float].\n"
55228"/// Bits [95:64] are written to bits [63:32] of the destination. \\n\n"
55229"/// Bits [127:96] are written to bits [127:96] of the destination.\n"
55230"/// \\returns A 128-bit vector of [4 x float] containing the interleaved values.\n"
55231"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
55232"_mm_unpackhi_ps(__m128 __a, __m128 __b)\n"
55233"{\n"
55234" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7);\n"
55235"}\n"
55236"\n"
55237"/// Unpacks the low-order (index 0,1) values from two 128-bit vectors of\n"
55238"/// [4 x float] and interleaves them into a 128-bit vector of [4 x float].\n"
55239"///\n"
55240"/// \\headerfile <x86intrin.h>\n"
55241"///\n"
55242"/// This intrinsic corresponds to the <c> VUNPCKLPS / UNPCKLPS </c> instruction.\n"
55243"///\n"
55244"/// \\param __a\n"
55245"/// A 128-bit vector of [4 x float]. \\n\n"
55246"/// Bits [31:0] are written to bits [31:0] of the destination. \\n\n"
55247"/// Bits [63:32] are written to bits [95:64] of the destination.\n"
55248"/// \\param __b\n"
55249"/// A 128-bit vector of [4 x float]. \\n\n"
55250"/// Bits [31:0] are written to bits [63:32] of the destination. \\n\n"
55251"/// Bits [63:32] are written to bits [127:96] of the destination.\n"
55252"/// \\returns A 128-bit vector of [4 x float] containing the interleaved values.\n"
55253"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
55254"_mm_unpacklo_ps(__m128 __a, __m128 __b)\n"
55255"{\n"
55256" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5);\n"
55257"}\n"
55258"\n"
55259"/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n"
55260"/// 32 bits are set to the lower 32 bits of the second parameter. The upper\n"
55261"/// 96 bits are set to the upper 96 bits of the first parameter.\n"
55262"///\n"
55263"/// \\headerfile <x86intrin.h>\n"
55264"///\n"
55265"/// This intrinsic corresponds to the <c> VBLENDPS / BLENDPS / MOVSS </c>\n"
55266"/// instruction.\n"
55267"///\n"
55268"/// \\param __a\n"
55269"/// A 128-bit floating-point vector of [4 x float]. The upper 96 bits are\n"
55270"/// written to the upper 96 bits of the result.\n"
55271"/// \\param __b\n"
55272"/// A 128-bit floating-point vector of [4 x float]. The lower 32 bits are\n"
55273"/// written to the lower 32 bits of the result.\n"
55274"/// \\returns A 128-bit floating-point vector of [4 x float].\n"
55275"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
55276"_mm_move_ss(__m128 __a, __m128 __b)\n"
55277"{\n"
55278" __a[0] = __b[0];\n"
55279" return __a;\n"
55280"}\n"
55281"\n"
55282"/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n"
55283"/// 64 bits are set to the upper 64 bits of the second parameter. The upper\n"
55284"/// 64 bits are set to the upper 64 bits of the first parameter.\n"
55285"///\n"
55286"/// \\headerfile <x86intrin.h>\n"
55287"///\n"
55288"/// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.\n"
55289"///\n"
55290"/// \\param __a\n"
55291"/// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are\n"
55292"/// written to the upper 64 bits of the result.\n"
55293"/// \\param __b\n"
55294"/// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are\n"
55295"/// written to the lower 64 bits of the result.\n"
55296"/// \\returns A 128-bit floating-point vector of [4 x float].\n"
55297"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
55298"_mm_movehl_ps(__m128 __a, __m128 __b)\n"
55299"{\n"
55300" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3);\n"
55301"}\n"
55302"\n"
55303"/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n"
55304"/// 64 bits are set to the lower 64 bits of the first parameter. The upper\n"
55305"/// 64 bits are set to the lower 64 bits of the second parameter.\n"
55306"///\n"
55307"/// \\headerfile <x86intrin.h>\n"
55308"///\n"
55309"/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n"
55310"///\n"
55311"/// \\param __a\n"
55312"/// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are\n"
55313"/// written to the lower 64 bits of the result.\n"
55314"/// \\param __b\n"
55315"/// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are\n"
55316"/// written to the upper 64 bits of the result.\n"
55317"/// \\returns A 128-bit floating-point vector of [4 x float].\n"
55318"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
55319"_mm_movelh_ps(__m128 __a, __m128 __b)\n"
55320"{\n"
55321" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5);\n"
55322"}\n"
55323"\n"
55324"/// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x\n"
55325"/// float].\n"
55326"///\n"
55327"/// \\headerfile <x86intrin.h>\n"
55328"///\n"
55329"/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n"
55330"///\n"
55331"/// \\param __a\n"
55332"/// A 64-bit vector of [4 x i16]. The elements of the destination are copied\n"
55333"/// from the corresponding elements in this operand.\n"
55334"/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n"
55335"/// values from the operand.\n"
55336"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
55337"_mm_cvtpi16_ps(__m64 __a)\n"
55338"{\n"
55339" __m64 __b, __c;\n"
55340" __m128 __r;\n"
55341"\n"
55342" __b = _mm_setzero_si64();\n"
55343" __b = _mm_cmpgt_pi16(__b, __a);\n"
55344" __c = _mm_unpackhi_pi16(__a, __b);\n"
55345" __r = _mm_setzero_ps();\n"
55346" __r = _mm_cvtpi32_ps(__r, __c);\n"
55347" __r = _mm_movelh_ps(__r, __r);\n"
55348" __c = _mm_unpacklo_pi16(__a, __b);\n"
55349" __r = _mm_cvtpi32_ps(__r, __c);\n"
55350"\n"
55351" return __r;\n"
55352"}\n"
55353"\n"
55354"/// Converts a 64-bit vector of 16-bit unsigned integer values into a\n"
55355"/// 128-bit vector of [4 x float].\n"
55356"///\n"
55357"/// \\headerfile <x86intrin.h>\n"
55358"///\n"
55359"/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n"
55360"///\n"
55361"/// \\param __a\n"
55362"/// A 64-bit vector of 16-bit unsigned integer values. The elements of the\n"
55363"/// destination are copied from the corresponding elements in this operand.\n"
55364"/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n"
55365"/// values from the operand.\n"
55366"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
55367"_mm_cvtpu16_ps(__m64 __a)\n"
55368"{\n"
55369" __m64 __b, __c;\n"
55370" __m128 __r;\n"
55371"\n"
55372" __b = _mm_setzero_si64();\n"
55373" __c = _mm_unpackhi_pi16(__a, __b);\n"
55374" __r = _mm_setzero_ps();\n"
55375" __r = _mm_cvtpi32_ps(__r, __c);\n"
55376" __r = _mm_movelh_ps(__r, __r);\n"
55377" __c = _mm_unpacklo_pi16(__a, __b);\n"
55378" __r = _mm_cvtpi32_ps(__r, __c);\n"
55379"\n"
55380" return __r;\n"
55381"}\n"
55382"\n"
55383"/// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8]\n"
55384"/// into a 128-bit vector of [4 x float].\n"
55385"///\n"
55386"/// \\headerfile <x86intrin.h>\n"
55387"///\n"
55388"/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n"
55389"///\n"
55390"/// \\param __a\n"
55391"/// A 64-bit vector of [8 x i8]. The elements of the destination are copied\n"
55392"/// from the corresponding lower 4 elements in this operand.\n"
55393"/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n"
55394"/// values from the operand.\n"
55395"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
55396"_mm_cvtpi8_ps(__m64 __a)\n"
55397"{\n"
55398" __m64 __b;\n"
55399"\n"
55400" __b = _mm_setzero_si64();\n"
55401" __b = _mm_cmpgt_pi8(__b, __a);\n"
55402" __b = _mm_unpacklo_pi8(__a, __b);\n"
55403"\n"
55404" return _mm_cvtpi16_ps(__b);\n"
55405"}\n"
55406"\n"
55407"/// Converts the lower four unsigned 8-bit integer values from a 64-bit\n"
55408"/// vector of [8 x u8] into a 128-bit vector of [4 x float].\n"
55409"///\n"
55410"/// \\headerfile <x86intrin.h>\n"
55411"///\n"
55412"/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n"
55413"///\n"
55414"/// \\param __a\n"
55415"/// A 64-bit vector of unsigned 8-bit integer values. The elements of the\n"
55416"/// destination are copied from the corresponding lower 4 elements in this\n"
55417"/// operand.\n"
55418"/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n"
55419"/// values from the source operand.\n"
55420"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
55421"_mm_cvtpu8_ps(__m64 __a)\n"
55422"{\n"
55423" __m64 __b;\n"
55424"\n"
55425" __b = _mm_setzero_si64();\n"
55426" __b = _mm_unpacklo_pi8(__a, __b);\n"
55427"\n"
55428" return _mm_cvtpi16_ps(__b);\n"
55429"}\n"
55430"\n"
55431"/// Converts the two 32-bit signed integer values from each 64-bit vector\n"
55432"/// operand of [2 x i32] into a 128-bit vector of [4 x float].\n"
55433"///\n"
55434"/// \\headerfile <x86intrin.h>\n"
55435"///\n"
55436"/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n"
55437"///\n"
55438"/// \\param __a\n"
55439"/// A 64-bit vector of [2 x i32]. The lower elements of the destination are\n"
55440"/// copied from the elements in this operand.\n"
55441"/// \\param __b\n"
55442"/// A 64-bit vector of [2 x i32]. The upper elements of the destination are\n"
55443"/// copied from the elements in this operand.\n"
55444"/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n"
55445"/// copied and converted values from the first operand. The upper 64 bits\n"
55446"/// contain the copied and converted values from the second operand.\n"
55447"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
55448"_mm_cvtpi32x2_ps(__m64 __a, __m64 __b)\n"
55449"{\n"
55450" __m128 __c;\n"
55451"\n"
55452" __c = _mm_setzero_ps();\n"
55453" __c = _mm_cvtpi32_ps(__c, __b);\n"
55454" __c = _mm_movelh_ps(__c, __c);\n"
55455"\n"
55456" return _mm_cvtpi32_ps(__c, __a);\n"
55457"}\n"
55458"\n"
55459"/// Converts each single-precision floating-point element of a 128-bit\n"
55460"/// floating-point vector of [4 x float] into a 16-bit signed integer, and\n"
55461"/// packs the results into a 64-bit integer vector of [4 x i16].\n"
55462"///\n"
55463"/// If the floating-point element is NaN or infinity, or if the\n"
55464"/// floating-point element is greater than 0x7FFFFFFF or less than -0x8000,\n"
55465"/// it is converted to 0x8000. Otherwise if the floating-point element is\n"
55466"/// greater than 0x7FFF, it is converted to 0x7FFF.\n"
55467"///\n"
55468"/// \\headerfile <x86intrin.h>\n"
55469"///\n"
55470"/// This intrinsic corresponds to the <c> CVTPS2PI + COMPOSITE </c> instruction.\n"
55471"///\n"
55472"/// \\param __a\n"
55473"/// A 128-bit floating-point vector of [4 x float].\n"
55474"/// \\returns A 64-bit integer vector of [4 x i16] containing the converted\n"
55475"/// values.\n"
55476"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
55477"_mm_cvtps_pi16(__m128 __a)\n"
55478"{\n"
55479" __m64 __b, __c;\n"
55480"\n"
55481" __b = _mm_cvtps_pi32(__a);\n"
55482" __a = _mm_movehl_ps(__a, __a);\n"
55483" __c = _mm_cvtps_pi32(__a);\n"
55484"\n"
55485" return _mm_packs_pi32(__b, __c);\n"
55486"}\n"
55487"\n"
55488"/// Converts each single-precision floating-point element of a 128-bit\n"
55489"/// floating-point vector of [4 x float] into an 8-bit signed integer, and\n"
55490"/// packs the results into the lower 32 bits of a 64-bit integer vector of\n"
55491"/// [8 x i8]. The upper 32 bits of the vector are set to 0.\n"
55492"///\n"
55493"/// If the floating-point element is NaN or infinity, or if the\n"
55494"/// floating-point element is greater than 0x7FFFFFFF or less than -0x80, it\n"
55495"/// is converted to 0x80. Otherwise if the floating-point element is greater\n"
55496"/// than 0x7F, it is converted to 0x7F.\n"
55497"///\n"
55498"/// \\headerfile <x86intrin.h>\n"
55499"///\n"
55500"/// This intrinsic corresponds to the <c> CVTPS2PI + COMPOSITE </c> instruction.\n"
55501"///\n"
55502"/// \\param __a\n"
55503"/// 128-bit floating-point vector of [4 x float].\n"
55504"/// \\returns A 64-bit integer vector of [8 x i8]. The lower 32 bits contain the\n"
55505"/// converted values and the uppper 32 bits are set to zero.\n"
55506"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
55507"_mm_cvtps_pi8(__m128 __a)\n"
55508"{\n"
55509" __m64 __b, __c;\n"
55510"\n"
55511" __b = _mm_cvtps_pi16(__a);\n"
55512" __c = _mm_setzero_si64();\n"
55513"\n"
55514" return _mm_packs_pi16(__b, __c);\n"
55515"}\n"
55516"\n"
55517"/// Extracts the sign bits from each single-precision floating-point\n"
55518"/// element of a 128-bit floating-point vector of [4 x float] and returns the\n"
55519"/// sign bits in bits [0:3] of the result. Bits [31:4] of the result are set\n"
55520"/// to zero.\n"
55521"///\n"
55522"/// \\headerfile <x86intrin.h>\n"
55523"///\n"
55524"/// This intrinsic corresponds to the <c> VMOVMSKPS / MOVMSKPS </c> instruction.\n"
55525"///\n"
55526"/// \\param __a\n"
55527"/// A 128-bit floating-point vector of [4 x float].\n"
55528"/// \\returns A 32-bit integer value. Bits [3:0] contain the sign bits from each\n"
55529"/// single-precision floating-point element of the parameter. Bits [31:4] are\n"
55530"/// set to zero.\n"
55531"static __inline__ int __DEFAULT_FN_ATTRS\n"
55532"_mm_movemask_ps(__m128 __a)\n"
55533"{\n"
55534" return __builtin_ia32_movmskps((__v4sf)__a);\n"
55535"}\n"
55536"\n"
55537"\n"
55538"#define _MM_ALIGN16 __attribute__((aligned(16)))\n"
55539"\n"
55540"#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))\n"
55541"\n"
55542"#define _MM_EXCEPT_INVALID (0x0001)\n"
55543"#define _MM_EXCEPT_DENORM (0x0002)\n"
55544"#define _MM_EXCEPT_DIV_ZERO (0x0004)\n"
55545"#define _MM_EXCEPT_OVERFLOW (0x0008)\n"
55546"#define _MM_EXCEPT_UNDERFLOW (0x0010)\n"
55547"#define _MM_EXCEPT_INEXACT (0x0020)\n"
55548"#define _MM_EXCEPT_MASK (0x003f)\n"
55549"\n"
55550"#define _MM_MASK_INVALID (0x0080)\n"
55551"#define _MM_MASK_DENORM (0x0100)\n"
55552"#define _MM_MASK_DIV_ZERO (0x0200)\n"
55553"#define _MM_MASK_OVERFLOW (0x0400)\n"
55554"#define _MM_MASK_UNDERFLOW (0x0800)\n"
55555"#define _MM_MASK_INEXACT (0x1000)\n"
55556"#define _MM_MASK_MASK (0x1f80)\n"
55557"\n"
55558"#define _MM_ROUND_NEAREST (0x0000)\n"
55559"#define _MM_ROUND_DOWN (0x2000)\n"
55560"#define _MM_ROUND_UP (0x4000)\n"
55561"#define _MM_ROUND_TOWARD_ZERO (0x6000)\n"
55562"#define _MM_ROUND_MASK (0x6000)\n"
55563"\n"
55564"#define _MM_FLUSH_ZERO_MASK (0x8000)\n"
55565"#define _MM_FLUSH_ZERO_ON (0x8000)\n"
55566"#define _MM_FLUSH_ZERO_OFF (0x0000)\n"
55567"\n"
55568"#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK)\n"
55569"#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK)\n"
55570"#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)\n"
55571"#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)\n"
55572"\n"
55573"#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x)))\n"
55574"#define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x)))\n"
55575"#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x)))\n"
55576"#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x)))\n"
55577"\n"
55578"#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \\\n"
55579"do { \\\n"
55580" __m128 tmp3, tmp2, tmp1, tmp0; \\\n"
55581" tmp0 = _mm_unpacklo_ps((row0), (row1)); \\\n"
55582" tmp2 = _mm_unpacklo_ps((row2), (row3)); \\\n"
55583" tmp1 = _mm_unpackhi_ps((row0), (row1)); \\\n"
55584" tmp3 = _mm_unpackhi_ps((row2), (row3)); \\\n"
55585" (row0) = _mm_movelh_ps(tmp0, tmp2); \\\n"
55586" (row1) = _mm_movehl_ps(tmp2, tmp0); \\\n"
55587" (row2) = _mm_movelh_ps(tmp1, tmp3); \\\n"
55588" (row3) = _mm_movehl_ps(tmp3, tmp1); \\\n"
55589"} while (0)\n"
55590"\n"
55591"/* Aliases for compatibility. */\n"
55592"#define _m_pextrw _mm_extract_pi16\n"
55593"#define _m_pinsrw _mm_insert_pi16\n"
55594"#define _m_pmaxsw _mm_max_pi16\n"
55595"#define _m_pmaxub _mm_max_pu8\n"
55596"#define _m_pminsw _mm_min_pi16\n"
55597"#define _m_pminub _mm_min_pu8\n"
55598"#define _m_pmovmskb _mm_movemask_pi8\n"
55599"#define _m_pmulhuw _mm_mulhi_pu16\n"
55600"#define _m_pshufw _mm_shuffle_pi16\n"
55601"#define _m_maskmovq _mm_maskmove_si64\n"
55602"#define _m_pavgb _mm_avg_pu8\n"
55603"#define _m_pavgw _mm_avg_pu16\n"
55604"#define _m_psadbw _mm_sad_pu8\n"
55605"#define _m_ _mm_\n"
55606"#define _m_ _mm_\n"
55607"\n"
55608"#undef __DEFAULT_FN_ATTRS\n"
55609"#undef __DEFAULT_FN_ATTRS_MMX\n"
55610"\n"
55611"/* Ugly hack for backwards-compatibility (compatible with gcc) */\n"
55612"#if defined(__SSE2__) && !__building_module(_Builtin_intrinsics)\n"
55613"#include <emmintrin.h>\n"
55614"#endif\n"
55615"\n"
55616"#endif /* __XMMINTRIN_H */\n"
55617"" } ,
55618 { "/builtins/xopintrin.h" , "/*===---- xopintrin.h - XOP intrinsics -------------------------------------===\n"
55619" *\n"
55620" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
55621" * of this software and associated documentation files (the \"Software\"), to deal\n"
55622" * in the Software without restriction, including without limitation the rights\n"
55623" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
55624" * copies of the Software, and to permit persons to whom the Software is\n"
55625" * furnished to do so, subject to the following conditions:\n"
55626" *\n"
55627" * The above copyright notice and this permission notice shall be included in\n"
55628" * all copies or substantial portions of the Software.\n"
55629" *\n"
55630" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
55631" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
55632" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
55633" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
55634" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
55635" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
55636" * THE SOFTWARE.\n"
55637" *\n"
55638" *===-----------------------------------------------------------------------===\n"
55639" */\n"
55640"\n"
55641"#ifndef __X86INTRIN_H\n"
55642"#error \"Never use <xopintrin.h> directly; include <x86intrin.h> instead.\"\n"
55643"#endif\n"
55644"\n"
55645"#ifndef __XOPINTRIN_H\n"
55646"#define __XOPINTRIN_H\n"
55647"\n"
55648"#include <fma4intrin.h>\n"
55649"\n"
55650"/* Define the default attributes for the functions in this file. */\n"
55651"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xop\"), __min_vector_width__(128)))\n"
55652"#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"xop\"), __min_vector_width__(256)))\n"
55653"\n"
55654"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55655"_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
55656"{\n"
55657" return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);\n"
55658"}\n"
55659"\n"
55660"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55661"_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
55662"{\n"
55663" return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);\n"
55664"}\n"
55665"\n"
55666"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55667"_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
55668"{\n"
55669" return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n"
55670"}\n"
55671"\n"
55672"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55673"_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
55674"{\n"
55675" return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n"
55676"}\n"
55677"\n"
55678"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55679"_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
55680"{\n"
55681" return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);\n"
55682"}\n"
55683"\n"
55684"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55685"_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
55686"{\n"
55687" return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);\n"
55688"}\n"
55689"\n"
55690"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55691"_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
55692"{\n"
55693" return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);\n"
55694"}\n"
55695"\n"
55696"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55697"_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
55698"{\n"
55699" return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);\n"
55700"}\n"
55701"\n"
55702"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55703"_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
55704"{\n"
55705" return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);\n"
55706"}\n"
55707"\n"
55708"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55709"_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
55710"{\n"
55711" return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);\n"
55712"}\n"
55713"\n"
55714"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55715"_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
55716"{\n"
55717" return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n"
55718"}\n"
55719"\n"
55720"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55721"_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
55722"{\n"
55723" return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n"
55724"}\n"
55725"\n"
55726"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55727"_mm_haddw_epi8(__m128i __A)\n"
55728"{\n"
55729" return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);\n"
55730"}\n"
55731"\n"
55732"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55733"_mm_haddd_epi8(__m128i __A)\n"
55734"{\n"
55735" return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);\n"
55736"}\n"
55737"\n"
55738"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55739"_mm_haddq_epi8(__m128i __A)\n"
55740"{\n"
55741" return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);\n"
55742"}\n"
55743"\n"
55744"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55745"_mm_haddd_epi16(__m128i __A)\n"
55746"{\n"
55747" return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);\n"
55748"}\n"
55749"\n"
55750"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55751"_mm_haddq_epi16(__m128i __A)\n"
55752"{\n"
55753" return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);\n"
55754"}\n"
55755"\n"
55756"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55757"_mm_haddq_epi32(__m128i __A)\n"
55758"{\n"
55759" return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);\n"
55760"}\n"
55761"\n"
55762"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55763"_mm_haddw_epu8(__m128i __A)\n"
55764"{\n"
55765" return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);\n"
55766"}\n"
55767"\n"
55768"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55769"_mm_haddd_epu8(__m128i __A)\n"
55770"{\n"
55771" return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);\n"
55772"}\n"
55773"\n"
55774"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55775"_mm_haddq_epu8(__m128i __A)\n"
55776"{\n"
55777" return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);\n"
55778"}\n"
55779"\n"
55780"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55781"_mm_haddd_epu16(__m128i __A)\n"
55782"{\n"
55783" return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);\n"
55784"}\n"
55785"\n"
55786"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55787"_mm_haddq_epu16(__m128i __A)\n"
55788"{\n"
55789" return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);\n"
55790"}\n"
55791"\n"
55792"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55793"_mm_haddq_epu32(__m128i __A)\n"
55794"{\n"
55795" return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);\n"
55796"}\n"
55797"\n"
55798"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55799"_mm_hsubw_epi8(__m128i __A)\n"
55800"{\n"
55801" return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);\n"
55802"}\n"
55803"\n"
55804"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55805"_mm_hsubd_epi16(__m128i __A)\n"
55806"{\n"
55807" return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);\n"
55808"}\n"
55809"\n"
55810"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55811"_mm_hsubq_epi32(__m128i __A)\n"
55812"{\n"
55813" return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);\n"
55814"}\n"
55815"\n"
55816"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55817"_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)\n"
55818"{\n"
55819" return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C));\n"
55820"}\n"
55821"\n"
55822"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
55823"_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)\n"
55824"{\n"
55825" return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C));\n"
55826"}\n"
55827"\n"
55828"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55829"_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)\n"
55830"{\n"
55831" return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);\n"
55832"}\n"
55833"\n"
55834"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55835"_mm_rot_epi8(__m128i __A, __m128i __B)\n"
55836"{\n"
55837" return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);\n"
55838"}\n"
55839"\n"
55840"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55841"_mm_rot_epi16(__m128i __A, __m128i __B)\n"
55842"{\n"
55843" return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);\n"
55844"}\n"
55845"\n"
55846"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55847"_mm_rot_epi32(__m128i __A, __m128i __B)\n"
55848"{\n"
55849" return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);\n"
55850"}\n"
55851"\n"
55852"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55853"_mm_rot_epi64(__m128i __A, __m128i __B)\n"
55854"{\n"
55855" return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);\n"
55856"}\n"
55857"\n"
55858"#define _mm_roti_epi8(A, N) \\\n"
55859" (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N))\n"
55860"\n"
55861"#define _mm_roti_epi16(A, N) \\\n"
55862" (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N))\n"
55863"\n"
55864"#define _mm_roti_epi32(A, N) \\\n"
55865" (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N))\n"
55866"\n"
55867"#define _mm_roti_epi64(A, N) \\\n"
55868" (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N))\n"
55869"\n"
55870"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55871"_mm_shl_epi8(__m128i __A, __m128i __B)\n"
55872"{\n"
55873" return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);\n"
55874"}\n"
55875"\n"
55876"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55877"_mm_shl_epi16(__m128i __A, __m128i __B)\n"
55878"{\n"
55879" return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);\n"
55880"}\n"
55881"\n"
55882"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55883"_mm_shl_epi32(__m128i __A, __m128i __B)\n"
55884"{\n"
55885" return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);\n"
55886"}\n"
55887"\n"
55888"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55889"_mm_shl_epi64(__m128i __A, __m128i __B)\n"
55890"{\n"
55891" return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);\n"
55892"}\n"
55893"\n"
55894"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55895"_mm_sha_epi8(__m128i __A, __m128i __B)\n"
55896"{\n"
55897" return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);\n"
55898"}\n"
55899"\n"
55900"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55901"_mm_sha_epi16(__m128i __A, __m128i __B)\n"
55902"{\n"
55903" return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);\n"
55904"}\n"
55905"\n"
55906"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55907"_mm_sha_epi32(__m128i __A, __m128i __B)\n"
55908"{\n"
55909" return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);\n"
55910"}\n"
55911"\n"
55912"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55913"_mm_sha_epi64(__m128i __A, __m128i __B)\n"
55914"{\n"
55915" return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);\n"
55916"}\n"
55917"\n"
55918"#define _mm_com_epu8(A, B, N) \\\n"
55919" (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \\\n"
55920" (__v16qi)(__m128i)(B), (N))\n"
55921"\n"
55922"#define _mm_com_epu16(A, B, N) \\\n"
55923" (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \\\n"
55924" (__v8hi)(__m128i)(B), (N))\n"
55925"\n"
55926"#define _mm_com_epu32(A, B, N) \\\n"
55927" (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \\\n"
55928" (__v4si)(__m128i)(B), (N))\n"
55929"\n"
55930"#define _mm_com_epu64(A, B, N) \\\n"
55931" (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \\\n"
55932" (__v2di)(__m128i)(B), (N))\n"
55933"\n"
55934"#define _mm_com_epi8(A, B, N) \\\n"
55935" (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \\\n"
55936" (__v16qi)(__m128i)(B), (N))\n"
55937"\n"
55938"#define _mm_com_epi16(A, B, N) \\\n"
55939" (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \\\n"
55940" (__v8hi)(__m128i)(B), (N))\n"
55941"\n"
55942"#define _mm_com_epi32(A, B, N) \\\n"
55943" (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \\\n"
55944" (__v4si)(__m128i)(B), (N))\n"
55945"\n"
55946"#define _mm_com_epi64(A, B, N) \\\n"
55947" (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \\\n"
55948" (__v2di)(__m128i)(B), (N))\n"
55949"\n"
55950"#define _MM_PCOMCTRL_LT 0\n"
55951"#define _MM_PCOMCTRL_LE 1\n"
55952"#define _MM_PCOMCTRL_GT 2\n"
55953"#define _MM_PCOMCTRL_GE 3\n"
55954"#define _MM_PCOMCTRL_EQ 4\n"
55955"#define _MM_PCOMCTRL_NEQ 5\n"
55956"#define _MM_PCOMCTRL_FALSE 6\n"
55957"#define _MM_PCOMCTRL_TRUE 7\n"
55958"\n"
55959"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55960"_mm_comlt_epu8(__m128i __A, __m128i __B)\n"
55961"{\n"
55962" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);\n"
55963"}\n"
55964"\n"
55965"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55966"_mm_comle_epu8(__m128i __A, __m128i __B)\n"
55967"{\n"
55968" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);\n"
55969"}\n"
55970"\n"
55971"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55972"_mm_comgt_epu8(__m128i __A, __m128i __B)\n"
55973"{\n"
55974" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);\n"
55975"}\n"
55976"\n"
55977"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55978"_mm_comge_epu8(__m128i __A, __m128i __B)\n"
55979"{\n"
55980" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);\n"
55981"}\n"
55982"\n"
55983"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55984"_mm_comeq_epu8(__m128i __A, __m128i __B)\n"
55985"{\n"
55986" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);\n"
55987"}\n"
55988"\n"
55989"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55990"_mm_comneq_epu8(__m128i __A, __m128i __B)\n"
55991"{\n"
55992" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);\n"
55993"}\n"
55994"\n"
55995"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55996"_mm_comfalse_epu8(__m128i __A, __m128i __B)\n"
55997"{\n"
55998" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);\n"
55999"}\n"
56000"\n"
56001"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56002"_mm_comtrue_epu8(__m128i __A, __m128i __B)\n"
56003"{\n"
56004" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56005"}\n"
56006"\n"
56007"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56008"_mm_comlt_epu16(__m128i __A, __m128i __B)\n"
56009"{\n"
56010" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);\n"
56011"}\n"
56012"\n"
56013"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56014"_mm_comle_epu16(__m128i __A, __m128i __B)\n"
56015"{\n"
56016" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);\n"
56017"}\n"
56018"\n"
56019"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56020"_mm_comgt_epu16(__m128i __A, __m128i __B)\n"
56021"{\n"
56022" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);\n"
56023"}\n"
56024"\n"
56025"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56026"_mm_comge_epu16(__m128i __A, __m128i __B)\n"
56027"{\n"
56028" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);\n"
56029"}\n"
56030"\n"
56031"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56032"_mm_comeq_epu16(__m128i __A, __m128i __B)\n"
56033"{\n"
56034" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);\n"
56035"}\n"
56036"\n"
56037"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56038"_mm_comneq_epu16(__m128i __A, __m128i __B)\n"
56039"{\n"
56040" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);\n"
56041"}\n"
56042"\n"
56043"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56044"_mm_comfalse_epu16(__m128i __A, __m128i __B)\n"
56045"{\n"
56046" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);\n"
56047"}\n"
56048"\n"
56049"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56050"_mm_comtrue_epu16(__m128i __A, __m128i __B)\n"
56051"{\n"
56052" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56053"}\n"
56054"\n"
56055"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56056"_mm_comlt_epu32(__m128i __A, __m128i __B)\n"
56057"{\n"
56058" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);\n"
56059"}\n"
56060"\n"
56061"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56062"_mm_comle_epu32(__m128i __A, __m128i __B)\n"
56063"{\n"
56064" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);\n"
56065"}\n"
56066"\n"
56067"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56068"_mm_comgt_epu32(__m128i __A, __m128i __B)\n"
56069"{\n"
56070" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);\n"
56071"}\n"
56072"\n"
56073"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56074"_mm_comge_epu32(__m128i __A, __m128i __B)\n"
56075"{\n"
56076" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);\n"
56077"}\n"
56078"\n"
56079"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56080"_mm_comeq_epu32(__m128i __A, __m128i __B)\n"
56081"{\n"
56082" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);\n"
56083"}\n"
56084"\n"
56085"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56086"_mm_comneq_epu32(__m128i __A, __m128i __B)\n"
56087"{\n"
56088" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);\n"
56089"}\n"
56090"\n"
56091"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56092"_mm_comfalse_epu32(__m128i __A, __m128i __B)\n"
56093"{\n"
56094" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);\n"
56095"}\n"
56096"\n"
56097"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56098"_mm_comtrue_epu32(__m128i __A, __m128i __B)\n"
56099"{\n"
56100" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56101"}\n"
56102"\n"
56103"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56104"_mm_comlt_epu64(__m128i __A, __m128i __B)\n"
56105"{\n"
56106" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);\n"
56107"}\n"
56108"\n"
56109"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56110"_mm_comle_epu64(__m128i __A, __m128i __B)\n"
56111"{\n"
56112" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);\n"
56113"}\n"
56114"\n"
56115"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56116"_mm_comgt_epu64(__m128i __A, __m128i __B)\n"
56117"{\n"
56118" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);\n"
56119"}\n"
56120"\n"
56121"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56122"_mm_comge_epu64(__m128i __A, __m128i __B)\n"
56123"{\n"
56124" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);\n"
56125"}\n"
56126"\n"
56127"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56128"_mm_comeq_epu64(__m128i __A, __m128i __B)\n"
56129"{\n"
56130" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);\n"
56131"}\n"
56132"\n"
56133"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56134"_mm_comneq_epu64(__m128i __A, __m128i __B)\n"
56135"{\n"
56136" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);\n"
56137"}\n"
56138"\n"
56139"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56140"_mm_comfalse_epu64(__m128i __A, __m128i __B)\n"
56141"{\n"
56142" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);\n"
56143"}\n"
56144"\n"
56145"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56146"_mm_comtrue_epu64(__m128i __A, __m128i __B)\n"
56147"{\n"
56148" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56149"}\n"
56150"\n"
56151"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56152"_mm_comlt_epi8(__m128i __A, __m128i __B)\n"
56153"{\n"
56154" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);\n"
56155"}\n"
56156"\n"
56157"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56158"_mm_comle_epi8(__m128i __A, __m128i __B)\n"
56159"{\n"
56160" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);\n"
56161"}\n"
56162"\n"
56163"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56164"_mm_comgt_epi8(__m128i __A, __m128i __B)\n"
56165"{\n"
56166" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);\n"
56167"}\n"
56168"\n"
56169"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56170"_mm_comge_epi8(__m128i __A, __m128i __B)\n"
56171"{\n"
56172" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);\n"
56173"}\n"
56174"\n"
56175"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56176"_mm_comeq_epi8(__m128i __A, __m128i __B)\n"
56177"{\n"
56178" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);\n"
56179"}\n"
56180"\n"
56181"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56182"_mm_comneq_epi8(__m128i __A, __m128i __B)\n"
56183"{\n"
56184" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);\n"
56185"}\n"
56186"\n"
56187"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56188"_mm_comfalse_epi8(__m128i __A, __m128i __B)\n"
56189"{\n"
56190" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);\n"
56191"}\n"
56192"\n"
56193"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56194"_mm_comtrue_epi8(__m128i __A, __m128i __B)\n"
56195"{\n"
56196" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56197"}\n"
56198"\n"
56199"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56200"_mm_comlt_epi16(__m128i __A, __m128i __B)\n"
56201"{\n"
56202" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);\n"
56203"}\n"
56204"\n"
56205"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56206"_mm_comle_epi16(__m128i __A, __m128i __B)\n"
56207"{\n"
56208" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);\n"
56209"}\n"
56210"\n"
56211"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56212"_mm_comgt_epi16(__m128i __A, __m128i __B)\n"
56213"{\n"
56214" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);\n"
56215"}\n"
56216"\n"
56217"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56218"_mm_comge_epi16(__m128i __A, __m128i __B)\n"
56219"{\n"
56220" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);\n"
56221"}\n"
56222"\n"
56223"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56224"_mm_comeq_epi16(__m128i __A, __m128i __B)\n"
56225"{\n"
56226" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);\n"
56227"}\n"
56228"\n"
56229"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56230"_mm_comneq_epi16(__m128i __A, __m128i __B)\n"
56231"{\n"
56232" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);\n"
56233"}\n"
56234"\n"
56235"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56236"_mm_comfalse_epi16(__m128i __A, __m128i __B)\n"
56237"{\n"
56238" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);\n"
56239"}\n"
56240"\n"
56241"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56242"_mm_comtrue_epi16(__m128i __A, __m128i __B)\n"
56243"{\n"
56244" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56245"}\n"
56246"\n"
56247"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56248"_mm_comlt_epi32(__m128i __A, __m128i __B)\n"
56249"{\n"
56250" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);\n"
56251"}\n"
56252"\n"
56253"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56254"_mm_comle_epi32(__m128i __A, __m128i __B)\n"
56255"{\n"
56256" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);\n"
56257"}\n"
56258"\n"
56259"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56260"_mm_comgt_epi32(__m128i __A, __m128i __B)\n"
56261"{\n"
56262" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);\n"
56263"}\n"
56264"\n"
56265"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56266"_mm_comge_epi32(__m128i __A, __m128i __B)\n"
56267"{\n"
56268" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);\n"
56269"}\n"
56270"\n"
56271"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56272"_mm_comeq_epi32(__m128i __A, __m128i __B)\n"
56273"{\n"
56274" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);\n"
56275"}\n"
56276"\n"
56277"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56278"_mm_comneq_epi32(__m128i __A, __m128i __B)\n"
56279"{\n"
56280" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);\n"
56281"}\n"
56282"\n"
56283"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56284"_mm_comfalse_epi32(__m128i __A, __m128i __B)\n"
56285"{\n"
56286" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);\n"
56287"}\n"
56288"\n"
56289"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56290"_mm_comtrue_epi32(__m128i __A, __m128i __B)\n"
56291"{\n"
56292" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56293"}\n"
56294"\n"
56295"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56296"_mm_comlt_epi64(__m128i __A, __m128i __B)\n"
56297"{\n"
56298" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);\n"
56299"}\n"
56300"\n"
56301"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56302"_mm_comle_epi64(__m128i __A, __m128i __B)\n"
56303"{\n"
56304" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);\n"
56305"}\n"
56306"\n"
56307"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56308"_mm_comgt_epi64(__m128i __A, __m128i __B)\n"
56309"{\n"
56310" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);\n"
56311"}\n"
56312"\n"
56313"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56314"_mm_comge_epi64(__m128i __A, __m128i __B)\n"
56315"{\n"
56316" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);\n"
56317"}\n"
56318"\n"
56319"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56320"_mm_comeq_epi64(__m128i __A, __m128i __B)\n"
56321"{\n"
56322" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);\n"
56323"}\n"
56324"\n"
56325"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56326"_mm_comneq_epi64(__m128i __A, __m128i __B)\n"
56327"{\n"
56328" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);\n"
56329"}\n"
56330"\n"
56331"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56332"_mm_comfalse_epi64(__m128i __A, __m128i __B)\n"
56333"{\n"
56334" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);\n"
56335"}\n"
56336"\n"
56337"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56338"_mm_comtrue_epi64(__m128i __A, __m128i __B)\n"
56339"{\n"
56340" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56341"}\n"
56342"\n"
56343"#define _mm_permute2_pd(X, Y, C, I) \\\n"
56344" (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \\\n"
56345" (__v2df)(__m128d)(Y), \\\n"
56346" (__v2di)(__m128i)(C), (I))\n"
56347"\n"
56348"#define _mm256_permute2_pd(X, Y, C, I) \\\n"
56349" (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \\\n"
56350" (__v4df)(__m256d)(Y), \\\n"
56351" (__v4di)(__m256i)(C), (I))\n"
56352"\n"
56353"#define _mm_permute2_ps(X, Y, C, I) \\\n"
56354" (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \\\n"
56355" (__v4si)(__m128i)(C), (I))\n"
56356"\n"
56357"#define _mm256_permute2_ps(X, Y, C, I) \\\n"
56358" (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \\\n"
56359" (__v8sf)(__m256)(Y), \\\n"
56360" (__v8si)(__m256i)(C), (I))\n"
56361"\n"
56362"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
56363"_mm_frcz_ss(__m128 __A)\n"
56364"{\n"
56365" return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);\n"
56366"}\n"
56367"\n"
56368"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
56369"_mm_frcz_sd(__m128d __A)\n"
56370"{\n"
56371" return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);\n"
56372"}\n"
56373"\n"
56374"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
56375"_mm_frcz_ps(__m128 __A)\n"
56376"{\n"
56377" return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);\n"
56378"}\n"
56379"\n"
56380"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
56381"_mm_frcz_pd(__m128d __A)\n"
56382"{\n"
56383" return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);\n"
56384"}\n"
56385"\n"
56386"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
56387"_mm256_frcz_ps(__m256 __A)\n"
56388"{\n"
56389" return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);\n"
56390"}\n"
56391"\n"
56392"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
56393"_mm256_frcz_pd(__m256d __A)\n"
56394"{\n"
56395" return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);\n"
56396"}\n"
56397"\n"
56398"#undef __DEFAULT_FN_ATTRS\n"
56399"#undef __DEFAULT_FN_ATTRS256\n"
56400"\n"
56401"#endif /* __XOPINTRIN_H */\n"
56402"" } ,
56403 { "/builtins/xsavecintrin.h" , "/*===---- xsavecintrin.h - XSAVEC intrinsic --------------------------------===\n"
56404" *\n"
56405" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
56406" * of this software and associated documentation files (the \"Software\"), to deal\n"
56407" * in the Software without restriction, including without limitation the rights\n"
56408" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
56409" * copies of the Software, and to permit persons to whom the Software is\n"
56410" * furnished to do so, subject to the following conditions:\n"
56411" *\n"
56412" * The above copyright notice and this permission notice shall be included in\n"
56413" * all copies or substantial portions of the Software.\n"
56414" *\n"
56415" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
56416" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
56417" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
56418" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
56419" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
56420" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
56421" * THE SOFTWARE.\n"
56422" *\n"
56423" *===-----------------------------------------------------------------------===\n"
56424" */\n"
56425"\n"
56426"#ifndef __IMMINTRIN_H\n"
56427"#error \"Never use <xsavecintrin.h> directly; include <immintrin.h> instead.\"\n"
56428"#endif\n"
56429"\n"
56430"#ifndef __XSAVECINTRIN_H\n"
56431"#define __XSAVECINTRIN_H\n"
56432"\n"
56433"/* Define the default attributes for the functions in this file. */\n"
56434"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsavec\")))\n"
56435"\n"
56436"static __inline__ void __DEFAULT_FN_ATTRS\n"
56437"_xsavec(void *__p, unsigned long long __m) {\n"
56438" __builtin_ia32_xsavec(__p, __m);\n"
56439"}\n"
56440"\n"
56441"#ifdef __x86_64__\n"
56442"static __inline__ void __DEFAULT_FN_ATTRS\n"
56443"_xsavec64(void *__p, unsigned long long __m) {\n"
56444" __builtin_ia32_xsavec64(__p, __m);\n"
56445"}\n"
56446"#endif\n"
56447"\n"
56448"#undef __DEFAULT_FN_ATTRS\n"
56449"\n"
56450"#endif\n"
56451"" } ,
56452 { "/builtins/xsaveintrin.h" , "/*===---- xsaveintrin.h - XSAVE intrinsic ----------------------------------===\n"
56453" *\n"
56454" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
56455" * of this software and associated documentation files (the \"Software\"), to deal\n"
56456" * in the Software without restriction, including without limitation the rights\n"
56457" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
56458" * copies of the Software, and to permit persons to whom the Software is\n"
56459" * furnished to do so, subject to the following conditions:\n"
56460" *\n"
56461" * The above copyright notice and this permission notice shall be included in\n"
56462" * all copies or substantial portions of the Software.\n"
56463" *\n"
56464" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
56465" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
56466" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
56467" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
56468" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
56469" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
56470" * THE SOFTWARE.\n"
56471" *\n"
56472" *===-----------------------------------------------------------------------===\n"
56473" */\n"
56474"\n"
56475"#ifndef __IMMINTRIN_H\n"
56476"#error \"Never use <xsaveintrin.h> directly; include <immintrin.h> instead.\"\n"
56477"#endif\n"
56478"\n"
56479"#ifndef __XSAVEINTRIN_H\n"
56480"#define __XSAVEINTRIN_H\n"
56481"\n"
56482"/* Define the default attributes for the functions in this file. */\n"
56483"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsave\")))\n"
56484"\n"
56485"static __inline__ void __DEFAULT_FN_ATTRS\n"
56486"_xsave(void *__p, unsigned long long __m) {\n"
56487" __builtin_ia32_xsave(__p, __m);\n"
56488"}\n"
56489"\n"
56490"static __inline__ void __DEFAULT_FN_ATTRS\n"
56491"_xrstor(void *__p, unsigned long long __m) {\n"
56492" __builtin_ia32_xrstor(__p, __m);\n"
56493"}\n"
56494"\n"
56495"#ifdef __x86_64__\n"
56496"static __inline__ void __DEFAULT_FN_ATTRS\n"
56497"_xsave64(void *__p, unsigned long long __m) {\n"
56498" __builtin_ia32_xsave64(__p, __m);\n"
56499"}\n"
56500"\n"
56501"static __inline__ void __DEFAULT_FN_ATTRS\n"
56502"_xrstor64(void *__p, unsigned long long __m) {\n"
56503" __builtin_ia32_xrstor64(__p, __m);\n"
56504"}\n"
56505"#endif\n"
56506"\n"
56507"#undef __DEFAULT_FN_ATTRS\n"
56508"\n"
56509"#endif\n"
56510"" } ,
56511 { "/builtins/xsaveoptintrin.h" , "/*===---- xsaveoptintrin.h - XSAVEOPT intrinsic ----------------------------===\n"
56512" *\n"
56513" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
56514" * of this software and associated documentation files (the \"Software\"), to deal\n"
56515" * in the Software without restriction, including without limitation the rights\n"
56516" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
56517" * copies of the Software, and to permit persons to whom the Software is\n"
56518" * furnished to do so, subject to the following conditions:\n"
56519" *\n"
56520" * The above copyright notice and this permission notice shall be included in\n"
56521" * all copies or substantial portions of the Software.\n"
56522" *\n"
56523" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
56524" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
56525" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
56526" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
56527" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
56528" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
56529" * THE SOFTWARE.\n"
56530" *\n"
56531" *===-----------------------------------------------------------------------===\n"
56532" */\n"
56533"\n"
56534"#ifndef __IMMINTRIN_H\n"
56535"#error \"Never use <xsaveoptintrin.h> directly; include <immintrin.h> instead.\"\n"
56536"#endif\n"
56537"\n"
56538"#ifndef __XSAVEOPTINTRIN_H\n"
56539"#define __XSAVEOPTINTRIN_H\n"
56540"\n"
56541"/* Define the default attributes for the functions in this file. */\n"
56542"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsaveopt\")))\n"
56543"\n"
56544"static __inline__ void __DEFAULT_FN_ATTRS\n"
56545"_xsaveopt(void *__p, unsigned long long __m) {\n"
56546" __builtin_ia32_xsaveopt(__p, __m);\n"
56547"}\n"
56548"\n"
56549"#ifdef __x86_64__\n"
56550"static __inline__ void __DEFAULT_FN_ATTRS\n"
56551"_xsaveopt64(void *__p, unsigned long long __m) {\n"
56552" __builtin_ia32_xsaveopt64(__p, __m);\n"
56553"}\n"
56554"#endif\n"
56555"\n"
56556"#undef __DEFAULT_FN_ATTRS\n"
56557"\n"
56558"#endif\n"
56559"" } ,
56560 { "/builtins/xsavesintrin.h" , "/*===---- xsavesintrin.h - XSAVES intrinsic --------------------------------===\n"
56561" *\n"
56562" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
56563" * of this software and associated documentation files (the \"Software\"), to deal\n"
56564" * in the Software without restriction, including without limitation the rights\n"
56565" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
56566" * copies of the Software, and to permit persons to whom the Software is\n"
56567" * furnished to do so, subject to the following conditions:\n"
56568" *\n"
56569" * The above copyright notice and this permission notice shall be included in\n"
56570" * all copies or substantial portions of the Software.\n"
56571" *\n"
56572" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
56573" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
56574" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
56575" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
56576" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
56577" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
56578" * THE SOFTWARE.\n"
56579" *\n"
56580" *===-----------------------------------------------------------------------===\n"
56581" */\n"
56582"\n"
56583"#ifndef __IMMINTRIN_H\n"
56584"#error \"Never use <xsavesintrin.h> directly; include <immintrin.h> instead.\"\n"
56585"#endif\n"
56586"\n"
56587"#ifndef __XSAVESINTRIN_H\n"
56588"#define __XSAVESINTRIN_H\n"
56589"\n"
56590"/* Define the default attributes for the functions in this file. */\n"
56591"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsaves\")))\n"
56592"\n"
56593"static __inline__ void __DEFAULT_FN_ATTRS\n"
56594"_xsaves(void *__p, unsigned long long __m) {\n"
56595" __builtin_ia32_xsaves(__p, __m);\n"
56596"}\n"
56597"\n"
56598"static __inline__ void __DEFAULT_FN_ATTRS\n"
56599"_xrstors(void *__p, unsigned long long __m) {\n"
56600" __builtin_ia32_xrstors(__p, __m);\n"
56601"}\n"
56602"\n"
56603"#ifdef __x86_64__\n"
56604"static __inline__ void __DEFAULT_FN_ATTRS\n"
56605"_xrstors64(void *__p, unsigned long long __m) {\n"
56606" __builtin_ia32_xrstors64(__p, __m);\n"
56607"}\n"
56608"\n"
56609"static __inline__ void __DEFAULT_FN_ATTRS\n"
56610"_xsaves64(void *__p, unsigned long long __m) {\n"
56611" __builtin_ia32_xsaves64(__p, __m);\n"
56612"}\n"
56613"#endif\n"
56614"\n"
56615"#undef __DEFAULT_FN_ATTRS\n"
56616"\n"
56617"#endif\n"
56618"" } ,
56619 { "/builtins/xtestintrin.h" , "/*===---- xtestintrin.h - XTEST intrinsic ----------------------------------===\n"
56620" *\n"
56621" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
56622" * of this software and associated documentation files (the \"Software\"), to deal\n"
56623" * in the Software without restriction, including without limitation the rights\n"
56624" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
56625" * copies of the Software, and to permit persons to whom the Software is\n"
56626" * furnished to do so, subject to the following conditions:\n"
56627" *\n"
56628" * The above copyright notice and this permission notice shall be included in\n"
56629" * all copies or substantial portions of the Software.\n"
56630" *\n"
56631" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
56632" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
56633" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
56634" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
56635" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
56636" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
56637" * THE SOFTWARE.\n"
56638" *\n"
56639" *===-----------------------------------------------------------------------===\n"
56640" */\n"
56641"\n"
56642"#ifndef __IMMINTRIN_H\n"
56643"#error \"Never use <xtestintrin.h> directly; include <immintrin.h> instead.\"\n"
56644"#endif\n"
56645"\n"
56646"#ifndef __XTESTINTRIN_H\n"
56647"#define __XTESTINTRIN_H\n"
56648"\n"
56649"/* xtest returns non-zero if the instruction is executed within an RTM or active\n"
56650" * HLE region. */\n"
56651"/* FIXME: This can be an either or for RTM/HLE. Deal with this when HLE is\n"
56652" * supported. */\n"
56653"static __inline__ int\n"
56654" __attribute__((__always_inline__, __nodebug__, __target__(\"rtm\")))\n"
56655" _xtest(void) {\n"
56656" return __builtin_ia32_xtest();\n"
56657"}\n"
56658"\n"
56659"#endif\n"
56660"" } ,
56661
56662 {}
56663};
56664
56665
56666